/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

int sysctl_tcp_low_latency __read_mostly;

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
/* Derive the initial sequence number from the connection 4-tuple. */
static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source, tsoff);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
		      get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
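
/* A note on the write_seq bump in tcp_twsk_unique() above: 65535 + 2 is
 * just past the largest unscaled receive window, so the new incarnation's
 * sequence space starts beyond anything the previous connection could
 * still have in flight.  Roughly:
 *
 *	new ISN = tw_snd_nxt + 65535 + 2  >  tw_snd_nxt + max window
 *
 * which keeps stray segments from the old connection out of window.
 */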

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	u32 seq;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq = 0;
	}

	if (tcp_death_row->sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		seq = secure_tcp_sequence_number(inet->inet_saddr,
						 inet->inet_daddr,
						 inet->inet_sport,
						 usin->sin_port,
						 &tp->tsoffset);
		if (!tp->write_seq)
			tp->write_seq = seq;
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
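
/* Informal sketch of the active-open path above (a summary, not kernel-doc):
 *
 *	tcp_v4_connect()
 *	  -> ip_route_connect()             route the initial flow
 *	  -> tcp_set_state(TCP_SYN_SENT)    commit to the handshake
 *	  -> inet_hash_connect()            pick a source port, hash the socket
 *	  -> ip_route_newports()            re-route with the final 4-tuple
 *	  -> secure_tcp_sequence_number()   choose the ISN
 *	  -> tcp_connect()                  build and transmit the SYN
 *
 * Any failure after the socket has been hashed unwinds through the
 * failure: label, which unhashes the socket and releases the local port.
 */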

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined
 * in RFC 1191.  It can be called through tcp_release_cb() if the socket
 * was owned by the user at the time tcp_v4_err() handled the ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember, it is not
	 * a hard error.  We won't test with icmp error or EMSGSIZE;
	 * errors are handled by the standard machinery.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped.  This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);
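
/* Worked example of the check above: if our cached path-MTU cookie says
 * 1500 but the ICMP_FRAG_NEEDED report stashed in tcp_sk(sk)->mtu_info
 * says 1400, icsk_pmtu_cookie > mtu holds, so the MSS is shrunk via
 * tcp_sync_mss() and the lost packet is retransmitted immediately;
 * otherwise the regular retransmit timer would eventually recover.
 */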

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	__u32 seq, snd_una;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb));
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq,
				   type == ICMP_PARAMETERPROB ||
				   type == ICMP_TIME_EXCEEDED ||
				   (type == ICMP_DEST_UNREACH &&
				    (code == ICMP_NET_UNREACH ||
				     code == ICMP_HOST_UNREACH)));

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto -
			    min(icsk->icsk_rto,
				tcp_time_stamp - tcp_skb_timestamp(skb));

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
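
/* A note on the RTO rewind in tcp_v4_err() above: when an ICMP
 * unreachable hints that the path is usable again, one backoff step is
 * undone and the retransmit timer is re-armed with what remains of the
 * shorter RTO:
 *
 *	remaining = icsk_rto - min(icsk_rto, now - timestamp of head skb)
 *
 * If nothing remains, the head segment is retransmitted right away via
 * tcp_retransmit_timer().
 */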

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);
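
/* The two branches in __tcp_v4_send_check() above: with CHECKSUM_PARTIAL
 * the NIC finishes the job, so only the pseudo-header sum is stored and
 * csum_start/csum_offset tell the hardware where to write the result;
 * otherwise the full checksum over the TCP header and payload is computed
 * here in software.
 */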

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		provided by the sender in initial unsuccessful segment(s).
 */
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = sizeof(struct tcphdr) / 4;
	rep.th.rst = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not loosen security here:
		 * the incoming packet is checked against the md5 hash of the
		 * found key; no RST is generated if the md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;


		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;

	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
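
/* On-wire layout of the MD5 option appended to a signed RST above
 * (TCPOLEN_MD5SIG is 18, padded with two NOPs to TCPOLEN_MD5SIG_ALIGNED,
 * i.e. 20 bytes):
 *
 *	+-----+-----+-------------+--------+------------------+
 *	| NOP | NOP | MD5SIG (19) | len=18 |  16-byte digest  |
 *	+-----+-----+-------------+--------+------------------+
 *
 * which is why rep.th.doff grows by TCPOLEN_MD5SIG_ALIGNED / 4 words.
 */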

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */
static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = arg.iov[0].iov_len / 4;
	rep.th.seq = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack = 1;
	rep.th.window = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}
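
/* Layout of the timestamp option built above when tsecr is set
 * (TCPOLEN_TIMESTAMP is 10, padded to TCPOLEN_TSTAMP_ALIGNED, 12 bytes):
 *
 *	+-----+-----+---------------+--------+-------+-------+
 *	| NOP | NOP | TIMESTAMP (8) | len=10 | tsval | tsecr |
 *	+-----+-----+---------------+--------+-------+-------+
 *
 * When an MD5 key is also present, its option block starts at rep.opt[3].
 */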

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
						 tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

#ifdef CONFIG_TCP_MD5SIG

/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */
/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);
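
/* Locking note for the MD5 key list: writers (tcp_md5_do_add/del) run
 * under the socket lock and publish with rcu_assign_pointer() and
 * hlist_*_rcu(); lookups may run under rcu_read_lock() alone, which is
 * why tcp_md5_do_lookup() uses rcu_dereference_check() and why freed
 * keys go through kfree_rcu() rather than kfree().
 */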

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}

static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	ireq->opt = tcp_v4_save_options(skb);
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req,
					  bool *strict)
{
	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);

	if (strict) {
		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
			*strict = true;
		else
			*strict = false;
	}

	return dst;
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct tcp_request_sock),
	.rtx_syn_ack	= tcp_rtx_synack,
	.send_ack	= tcp_v4_reqsk_send_ack,
	.destructor	= tcp_v4_reqsk_destructor,
	.send_reset	= tcp_v4_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	= TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
#endif
	.init_req	= tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq = cookie_v4_init_sequence,
#endif
	.route_req	= tcp_v4_route_req,
	.init_seq	= tcp_v4_init_sequence,
	.send_synack	= tcp_v4_send_synack,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs send to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr = ireq->ir_loc_addr;
	inet_opt = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	newinet->rcv_tos = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req)
		tcp_move_syn(newtp, req);

	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
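
/* Informal summary of tcp_v4_syn_recv_sock() above: clone the listener
 * into a child with tcp_create_openreq_child(), copy addresses and IP
 * options over from the request sock, route the child if no dst was
 * passed in, inherit any matching MD5 key and the bound port, and
 * finally hash the child into the established table with
 * inet_ehash_nolisten(), which also reports through *own_req whether we
 * won the insertion race against a concurrent receiver.
 */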

static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}

/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			sk_mark_napi_id(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's one) can fly, if rest of the stack is not rubbish.
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	/* Before escaping RCU protected region, we need to take care of skb
	 * dst. Prequeue is only enabled for established sockets.
	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
	 * Instead of doing full sk_rx_dst validity here, let's perform
	 * an optimistic check.
	 */
	if (likely(sk->sk_rx_dst))
		skb_dst_drop(skb);
	else
		skb_dst_force_safe(skb);

	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
	    tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));
		__NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
				skb_queue_len(&tp->ucopy.prequeue));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
			sk_backlog_rcv(sk, skb1);

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_sync_poll(sk_sleep(sk),
						POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);
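
/* Two details of tcp_prequeue() worth spelling out: the queue is flushed
 * synchronously through sk_backlog_rcv() once it holds 32 skbs or would
 * overflow sk_rcvbuf, and when the first skb is queued the delayed-ACK
 * timer is armed at 3/4 of the minimum RTO so an ACK still goes out even
 * if the reader task never wakes up in time.
 */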

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;

	/* Only socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);
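
/* The backlog limit above works out to roughly
 *
 *	limit = sk_rcvbuf + sk_sndbuf + 64 KB
 *
 * so a socket whose owner is busy can queue about one receive buffer
 * plus one send buffer worth of truesize before tcp_add_backlog() starts
 * dropping and bumping TCPBacklogDrop. (Exact figures depend on the
 * per-socket buffer sizes.)
 */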

int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;
	unsigned int eaten = skb->len;
	int err;

	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
	if (!err) {
		eaten -= skb->len;
		TCP_SKB_CB(skb)->end_seq -= eaten;
	}
	return err;
}
EXPORT_SYMBOL(tcp_filter);

/*
 *	From tcp_input.c
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
	 * barrier() makes sure compiler wont play fool^Waliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;

lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
			       th->dest, &refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		/* We own a reference on the listener, increase it again
		 * as we might lose it too soon.
		 */
		sock_hold(sk);
		refcounted = true;
		nsk = tcp_check_req(sk, skb, req, false);
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v4_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	nf_reset(skb);

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo, skb,
							__tcp_hdrlen(th),
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule_put(inet_twsk(sk));
			sk = sk2;
			refcounted = false;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v4_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
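
/* Rough map of tcp_v4_rcv() above:
 *
 *	validate header and checksum -> __inet_lookup_skb()
 *	  TCP_NEW_SYN_RECV: revalidate the listener, tcp_check_req()
 *	  TCP_TIME_WAIT:    tcp_timewait_state_process()
 *	  otherwise:        owned by user? backlog : (prequeue or do_rcv)
 *
 * with no_tcp_socket answering unexpected segments with a RST, while
 * csum_error/bad_packet only bump the MIB counters.
 */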

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	struct inet_listen_hashbucket *ilb;
	struct sock *sk = cur;

	if (!sk) {
get_head:
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock(&ilb->lock);
		sk = sk_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family)
			return sk;
	}
	spin_unlock(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE)
		goto get_head;
	return NULL;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	s->last_pos = 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start = tcp_seq_start;
	afinfo->seq_ops.next = tcp_seq_next;
	afinfo->seq_ops.stop = tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	remove_proc_entry(afinfo->name, net->proc_net);
}
EXPORT_SYMBOL(tcp_proc_unregister);

static void get_openreq4(const struct request_sock *req,
			 struct seq_file *f, int i)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->rsk_timer.expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ireq->ir_num,
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f),
				 sock_i_uid(req->rsk_listener)),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		0,
		req);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;
	int state;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active = 2;
		timer_expires = sk->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	state = sk_state_load(sk);
	if (state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		state == TCP_LISTEN ?
		    fastopenq->max_qlen :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}

static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	__be32 dest, src;
	__u16 destp, srcp;

	dest = tw->tw_daddr;
	src = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}

static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 0;

	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}
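
/* For reference, two of the derived per-netns defaults above: with cnt
 * established-hash entries, the time-wait bucket cap is (cnt + 1) / 2,
 * i.e. about half the hash size, and the SYN backlog floor is
 * max(128, cnt / 256).  On a machine with a 512K-entry ehash that would
 * give ~256K time-wait sockets and a 2048-entry SYN backlog
 * (illustrative numbers only).
 */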

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}