/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
100
/* Derive the initial sequence number for the reply direction from the
 * incoming packet's address/port 4-tuple.
 */
static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}
108
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140
static int tcp_repair_connect(struct sock *sk)
{
	tcp_connect_init(sk);
	tcp_finish_connect(sk, NULL);

	return 0;
}

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
151{
152 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
153 struct inet_sock *inet = inet_sk(sk);
154 struct tcp_sock *tp = tcp_sk(sk);
155 __be16 orig_sport, orig_dport;
156 __be32 daddr, nexthop;
157 struct flowi4 *fl4;
158 struct rtable *rt;
159 int err;
160 struct ip_options_rcu *inet_opt;
161
162 if (addr_len < sizeof(struct sockaddr_in))
163 return -EINVAL;
164
165 if (usin->sin_family != AF_INET)
166 return -EAFNOSUPPORT;
167
168 nexthop = daddr = usin->sin_addr.s_addr;
169 inet_opt = rcu_dereference_protected(inet->inet_opt,
170 sock_owned_by_user(sk));
171 if (inet_opt && inet_opt->opt.srr) {
172 if (!daddr)
173 return -EINVAL;
174 nexthop = inet_opt->opt.faddr;
175 }
176
177 orig_sport = inet->inet_sport;
178 orig_dport = usin->sin_port;
179 fl4 = &inet->cork.fl.u.ip4;
180 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
181 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
182 IPPROTO_TCP,
183 orig_sport, orig_dport, sk, true);
184 if (IS_ERR(rt)) {
185 err = PTR_ERR(rt);
186 if (err == -ENETUNREACH)
187 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
188 return err;
189 }
190
191 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
192 ip_rt_put(rt);
193 return -ENETUNREACH;
194 }
195
196 if (!inet_opt || !inet_opt->opt.srr)
197 daddr = fl4->daddr;
198
199 if (!inet->inet_saddr)
200 inet->inet_saddr = fl4->saddr;
201 inet->inet_rcv_saddr = inet->inet_saddr;
202
	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}
210
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
		/*
		 * VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and initialize rx_opt.ts_recent
		 * from it, when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}
228
229 inet->inet_dport = usin->sin_port;
230 inet->inet_daddr = daddr;
231
232 inet_csk(sk)->icsk_ext_hdr_len = 0;
233 if (inet_opt)
234 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
235
	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
244 err = inet_hash_connect(&tcp_death_row, sk);
245 if (err)
246 goto failure;
247
248 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
249 inet->inet_sport, inet->inet_dport, sk);
250 if (IS_ERR(rt)) {
251 err = PTR_ERR(rt);
252 rt = NULL;
253 goto failure;
254 }
255
256 sk->sk_gso_type = SKB_GSO_TCPV4;
257 sk_setup_caps(sk, &rt->dst);
258
259 if (!tp->write_seq && likely(!tp->repair))
260 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
261 inet->inet_daddr,
262 inet->inet_sport,
263 usin->sin_port);
264
265 inet->inet_id = tp->write_seq ^ jiffies;
266
267 if (likely(!tp->repair))
268 err = tcp_connect(sk);
269 else
270 err = tcp_repair_connect(sk);
271
272 rt = NULL;
273 if (err)
274 goto failure;
275
276 return 0;
277
failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
284 ip_rt_put(rt);
285 sk->sk_route_caps = 0;
286 inet->inet_dport = 0;
287 return err;
288}
289EXPORT_SYMBOL(tcp_v4_connect);
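
/* Illustrative sketch (userspace, not part of this file): the path into
 * tcp_v4_connect() above starts with an ordinary connect() on an IPv4
 * stream socket.  Addresses and ports below are made-up examples.
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = { .sin_family = AF_INET,
 *				   .sin_port   = htons(80) };
 *	inet_pton(AF_INET, "192.0.2.10", &dst.sin_addr);
 *	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
 *		perror("connect");
 *
 * inet_stream_connect() calls the protocol's ->connect(), i.e.
 * tcp_v4_connect(), which resolves a route and source port, moves the
 * socket to SYN-SENT and (unless the socket is in repair mode) sends
 * the SYN via tcp_connect().
 */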
290
/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
295{
296 struct dst_entry *dst;
297 struct inet_sock *inet = inet_sk(sk);
298
	/* We are not interested in TCP_LISTEN and open_requests
	 * (SYN-RECVs); SYN-ACKs sent by Linux are small (< 576 bytes)
	 * and go through unfragmented.
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_too_big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	}
}
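
/* Worked example (illustrative): if an ICMP_FRAG_NEEDED quotes an MTU of
 * 1400 on a path that previously used 1500, tcp_sync_mss() clamps the
 * MSS to roughly 1400 - 20 (IP header) - 20 (TCP header) = 1360 bytes
 * (less if IP or TCP options are in use), and queued segments are
 * re-sent at the smaller size by tcp_simple_retransmit().
 */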

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
355{
356 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
357 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
358 struct inet_connection_sock *icsk;
359 struct tcp_sock *tp;
360 struct inet_sock *inet;
361 const int type = icmp_hdr(icmp_skb)->type;
362 const int code = icmp_hdr(icmp_skb)->code;
363 struct sock *sk;
364 struct sk_buff *skb;
365 __u32 seq;
366 __u32 remaining;
367 int err;
368 struct net *net = dev_net(icmp_skb->dev);
369
370 if (icmp_skb->len < (iph->ihl << 2) + 8) {
371 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
372 return;
373 }
374
375 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
376 iph->saddr, th->source, inet_iif(icmp_skb));
377 if (!sk) {
378 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
379 return;
380 }
381 if (sk->sk_state == TCP_TIME_WAIT) {
382 inet_twsk_put(inet_twsk(sk));
383 return;
384 }
385
	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
392
393 if (sk->sk_state == TCP_CLOSE)
394 goto out;
395
396 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
397 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
398 goto out;
399 }
400
401 icsk = inet_csk(sk);
402 tp = tcp_sk(sk);
403 seq = ntohl(th->seq);
404 if (sk->sk_state != TCP_LISTEN &&
405 !between(seq, tp->snd_una, tp->snd_nxt)) {
406 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
407 goto out;
408 }
409
410 switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
414 case ICMP_PARAMETERPROB:
415 err = EPROTO;
416 break;
417 case ICMP_DEST_UNREACH:
418 if (code > NR_ICMP_UNREACH)
419 goto out;
420
421 if (code == ICMP_FRAG_NEEDED) {
422 if (!sock_owned_by_user(sk))
423 do_pmtu_discovery(sk, iph, info);
424 goto out;
425 }
426
		err = icmp_err_convert[code].errno;
		/* Check if the ICMP allows reverting the RTO backoff
		 * (see draft-zimmermann-tcp-lcd).
		 */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
432 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
433 !icsk->icsk_backoff)
434 break;
435
436 if (sock_owned_by_user(sk))
437 break;
438
439 icsk->icsk_backoff--;
440 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
441 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
442 tcp_bound_rto(sk);
443
444 skb = tcp_write_queue_head(sk);
445 BUG_ON(!skb);
446
447 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
448 tcp_time_stamp - TCP_SKB_CB(skb)->when);
449
450 if (remaining) {
451 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
452 remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now.
			 */
			tcp_retransmit_timer(sk);
		}
458
459 break;
460 case ICMP_TIME_EXCEEDED:
461 err = EHOSTUNREACH;
462 break;
463 default:
464 goto out;
465 }
466
467 switch (sk->sk_state) {
468 struct request_sock *req, **prev;
469 case TCP_LISTEN:
470 if (sock_owned_by_user(sk))
471 goto out;
472
473 req = inet_csk_search_req(sk, &prev, th->dest,
474 iph->daddr, iph->saddr);
475 if (!req)
476 goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk);
482
483 if (seq != tcp_rsk(req)->snt_isn) {
484 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
485 goto out;
486 }
487
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;
496
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed. */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
510 goto out;
511 }
512
	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 */
529 inet = inet_sk(sk);
530 if (!sock_owned_by_user(sk) && inet->recverr) {
531 sk->sk_err = err;
532 sk->sk_error_report(sk);
533 } else {
534 sk->sk_err_soft = err;
535 }
536
537out:
538 bh_unlock_sock(sk);
539 sock_put(sk);
540}
541
542static void __tcp_v4_send_check(struct sk_buff *skb,
543 __be32 saddr, __be32 daddr)
544{
545 struct tcphdr *th = tcp_hdr(skb);
546
547 if (skb->ip_summed == CHECKSUM_PARTIAL) {
548 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
549 skb->csum_start = skb_transport_header(skb) - skb->head;
550 skb->csum_offset = offsetof(struct tcphdr, check);
551 } else {
552 th->check = tcp_v4_check(skb->len, saddr, daddr,
553 csum_partial(th,
554 th->doff << 2,
555 skb->csum));
556 }
557}
558
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
561{
562 const struct inet_sock *inet = inet_sk(sk);
563
564 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
565}
566EXPORT_SYMBOL(tcp_v4_send_check);
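
/* Note on checksum offload: with CHECKSUM_PARTIAL the helper above only
 * seeds th->check with the pseudo-header checksum and records where the
 * final sum must be written (csum_start/csum_offset); the NIC (or the
 * software fallback) finishes the checksum over the TCP header and
 * payload.  Without offload the full checksum is computed here.
 */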
567
568int tcp_v4_gso_send_check(struct sk_buff *skb)
569{
570 const struct iphdr *iph;
571 struct tcphdr *th;
572
573 if (!pskb_may_pull(skb, sizeof(*th)))
574 return -EINVAL;
575
576 iph = ip_hdr(skb);
577 th = tcp_hdr(skb);
578
579 th->check = 0;
580 skb->ip_summed = CHECKSUM_PARTIAL;
581 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
582 return 0;
583}
584

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		provided by the sender.
 */
static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
599{
600 const struct tcphdr *th = tcp_hdr(skb);
601 struct {
602 struct tcphdr th;
603#ifdef CONFIG_TCP_MD5SIG
604 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
605#endif
606 } rep;
607 struct ip_reply_arg arg;
608#ifdef CONFIG_TCP_MD5SIG
609 struct tcp_md5sig_key *key;
610 const __u8 *hash_location = NULL;
611 unsigned char newhash[16];
612 int genhash;
613 struct sock *sk1 = NULL;
614#endif
615 struct net *net;
616
617
618 if (th->rst)
619 return;
620
621 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
622 return;
623
624
625 memset(&rep, 0, sizeof(rep));
626 rep.th.dest = th->source;
627 rep.th.source = th->dest;
628 rep.th.doff = sizeof(struct tcphdr) / 4;
629 rep.th.rst = 1;
630
631 if (th->ack) {
632 rep.th.seq = th->ack_seq;
633 } else {
634 rep.th.ack = 1;
635 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
636 skb->len - (th->doff << 2));
637 }
638
639 memset(&arg, 0, sizeof(arg));
640 arg.iov[0].iov_base = (unsigned char *)&rep;
641 arg.iov[0].iov_len = sizeof(rep.th);
642
643#ifdef CONFIG_TCP_MD5SIG
644 hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is gone.  Try to find the listening socket
		 * through the source port, and then the md5 key through that
		 * listening socket.  We do not lose security here: the
		 * incoming packet is checked against the md5 hash of the key
		 * we find, and no RST is generated if the hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
656
657 if (!sk1)
658 return;
659 rcu_read_lock();
660 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
661 &ip_hdr(skb)->saddr, AF_INET);
662 if (!key)
663 goto release_sk1;
664
665 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
666 if (genhash || memcmp(hash_location, newhash, 16) != 0)
667 goto release_sk1;
668 } else {
669 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
670 &ip_hdr(skb)->saddr,
671 AF_INET) : NULL;
672 }
673
674 if (key) {
675 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
676 (TCPOPT_NOP << 16) |
677 (TCPOPT_MD5SIG << 8) |
678 TCPOLEN_MD5SIG);
679
680 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
681 rep.th.doff = arg.iov[0].iov_len / 4;
682
683 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
684 key, ip_hdr(skb)->saddr,
685 ip_hdr(skb)->daddr, &rep.th);
686 }
687#endif
688 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
689 ip_hdr(skb)->saddr,
690 arg.iov[0].iov_len, IPPROTO_TCP, 0);
691 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
692 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
693
694
695
696
697 arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
698
699 net = dev_net(skb_dst(skb)->dev);
700 arg.tos = ip_hdr(skb)->tos;
701 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
702 &arg, arg.iov[0].iov_len);
703
704 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
705 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
706
707#ifdef CONFIG_TCP_MD5SIG
708release_sk1:
709 if (sk1) {
710 rcu_read_unlock();
711 sock_put(sk1);
712 }
713#endif
714}
715

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 * outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
724{
725 const struct tcphdr *th = tcp_hdr(skb);
726 struct {
727 struct tcphdr th;
728 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
729#ifdef CONFIG_TCP_MD5SIG
730 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
731#endif
732 ];
733 } rep;
734 struct ip_reply_arg arg;
735 struct net *net = dev_net(skb_dst(skb)->dev);
736
737 memset(&rep.th, 0, sizeof(struct tcphdr));
738 memset(&arg, 0, sizeof(arg));
739
740 arg.iov[0].iov_base = (unsigned char *)&rep;
741 arg.iov[0].iov_len = sizeof(rep.th);
742 if (ts) {
743 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
744 (TCPOPT_TIMESTAMP << 8) |
745 TCPOLEN_TIMESTAMP);
746 rep.opt[1] = htonl(tcp_time_stamp);
747 rep.opt[2] = htonl(ts);
748 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
749 }
750
751
752 rep.th.dest = th->source;
753 rep.th.source = th->dest;
754 rep.th.doff = arg.iov[0].iov_len / 4;
755 rep.th.seq = htonl(seq);
756 rep.th.ack_seq = htonl(ack);
757 rep.th.ack = 1;
758 rep.th.window = htons(win);
759
760#ifdef CONFIG_TCP_MD5SIG
761 if (key) {
762 int offset = (ts) ? 3 : 0;
763
764 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
765 (TCPOPT_NOP << 16) |
766 (TCPOPT_MD5SIG << 8) |
767 TCPOLEN_MD5SIG);
768 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
769 rep.th.doff = arg.iov[0].iov_len/4;
770
771 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
772 key, ip_hdr(skb)->saddr,
773 ip_hdr(skb)->daddr, &rep.th);
774 }
775#endif
776 arg.flags = reply_flags;
777 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
778 ip_hdr(skb)->saddr,
779 arg.iov[0].iov_len, IPPROTO_TCP, 0);
780 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
781 if (oif)
782 arg.bound_dev_if = oif;
783 arg.tos = tos;
784 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
785 &arg, arg.iov[0].iov_len);
786
787 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
788}
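
/* Size bookkeeping for the reply above (illustrative): th.doff counts
 * 32-bit words, so a bare TCP header is 20 bytes -> doff = 5.  Adding
 * the 12-byte aligned timestamp option gives 32 bytes -> doff = 8, and
 * a 20-byte aligned MD5 option on top of that gives 52 bytes -> doff =
 * 13, which is why arg.iov[0].iov_len is simply divided by 4 each time
 * an option block is appended.
 */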
789
790static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
791{
792 struct inet_timewait_sock *tw = inet_twsk(sk);
793 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
794
795 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
796 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
797 tcptw->tw_ts_recent,
798 tw->tw_bound_dev_if,
799 tcp_twsk_md5_key(tcptw),
800 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
801 tw->tw_tos
802 );
803
804 inet_twsk_put(tw);
805}
806
807static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
808 struct request_sock *req)
809{
810 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
811 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
812 req->ts_recent,
813 0,
814 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
815 AF_INET),
816 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
817 ip_hdr(skb)->tos);
818}
819
820
821
822
823
824
825static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
826 struct request_sock *req,
827 struct request_values *rvp,
828 u16 queue_mapping)
829{
830 const struct inet_request_sock *ireq = inet_rsk(req);
831 struct flowi4 fl4;
832 int err = -1;
833 struct sk_buff * skb;
834
835
836 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
837 return -1;
838
839 skb = tcp_make_synack(sk, dst, req, rvp);
840
841 if (skb) {
842 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
843
844 skb_set_queue_mapping(skb, queue_mapping);
845 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
846 ireq->rmt_addr,
847 ireq->opt);
848 err = net_xmit_eval(err);
849 }
850
851 dst_release(dst);
852 return err;
853}
854
855static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
856 struct request_values *rvp)
857{
858 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
859 return tcp_v4_send_synack(sk, NULL, req, rvp, 0);
860}
861
862
863
864
865static void tcp_v4_reqsk_destructor(struct request_sock *req)
866{
867 kfree(inet_rsk(req)->opt);
868}
869
870
871
872
873bool tcp_syn_flood_action(struct sock *sk,
874 const struct sk_buff *skb,
875 const char *proto)
876{
877 const char *msg = "Dropping request";
878 bool want_cookie = false;
879 struct listen_sock *lopt;
880
881
882
883#ifdef CONFIG_SYN_COOKIES
884 if (sysctl_tcp_syncookies) {
885 msg = "Sending cookies";
886 want_cookie = true;
887 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
888 } else
889#endif
890 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
891
892 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
893 if (!lopt->synflood_warned) {
894 lopt->synflood_warned = 1;
895 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
896 proto, ntohs(tcp_hdr(skb)->dest), msg);
897 }
898 return want_cookie;
899}
900EXPORT_SYMBOL(tcp_syn_flood_action);
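
/* Operational note (example, not kernel code): the cookie path above is
 * only taken when syncookies are enabled, e.g.
 *
 *	sysctl -w net.ipv4.tcp_syncookies=1
 *
 * With syncookies off, requests that overflow the listen queue are
 * simply dropped and only the SNMP counters record the flood.
 */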
901
902
903
904
905static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
906 struct sk_buff *skb)
907{
908 const struct ip_options *opt = &(IPCB(skb)->opt);
909 struct ip_options_rcu *dopt = NULL;
910
911 if (opt && opt->optlen) {
912 int opt_size = sizeof(*dopt) + opt->optlen;
913
914 dopt = kmalloc(opt_size, GFP_ATOMIC);
915 if (dopt) {
916 if (ip_options_echo(&dopt->opt, skb)) {
917 kfree(dopt);
918 dopt = NULL;
919 }
920 }
921 }
922 return dopt;
923}
924
925#ifdef CONFIG_TCP_MD5SIG
926
927
928
929
930
931
932
933struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
934 const union tcp_md5_addr *addr,
935 int family)
936{
937 struct tcp_sock *tp = tcp_sk(sk);
938 struct tcp_md5sig_key *key;
939 struct hlist_node *pos;
940 unsigned int size = sizeof(struct in_addr);
941 struct tcp_md5sig_info *md5sig;
942
943
944 md5sig = rcu_dereference_check(tp->md5sig_info,
945 sock_owned_by_user(sk) ||
946 lockdep_is_held(&sk->sk_lock.slock));
947 if (!md5sig)
948 return NULL;
949#if IS_ENABLED(CONFIG_IPV6)
950 if (family == AF_INET6)
951 size = sizeof(struct in6_addr);
952#endif
953 hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
954 if (key->family != family)
955 continue;
956 if (!memcmp(&key->addr, addr, size))
957 return key;
958 }
959 return NULL;
960}
961EXPORT_SYMBOL(tcp_md5_do_lookup);
962
963struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
964 struct sock *addr_sk)
965{
966 union tcp_md5_addr *addr;
967
968 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
969 return tcp_md5_do_lookup(sk, addr, AF_INET);
970}
971EXPORT_SYMBOL(tcp_v4_md5_lookup);
972
973static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
974 struct request_sock *req)
975{
976 union tcp_md5_addr *addr;
977
978 addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
979 return tcp_md5_do_lookup(sk, addr, AF_INET);
980}
981
982
983int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
984 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
985{
	/* Add key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}
998
999 md5sig = rcu_dereference_protected(tp->md5sig_info,
1000 sock_owned_by_user(sk));
1001 if (!md5sig) {
1002 md5sig = kmalloc(sizeof(*md5sig), gfp);
1003 if (!md5sig)
1004 return -ENOMEM;
1005
1006 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1007 INIT_HLIST_HEAD(&md5sig->head);
1008 rcu_assign_pointer(tp->md5sig_info, md5sig);
1009 }
1010
1011 key = sock_kmalloc(sk, sizeof(*key), gfp);
1012 if (!key)
1013 return -ENOMEM;
1014 if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1015 sock_kfree_s(sk, key, sizeof(*key));
1016 return -ENOMEM;
1017 }
1018
1019 memcpy(key->key, newkey, newkeylen);
1020 key->keylen = newkeylen;
1021 key->family = family;
1022 memcpy(&key->addr, addr,
1023 (family == AF_INET6) ? sizeof(struct in6_addr) :
1024 sizeof(struct in_addr));
1025 hlist_add_head_rcu(&key->node, &md5sig->head);
1026 return 0;
1027}
1028EXPORT_SYMBOL(tcp_md5_do_add);
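
/* Note: keys hang off tp->md5sig_info on an RCU-protected hlist.
 * Readers in the packet path use tcp_md5_do_lookup() under
 * rcu_read_lock(), while add/del above run with the socket owned by the
 * user, so writers need no extra locking beyond the socket lock.
 */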
1029
1030int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1031{
1032 struct tcp_sock *tp = tcp_sk(sk);
1033 struct tcp_md5sig_key *key;
1034 struct tcp_md5sig_info *md5sig;
1035
	key = tcp_md5_do_lookup(sk, addr, family);
1037 if (!key)
1038 return -ENOENT;
1039 hlist_del_rcu(&key->node);
1040 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1041 kfree_rcu(key, rcu);
1042 md5sig = rcu_dereference_protected(tp->md5sig_info,
1043 sock_owned_by_user(sk));
1044 if (hlist_empty(&md5sig->head))
1045 tcp_free_md5sig_pool();
1046 return 0;
1047}
1048EXPORT_SYMBOL(tcp_md5_do_del);
1049
1050void tcp_clear_md5_list(struct sock *sk)
1051{
1052 struct tcp_sock *tp = tcp_sk(sk);
1053 struct tcp_md5sig_key *key;
1054 struct hlist_node *pos, *n;
1055 struct tcp_md5sig_info *md5sig;
1056
1057 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1058
1059 if (!hlist_empty(&md5sig->head))
1060 tcp_free_md5sig_pool();
1061 hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1062 hlist_del_rcu(&key->node);
1063 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1064 kfree_rcu(key, rcu);
1065 }
1066}
1067
1068static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1069 int optlen)
1070{
1071 struct tcp_md5sig cmd;
1072 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1073
1074 if (optlen < sizeof(cmd))
1075 return -EINVAL;
1076
1077 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1078 return -EFAULT;
1079
1080 if (sin->sin_family != AF_INET)
1081 return -EINVAL;
1082
1083 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1084 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1085 AF_INET);
1086
1087 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1088 return -EINVAL;
1089
1090 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1091 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1092 GFP_KERNEL);
1093}
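
/* Example (userspace sketch, not part of this file): installing an MD5
 * key on an IPv4 socket ends up in tcp_v4_parse_md5_keys() above.
 * Field names follow struct tcp_md5sig in <linux/tcp.h>.
 *
 *	struct tcp_md5sig md5 = { };
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	peer->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &peer->sin_addr);
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0)
 *		perror("setsockopt(TCP_MD5SIG)");
 *
 * Passing tcpm_keylen == 0 deletes the key for that peer address.
 */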
1094
1095static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1096 __be32 daddr, __be32 saddr, int nbytes)
1097{
1098 struct tcp4_pseudohdr *bp;
1099 struct scatterlist sg;
1100
1101 bp = &hp->md5_blk.ip4;
1102
	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
1109 bp->daddr = daddr;
1110 bp->pad = 0;
1111 bp->protocol = IPPROTO_TCP;
1112 bp->len = cpu_to_be16(nbytes);
1113
1114 sg_init_one(&sg, bp, sizeof(*bp));
1115 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1116}
1117
1118static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1119 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1120{
1121 struct tcp_md5sig_pool *hp;
1122 struct hash_desc *desc;
1123
1124 hp = tcp_get_md5sig_pool();
1125 if (!hp)
1126 goto clear_hash_noput;
1127 desc = &hp->md5_desc;
1128
1129 if (crypto_hash_init(desc))
1130 goto clear_hash;
1131 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1132 goto clear_hash;
1133 if (tcp_md5_hash_header(hp, th))
1134 goto clear_hash;
1135 if (tcp_md5_hash_key(hp, key))
1136 goto clear_hash;
1137 if (crypto_hash_final(desc, md5_hash))
1138 goto clear_hash;
1139
1140 tcp_put_md5sig_pool();
1141 return 0;
1142
1143clear_hash:
1144 tcp_put_md5sig_pool();
1145clear_hash_noput:
1146 memset(md5_hash, 0, 16);
1147 return 1;
1148}
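
/* What goes into the digest (per RFC 2385): the IPv4 pseudo-header, the
 * TCP header without options and with the checksum field zeroed, any
 * segment data, and finally the key itself.  The _hdr() variant above is
 * the data-less case used for RST/ACK replies built on the stack; the
 * _skb() variant below also feeds the payload of the skb being sent or
 * checked.
 */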
1149
1150int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1151 const struct sock *sk, const struct request_sock *req,
1152 const struct sk_buff *skb)
1153{
1154 struct tcp_md5sig_pool *hp;
1155 struct hash_desc *desc;
1156 const struct tcphdr *th = tcp_hdr(skb);
1157 __be32 saddr, daddr;
1158
1159 if (sk) {
1160 saddr = inet_sk(sk)->inet_saddr;
1161 daddr = inet_sk(sk)->inet_daddr;
1162 } else if (req) {
1163 saddr = inet_rsk(req)->loc_addr;
1164 daddr = inet_rsk(req)->rmt_addr;
1165 } else {
1166 const struct iphdr *iph = ip_hdr(skb);
1167 saddr = iph->saddr;
1168 daddr = iph->daddr;
1169 }
1170
1171 hp = tcp_get_md5sig_pool();
1172 if (!hp)
1173 goto clear_hash_noput;
1174 desc = &hp->md5_desc;
1175
1176 if (crypto_hash_init(desc))
1177 goto clear_hash;
1178
1179 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1180 goto clear_hash;
1181 if (tcp_md5_hash_header(hp, th))
1182 goto clear_hash;
1183 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1184 goto clear_hash;
1185 if (tcp_md5_hash_key(hp, key))
1186 goto clear_hash;
1187 if (crypto_hash_final(desc, md5_hash))
1188 goto clear_hash;
1189
1190 tcp_put_md5sig_pool();
1191 return 0;
1192
1193clear_hash:
1194 tcp_put_md5sig_pool();
1195clear_hash_noput:
1196 memset(md5_hash, 0, 16);
1197 return 1;
1198}
1199EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1200
1201static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1202{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be fairly efficient in validating MD5 sigs.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
1212 struct tcp_md5sig_key *hash_expected;
1213 const struct iphdr *iph = ip_hdr(skb);
1214 const struct tcphdr *th = tcp_hdr(skb);
1215 int genhash;
1216 unsigned char newhash[16];
1217
1218 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1219 AF_INET);
1220 hash_location = tcp_parse_md5sig_option(th);
1221
	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;
1225
1226 if (hash_expected && !hash_location) {
1227 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1228 return true;
1229 }
1230
1231 if (!hash_expected && hash_location) {
1232 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1233 return true;
1234 }
1235
	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);
1242
1243 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1244 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1245 &iph->saddr, ntohs(th->source),
1246 &iph->daddr, ntohs(th->dest),
1247 genhash ? " tcp_v4_calc_md5_hash failed"
1248 : "");
1249 return true;
1250 }
1251 return false;
1252}
1253
1254#endif
1255
1256struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1257 .family = PF_INET,
1258 .obj_size = sizeof(struct tcp_request_sock),
1259 .rtx_syn_ack = tcp_v4_rtx_synack,
1260 .send_ack = tcp_v4_reqsk_send_ack,
1261 .destructor = tcp_v4_reqsk_destructor,
1262 .send_reset = tcp_v4_send_reset,
1263 .syn_ack_timeout = tcp_syn_ack_timeout,
1264};
1265
1266#ifdef CONFIG_TCP_MD5SIG
1267static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1268 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1269 .calc_md5_hash = tcp_v4_md5_hash_skb,
1270};
1271#endif
1272
1273int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1274{
1275 struct tcp_extend_values tmp_ext;
1276 struct tcp_options_received tmp_opt;
1277 const u8 *hash_location;
1278 struct request_sock *req;
1279 struct inet_request_sock *ireq;
1280 struct tcp_sock *tp = tcp_sk(sk);
1281 struct dst_entry *dst = NULL;
1282 __be32 saddr = ip_hdr(skb)->saddr;
1283 __be32 daddr = ip_hdr(skb)->daddr;
1284 __u32 isn = TCP_SKB_CB(skb)->when;
1285 bool want_cookie = false;
1286
	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;
1290
	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1296 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1297 if (!want_cookie)
1298 goto drop;
1299 }
1300
	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;
1308
1309 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1310 if (!req)
1311 goto drop;
1312
1313#ifdef CONFIG_TCP_MD5SIG
1314 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1315#endif
1316
1317 tcp_clear_options(&tmp_opt);
1318 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1319 tmp_opt.user_mss = tp->rx_opt.user_mss;
1320 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1321
1322 if (tmp_opt.cookie_plus > 0 &&
1323 tmp_opt.saw_tstamp &&
1324 !tp->rx_opt.cookie_out_never &&
1325 (sysctl_tcp_cookie_size > 0 ||
1326 (tp->cookie_values != NULL &&
1327 tp->cookie_values->cookie_desired > 0))) {
1328 u8 *c;
1329 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1330 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1331
1332 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1333 goto drop_and_release;
1334
1335
1336 *mess++ ^= (__force u32)daddr;
1337 *mess++ ^= (__force u32)saddr;
1338
1339
1340 c = (u8 *)mess;
1341 while (l-- > 0)
1342 *c++ ^= *hash_location++;
1343
1344 want_cookie = false;
1345 tmp_ext.cookie_out_never = 0;
1346 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1347 } else if (!tp->rx_opt.cookie_in_always) {
1348
1349 tmp_ext.cookie_out_never = 1;
1350 tmp_ext.cookie_plus = 0;
1351 } else {
1352 goto drop_and_release;
1353 }
1354 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1355
1356 if (want_cookie && !tmp_opt.saw_tstamp)
1357 tcp_clear_options(&tmp_opt);
1358
1359 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1360 tcp_openreq_init(req, &tmp_opt, skb);
1361
1362 ireq = inet_rsk(req);
1363 ireq->loc_addr = daddr;
1364 ireq->rmt_addr = saddr;
1365 ireq->no_srccheck = inet_sk(sk)->transparent;
1366 ireq->opt = tcp_v4_save_options(sk, skb);
1367
1368 if (security_inet_conn_request(sk, skb, req))
1369 goto drop_and_free;
1370
1371 if (!want_cookie || tmp_opt.tstamp_ok)
1372 TCP_ECN_create_request(req, skb);
1373
1374 if (want_cookie) {
1375 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1376 req->cookie_ts = tmp_opt.tstamp_ok;
1377 } else if (!isn) {
1378 struct inet_peer *peer = NULL;
1379 struct flowi4 fl4;
1380
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and initialize rx_opt.ts_recent
		 * from it, when trying new connection.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
1391 tcp_death_row.sysctl_tw_recycle &&
1392 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1393 fl4.daddr == saddr &&
1394 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1395 inet_peer_refcheck(peer);
1396 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1397 (s32)(peer->tcp_ts - req->ts_recent) >
1398 TCP_PAWS_WINDOW) {
1399 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1400 goto drop_and_release;
1401 }
1402 }
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
1418 goto drop_and_release;
1419 }
1420
1421 isn = tcp_v4_init_sequence(skb);
1422 }
1423 tcp_rsk(req)->snt_isn = isn;
1424 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1425
1426 if (tcp_v4_send_synack(sk, dst, req,
1427 (struct request_values *)&tmp_ext,
1428 skb_get_queue_mapping(skb)) ||
1429 want_cookie)
1430 goto drop_and_free;
1431
1432 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1433 return 0;
1434
1435drop_and_release:
1436 dst_release(dst);
1437drop_and_free:
1438 reqsk_free(req);
1439drop:
1440 return 0;
1441}
1442EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1450 struct request_sock *req,
1451 struct dst_entry *dst)
1452{
1453 struct inet_request_sock *ireq;
1454 struct inet_sock *newinet;
1455 struct tcp_sock *newtp;
1456 struct sock *newsk;
1457#ifdef CONFIG_TCP_MD5SIG
1458 struct tcp_md5sig_key *key;
1459#endif
1460 struct ip_options_rcu *inet_opt;
1461
1462 if (sk_acceptq_is_full(sk))
1463 goto exit_overflow;
1464
1465 newsk = tcp_create_openreq_child(sk, req, skb);
1466 if (!newsk)
1467 goto exit_nonewsk;
1468
1469 newsk->sk_gso_type = SKB_GSO_TCPV4;
1470
1471 newtp = tcp_sk(newsk);
1472 newinet = inet_sk(newsk);
1473 ireq = inet_rsk(req);
1474 newinet->inet_daddr = ireq->rmt_addr;
1475 newinet->inet_rcv_saddr = ireq->loc_addr;
1476 newinet->inet_saddr = ireq->loc_addr;
1477 inet_opt = ireq->opt;
1478 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1479 ireq->opt = NULL;
1480 newinet->mc_index = inet_iif(skb);
1481 newinet->mc_ttl = ip_hdr(skb)->ttl;
1482 newinet->rcv_tos = ip_hdr(skb)->tos;
1483 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1484 if (inet_opt)
1485 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1486 newinet->inet_id = newtp->write_seq ^ jiffies;
1487
1488 if (!dst) {
1489 dst = inet_csk_route_child_sock(sk, newsk, req);
1490 if (!dst)
1491 goto put_and_exit;
1492 } else {
1493
1494 }
1495 sk_setup_caps(newsk, dst);
1496
1497 tcp_mtup_init(newsk);
1498 tcp_sync_mss(newsk, dst_mtu(dst));
1499 newtp->advmss = dst_metric_advmss(dst);
1500 if (tcp_sk(sk)->rx_opt.user_mss &&
1501 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1502 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1503
1504 tcp_initialize_rcv_mss(newsk);
1505 if (tcp_rsk(req)->snt_synack)
1506 tcp_valid_rtt_meas(newsk,
1507 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1508 newtp->total_retrans = req->retrans;
1509
1510#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1522 AF_INET, key->key, key->keylen, GFP_ATOMIC);
1523 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1524 }
1525#endif
1526
1527 if (__inet_inherit_port(sk, newsk) < 0)
1528 goto put_and_exit;
1529 __inet_hash_nolisten(newsk, NULL);
1530
1531 return newsk;
1532
1533exit_overflow:
1534 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1535exit_nonewsk:
1536 dst_release(dst);
1537exit:
1538 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1539 return NULL;
1540put_and_exit:
1541 tcp_clear_xmit_timers(newsk);
1542 tcp_cleanup_congestion_control(newsk);
1543 bh_unlock_sock(newsk);
1544 sock_put(newsk);
1545 goto exit;
1546}
1547EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1548
1549static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1550{
1551 struct tcphdr *th = tcp_hdr(skb);
1552 const struct iphdr *iph = ip_hdr(skb);
1553 struct sock *nsk;
1554 struct request_sock **prev;
1555
1556 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1557 iph->saddr, iph->daddr);
1558 if (req)
1559 return tcp_check_req(sk, skb, req, prev);
1560
1561 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1562 th->source, iph->daddr, th->dest, inet_iif(skb));
1563
1564 if (nsk) {
1565 if (nsk->sk_state != TCP_TIME_WAIT) {
1566 bh_lock_sock(nsk);
1567 return nsk;
1568 }
1569 inet_twsk_put(inet_twsk(nsk));
1570 return NULL;
1571 }
1572
1573#ifdef CONFIG_SYN_COOKIES
1574 if (!th->syn)
1575 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1576#endif
1577 return sk;
1578}
1579
1580static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1581{
1582 const struct iphdr *iph = ip_hdr(skb);
1583
1584 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1585 if (!tcp_v4_check(skb->len, iph->saddr,
1586 iph->daddr, skb->csum)) {
1587 skb->ip_summed = CHECKSUM_UNNECESSARY;
1588 return 0;
1589 }
1590 }
1591
1592 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1593 skb->len, IPPROTO_TCP, 0);
1594
1595 if (skb->len <= 76) {
1596 return __skb_checksum_complete(skb);
1597 }
1598 return 0;
1599}
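
/* Checksum strategy (summary): if the driver already summed the packet
 * (CHECKSUM_COMPLETE) we can verify it right away and mark the skb
 * CHECKSUM_UNNECESSARY.  Otherwise only the pseudo-header sum is seeded
 * here; short packets (<= 76 bytes) are verified immediately, larger
 * ones are left for a later tcp_checksum_complete() once we know the
 * data will actually be consumed.
 */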
1600

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
1622#endif
1623
1624 if (sk->sk_state == TCP_ESTABLISHED) {
1625 sock_rps_save_rxhash(sk, skb);
1626 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1627 rsk = sk;
1628 goto reset;
1629 }
1630 return 0;
1631 }
1632
1633 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1634 goto csum_err;
1635
1636 if (sk->sk_state == TCP_LISTEN) {
1637 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1638 if (!nsk)
1639 goto discard;
1640
1641 if (nsk != sk) {
1642 sock_rps_save_rxhash(nsk, skb);
1643 if (tcp_child_process(sk, nsk, skb)) {
1644 rsk = nsk;
1645 goto reset;
1646 }
1647 return 0;
1648 }
1649 } else
1650 sock_rps_save_rxhash(sk, skb);
1651
1652 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1653 rsk = sk;
1654 goto reset;
1655 }
1656 return 0;
1657
1658reset:
1659 tcp_v4_send_reset(rsk, skb);
1660discard:
1661 kfree_skb(skb);

	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed by the invocation of do_time_wait.
	 */
	return 0;
1668
1669csum_err:
1670 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1671 goto discard;
1672}
1673EXPORT_SYMBOL(tcp_v4_do_rcv);
1674

/*
 *	From tcp_input.c
 */
int tcp_v4_rcv(struct sk_buff *skb)
1680{
1681 const struct iphdr *iph;
1682 const struct tcphdr *th;
1683 struct sock *sk;
1684 int ret;
1685 struct net *net = dev_net(skb->dev);
1686
1687 if (skb->pkt_type != PACKET_HOST)
1688 goto discard_it;
1689
	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1692
1693 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1694 goto discard_it;
1695
1696 th = tcp_hdr(skb);
1697
1698 if (th->doff < sizeof(struct tcphdr) / 4)
1699 goto bad_packet;
1700 if (!pskb_may_pull(skb, th->doff * 4))
1701 goto discard_it;
1702
	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff == 0 is eliminated.
	 * So, we defer the checks.
	 */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;
1709
1710 th = tcp_hdr(skb);
1711 iph = ip_hdr(skb);
1712 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1713 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1714 skb->len - th->doff * 4);
1715 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1716 TCP_SKB_CB(skb)->when = 0;
1717 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1718 TCP_SKB_CB(skb)->sacked = 0;
1719
1720 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1721 if (!sk)
1722 goto no_tcp_socket;
1723
1724process:
1725 if (sk->sk_state == TCP_TIME_WAIT)
1726 goto do_time_wait;
1727
1728 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1729 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1730 goto discard_and_relse;
1731 }
1732
1733 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1734 goto discard_and_relse;
1735 nf_reset(skb);
1736
1737 if (sk_filter(sk, skb))
1738 goto discard_and_relse;
1739
1740 skb->dev = NULL;
1741
1742 bh_lock_sock_nested(sk);
1743 ret = 0;
1744 if (!sock_owned_by_user(sk)) {
1745#ifdef CONFIG_NET_DMA
1746 struct tcp_sock *tp = tcp_sk(sk);
1747 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1748 tp->ucopy.dma_chan = net_dma_find_channel();
1749 if (tp->ucopy.dma_chan)
1750 ret = tcp_v4_do_rcv(sk, skb);
1751 else
1752#endif
1753 {
1754 if (!tcp_prequeue(sk, skb))
1755 ret = tcp_v4_do_rcv(sk, skb);
1756 }
1757 } else if (unlikely(sk_add_backlog(sk, skb,
1758 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1759 bh_unlock_sock(sk);
1760 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1761 goto discard_and_relse;
1762 }
1763 bh_unlock_sock(sk);
1764
1765 sock_put(sk);
1766
1767 return ret;
1768
1769no_tcp_socket:
1770 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1771 goto discard_it;
1772
1773 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1774bad_packet:
1775 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1776 } else {
1777 tcp_v4_send_reset(NULL, skb);
1778 }
1779
1780discard_it:
1781
1782 kfree_skb(skb);
1783 return 0;
1784
1785discard_and_relse:
1786 sock_put(sk);
1787 goto discard_it;
1788
1789do_time_wait:
1790 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1791 inet_twsk_put(inet_twsk(sk));
1792 goto discard_it;
1793 }
1794
1795 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1796 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1797 inet_twsk_put(inet_twsk(sk));
1798 goto discard_it;
1799 }
1800 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1801 case TCP_TW_SYN: {
1802 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1803 &tcp_hashinfo,
1804 iph->daddr, th->dest,
1805 inet_iif(skb));
1806 if (sk2) {
1807 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1808 inet_twsk_put(inet_twsk(sk));
1809 sk = sk2;
1810 goto process;
1811 }
		/* Fall through to ACK */
	}
1814 case TCP_TW_ACK:
1815 tcp_v4_timewait_ack(sk, skb);
1816 break;
1817 case TCP_TW_RST:
1818 goto no_tcp_socket;
1819 case TCP_TW_SUCCESS:;
1820 }
1821 goto discard_it;
1822}
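
/* Delivery paths out of tcp_v4_rcv(), for orientation: if the socket is
 * not owned by a user task the segment is handled immediately via
 * tcp_v4_do_rcv() (optionally after sitting briefly on the prequeue so
 * a reader in tcp_recvmsg() can process it in user context); if the
 * user does own the socket, the segment is parked on the backlog and
 * replayed by release_sock().
 */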
1823
1824struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1825{
1826 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1827 struct inet_sock *inet = inet_sk(sk);
1828 struct inet_peer *peer;
1829
1830 if (!rt ||
1831 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1832 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1833 *release_it = true;
1834 } else {
1835 if (!rt->peer)
1836 rt_bind_peer(rt, inet->inet_daddr, 1);
1837 peer = rt->peer;
1838 *release_it = false;
1839 }
1840
1841 return peer;
1842}
1843EXPORT_SYMBOL(tcp_v4_get_peer);
1844
1845void *tcp_v4_tw_get_peer(struct sock *sk)
1846{
1847 const struct inet_timewait_sock *tw = inet_twsk(sk);
1848
1849 return inet_getpeer_v4(tw->tw_daddr, 1);
1850}
1851EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1852
1853static struct timewait_sock_ops tcp_timewait_sock_ops = {
1854 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1855 .twsk_unique = tcp_twsk_unique,
1856 .twsk_destructor= tcp_twsk_destructor,
1857 .twsk_getpeer = tcp_v4_tw_get_peer,
1858};
1859
1860const struct inet_connection_sock_af_ops ipv4_specific = {
1861 .queue_xmit = ip_queue_xmit,
1862 .send_check = tcp_v4_send_check,
1863 .rebuild_header = inet_sk_rebuild_header,
1864 .conn_request = tcp_v4_conn_request,
1865 .syn_recv_sock = tcp_v4_syn_recv_sock,
1866 .get_peer = tcp_v4_get_peer,
1867 .net_header_len = sizeof(struct iphdr),
1868 .setsockopt = ip_setsockopt,
1869 .getsockopt = ip_getsockopt,
1870 .addr2sockaddr = inet_csk_addr2sockaddr,
1871 .sockaddr_len = sizeof(struct sockaddr_in),
1872 .bind_conflict = inet_csk_bind_conflict,
1873#ifdef CONFIG_COMPAT
1874 .compat_setsockopt = compat_ip_setsockopt,
1875 .compat_getsockopt = compat_ip_getsockopt,
1876#endif
1877};
1878EXPORT_SYMBOL(ipv4_specific);
1879
1880#ifdef CONFIG_TCP_MD5SIG
1881static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1882 .md5_lookup = tcp_v4_md5_lookup,
1883 .calc_md5_hash = tcp_v4_md5_hash_skb,
1884 .md5_parse = tcp_v4_parse_md5_keys,
1885};
1886#endif
1887
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
1892{
1893 struct inet_connection_sock *icsk = inet_csk(sk);
1894
1895 tcp_init_sock(sk);
1896
1897 icsk->icsk_af_ops = &ipv4_specific;
1898
1899#ifdef CONFIG_TCP_MD5SIG
1900 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1901#endif
1902
1903 return 0;
1904}
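
/* Protocol-generic initialisation (buffers, congestion control state,
 * timers) lives in tcp_init_sock(); all the IPv4 flavour has to do here
 * is hook up the address-family operations, plus the MD5 ops when
 * CONFIG_TCP_MD5SIG is enabled.
 */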
1905
1906void tcp_v4_destroy_sock(struct sock *sk)
1907{
1908 struct tcp_sock *tp = tcp_sk(sk);
1909
1910 tcp_clear_xmit_timers(sk);
1911
1912 tcp_cleanup_congestion_control(sk);
1913
1914
1915 tcp_write_queue_purge(sk);
1916
1917
1918 __skb_queue_purge(&tp->out_of_order_queue);
1919
1920#ifdef CONFIG_TCP_MD5SIG
1921
1922 if (tp->md5sig_info) {
1923 tcp_clear_md5_list(sk);
1924 kfree_rcu(tp->md5sig_info, rcu);
1925 tp->md5sig_info = NULL;
1926 }
1927#endif
1928
1929#ifdef CONFIG_NET_DMA
1930
1931 __skb_queue_purge(&sk->sk_async_wait_queue);
1932#endif
1933
1934
1935 __skb_queue_purge(&tp->ucopy.prequeue);
1936
1937
1938 if (inet_csk(sk)->icsk_bind_hash)
1939 inet_put_port(sk);
1940
1941
1942
1943
1944 if (sk->sk_sndmsg_page) {
1945 __free_page(sk->sk_sndmsg_page);
1946 sk->sk_sndmsg_page = NULL;
1947 }
1948
1949
1950 if (tp->cookie_values != NULL) {
1951 kref_put(&tp->cookie_values->kref,
1952 tcp_cookie_values_release);
1953 tp->cookie_values = NULL;
1954 }
1955
1956 sk_sockets_allocated_dec(sk);
1957 sock_release_memcg(sk);
1958}
1959EXPORT_SYMBOL(tcp_v4_destroy_sock);
1960
1961#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1965{
1966 return hlist_nulls_empty(head) ? NULL :
1967 list_entry(head->first, struct inet_timewait_sock, tw_node);
1968}
1969
1970static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1971{
1972 return !is_a_nulls(tw->tw_node.next) ?
1973 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1974}
1975

/*
 * Get next listener socket following cur.  If cur is NULL, get the first
 * socket starting from the bucket given in st->bucket; when st->bucket
 * is zero the very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
1982{
1983 struct inet_connection_sock *icsk;
1984 struct hlist_nulls_node *node;
1985 struct sock *sk = cur;
1986 struct inet_listen_hashbucket *ilb;
1987 struct tcp_iter_state *st = seq->private;
1988 struct net *net = seq_file_net(seq);
1989
1990 if (!sk) {
1991 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1992 spin_lock_bh(&ilb->lock);
1993 sk = sk_nulls_head(&ilb->head);
1994 st->offset = 0;
1995 goto get_sk;
1996 }
1997 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1998 ++st->num;
1999 ++st->offset;
2000
2001 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2002 struct request_sock *req = cur;
2003
2004 icsk = inet_csk(st->syn_wait_sk);
2005 req = req->dl_next;
2006 while (1) {
2007 while (req) {
2008 if (req->rsk_ops->family == st->family) {
2009 cur = req;
2010 goto out;
2011 }
2012 req = req->dl_next;
2013 }
2014 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2015 break;
2016get_req:
2017 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2018 }
2019 sk = sk_nulls_next(st->syn_wait_sk);
2020 st->state = TCP_SEQ_STATE_LISTENING;
2021 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2022 } else {
2023 icsk = inet_csk(sk);
2024 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2025 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2026 goto start_req;
2027 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2028 sk = sk_nulls_next(sk);
2029 }
2030get_sk:
2031 sk_nulls_for_each_from(sk, node) {
2032 if (!net_eq(sock_net(sk), net))
2033 continue;
2034 if (sk->sk_family == st->family) {
2035 cur = sk;
2036 goto out;
2037 }
2038 icsk = inet_csk(sk);
2039 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2040 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2041start_req:
2042 st->uid = sock_i_uid(sk);
2043 st->syn_wait_sk = sk;
2044 st->state = TCP_SEQ_STATE_OPENREQ;
2045 st->sbucket = 0;
2046 goto get_req;
2047 }
2048 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2049 }
2050 spin_unlock_bh(&ilb->lock);
2051 st->offset = 0;
2052 if (++st->bucket < INET_LHTABLE_SIZE) {
2053 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2054 spin_lock_bh(&ilb->lock);
2055 sk = sk_nulls_head(&ilb->head);
2056 goto get_sk;
2057 }
2058 cur = NULL;
2059out:
2060 return cur;
2061}
2062
2063static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2064{
2065 struct tcp_iter_state *st = seq->private;
2066 void *rc;
2067
2068 st->bucket = 0;
2069 st->offset = 0;
2070 rc = listening_get_next(seq, NULL);
2071
2072 while (rc && *pos) {
2073 rc = listening_get_next(seq, rc);
2074 --*pos;
2075 }
2076 return rc;
2077}
2078
2079static inline bool empty_bucket(struct tcp_iter_state *st)
2080{
2081 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2082 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2083}
2084

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
2090{
2091 struct tcp_iter_state *st = seq->private;
2092 struct net *net = seq_file_net(seq);
2093 void *rc = NULL;
2094
2095 st->offset = 0;
2096 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2097 struct sock *sk;
2098 struct hlist_nulls_node *node;
2099 struct inet_timewait_sock *tw;
2100 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2101
		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;
2105
2106 spin_lock_bh(lock);
2107 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2108 if (sk->sk_family != st->family ||
2109 !net_eq(sock_net(sk), net)) {
2110 continue;
2111 }
2112 rc = sk;
2113 goto out;
2114 }
2115 st->state = TCP_SEQ_STATE_TIME_WAIT;
2116 inet_twsk_for_each(tw, node,
2117 &tcp_hashinfo.ehash[st->bucket].twchain) {
2118 if (tw->tw_family != st->family ||
2119 !net_eq(twsk_net(tw), net)) {
2120 continue;
2121 }
2122 rc = tw;
2123 goto out;
2124 }
2125 spin_unlock_bh(lock);
2126 st->state = TCP_SEQ_STATE_ESTABLISHED;
2127 }
2128out:
2129 return rc;
2130}
2131
2132static void *established_get_next(struct seq_file *seq, void *cur)
2133{
2134 struct sock *sk = cur;
2135 struct inet_timewait_sock *tw;
2136 struct hlist_nulls_node *node;
2137 struct tcp_iter_state *st = seq->private;
2138 struct net *net = seq_file_net(seq);
2139
2140 ++st->num;
2141 ++st->offset;
2142
2143 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2144 tw = cur;
2145 tw = tw_next(tw);
2146get_tw:
2147 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2148 tw = tw_next(tw);
2149 }
2150 if (tw) {
2151 cur = tw;
2152 goto out;
2153 }
2154 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2155 st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for next non-empty bucket */
		st->offset = 0;
2159 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2160 empty_bucket(st))
2161 ;
2162 if (st->bucket > tcp_hashinfo.ehash_mask)
2163 return NULL;
2164
2165 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2166 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2167 } else
2168 sk = sk_nulls_next(sk);
2169
2170 sk_nulls_for_each_from(sk, node) {
2171 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2172 goto found;
2173 }
2174
2175 st->state = TCP_SEQ_STATE_TIME_WAIT;
2176 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2177 goto get_tw;
2178found:
2179 cur = sk;
2180out:
2181 return cur;
2182}
2183
2184static void *established_get_idx(struct seq_file *seq, loff_t pos)
2185{
2186 struct tcp_iter_state *st = seq->private;
2187 void *rc;
2188
2189 st->bucket = 0;
2190 rc = established_get_first(seq);
2191
2192 while (rc && pos) {
2193 rc = established_get_next(seq, rc);
2194 --pos;
2195 }
2196 return rc;
2197}
2198
2199static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2200{
2201 void *rc;
2202 struct tcp_iter_state *st = seq->private;
2203
2204 st->state = TCP_SEQ_STATE_LISTENING;
2205 rc = listening_get_idx(seq, &pos);
2206
2207 if (!rc) {
2208 st->state = TCP_SEQ_STATE_ESTABLISHED;
2209 rc = established_get_idx(seq, pos);
2210 }
2211
2212 return rc;
2213}
2214
2215static void *tcp_seek_last_pos(struct seq_file *seq)
2216{
2217 struct tcp_iter_state *st = seq->private;
2218 int offset = st->offset;
2219 int orig_num = st->num;
2220 void *rc = NULL;
2221
2222 switch (st->state) {
2223 case TCP_SEQ_STATE_OPENREQ:
2224 case TCP_SEQ_STATE_LISTENING:
2225 if (st->bucket >= INET_LHTABLE_SIZE)
2226 break;
2227 st->state = TCP_SEQ_STATE_LISTENING;
2228 rc = listening_get_next(seq, NULL);
2229 while (offset-- && rc)
2230 rc = listening_get_next(seq, rc);
2231 if (rc)
2232 break;
2233 st->bucket = 0;
		/* Fallthrough */
2235 case TCP_SEQ_STATE_ESTABLISHED:
2236 case TCP_SEQ_STATE_TIME_WAIT:
2237 st->state = TCP_SEQ_STATE_ESTABLISHED;
2238 if (st->bucket > tcp_hashinfo.ehash_mask)
2239 break;
2240 rc = established_get_first(seq);
2241 while (offset-- && rc)
2242 rc = established_get_next(seq, rc);
2243 }
2244
2245 st->num = orig_num;
2246
2247 return rc;
2248}
2249
2250static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2251{
2252 struct tcp_iter_state *st = seq->private;
2253 void *rc;
2254
2255 if (*pos && *pos == st->last_pos) {
2256 rc = tcp_seek_last_pos(seq);
2257 if (rc)
2258 goto out;
2259 }
2260
2261 st->state = TCP_SEQ_STATE_LISTENING;
2262 st->num = 0;
2263 st->bucket = 0;
2264 st->offset = 0;
2265 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2266
2267out:
2268 st->last_pos = *pos;
2269 return rc;
2270}
2271
2272static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2273{
2274 struct tcp_iter_state *st = seq->private;
2275 void *rc = NULL;
2276
2277 if (v == SEQ_START_TOKEN) {
2278 rc = tcp_get_idx(seq, 0);
2279 goto out;
2280 }
2281
2282 switch (st->state) {
2283 case TCP_SEQ_STATE_OPENREQ:
2284 case TCP_SEQ_STATE_LISTENING:
2285 rc = listening_get_next(seq, v);
2286 if (!rc) {
2287 st->state = TCP_SEQ_STATE_ESTABLISHED;
2288 st->bucket = 0;
2289 st->offset = 0;
2290 rc = established_get_first(seq);
2291 }
2292 break;
2293 case TCP_SEQ_STATE_ESTABLISHED:
2294 case TCP_SEQ_STATE_TIME_WAIT:
2295 rc = established_get_next(seq, v);
2296 break;
2297 }
2298out:
2299 ++*pos;
2300 st->last_pos = *pos;
2301 return rc;
2302}
2303
2304static void tcp_seq_stop(struct seq_file *seq, void *v)
2305{
2306 struct tcp_iter_state *st = seq->private;
2307
2308 switch (st->state) {
2309 case TCP_SEQ_STATE_OPENREQ:
2310 if (v) {
2311 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2312 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2313 }
2314 case TCP_SEQ_STATE_LISTENING:
2315 if (v != SEQ_START_TOKEN)
2316 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2317 break;
2318 case TCP_SEQ_STATE_TIME_WAIT:
2319 case TCP_SEQ_STATE_ESTABLISHED:
2320 if (v)
2321 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2322 break;
2323 }
2324}
2325
2326int tcp_seq_open(struct inode *inode, struct file *file)
2327{
2328 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2329 struct tcp_iter_state *s;
2330 int err;
2331
2332 err = seq_open_net(inode, file, &afinfo->seq_ops,
2333 sizeof(struct tcp_iter_state));
2334 if (err < 0)
2335 return err;
2336
2337 s = ((struct seq_file *)file->private_data)->private;
2338 s->family = afinfo->family;
2339 s->last_pos = 0;
2340 return 0;
2341}
2342EXPORT_SYMBOL(tcp_seq_open);
2343
2344int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2345{
2346 int rc = 0;
2347 struct proc_dir_entry *p;
2348
2349 afinfo->seq_ops.start = tcp_seq_start;
2350 afinfo->seq_ops.next = tcp_seq_next;
2351 afinfo->seq_ops.stop = tcp_seq_stop;
2352
2353 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2354 afinfo->seq_fops, afinfo);
2355 if (!p)
2356 rc = -ENOMEM;
2357 return rc;
2358}
2359EXPORT_SYMBOL(tcp_proc_register);
2360
2361void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2362{
2363 proc_net_remove(net, afinfo->name);
2364}
2365EXPORT_SYMBOL(tcp_proc_unregister);
2366
2367static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2368 struct seq_file *f, int i, int uid, int *len)
2369{
2370 const struct inet_request_sock *ireq = inet_rsk(req);
2371 int ttd = req->expires - jiffies;
2372
2373 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2374 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2375 i,
2376 ireq->loc_addr,
2377 ntohs(inet_sk(sk)->inet_sport),
2378 ireq->rmt_addr,
2379 ntohs(ireq->rmt_port),
2380 TCP_SYN_RECV,
2381 0, 0,
2382 1,
2383 jiffies_to_clock_t(ttd),
2384 req->retrans,
2385 uid,
2386 0,
2387 0,
2388 atomic_read(&sk->sk_refcnt),
2389 req,
2390 len);
2391}
2392
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active = 2;
		timer_expires = sk->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket, we might find a
		 * transient negative value here.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}

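/* Format one TIME_WAIT socket as a line of /proc/net/tcp output. */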
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = tw->tw_daddr;
	src = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

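/*
 * Each record is padded out to TMPSZ - 1 characters so that every line
 * of /proc/net/tcp has a fixed width.
 */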
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif

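/*
 * GRO receive hook for TCP over IPv4: validate (or give up on) the
 * checksum before handing the segment to the generic TCP GRO engine.
 */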
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

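/*
 * GRO complete hook: restore the pseudo-header checksum and mark the
 * merged skb as TCPv4 GSO so it can be segmented again if needed.
 */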
int tcp4_gro_complete(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}

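/* Protocol operations that hook IPv4 TCP sockets into the socket layer. */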
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);

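/*
 * Each network namespace gets its own kernel control socket, used to
 * send RSTs and ACKs that are not associated with any local socket.
 */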
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

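/* Boot-time initialisation: hash tables plus the per-namespace control sockets. */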
void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}