/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP):
 *		IPv4 specific functions.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, reusing a TIME-WAIT port pair is safe from the
	 * viewpoint of data integrity.  Even without PAWS it is safe
	 * provided sequence spaces do not overlap, i.e. at data rates
	 * <= 80 Mbit/sec.
	 *
	 * The timestamp state is held per port pair, with the TW bucket
	 * used as the state holder.  If the TW bucket has already been
	 * destroyed we fall back to the initial timestamp retrieved
	 * from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
		      get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

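/* This will initiate an outgoing connection. */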
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
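/*
 * This routine reacts to ICMP_FRAG_NEEDED MTU indications as defined
 * in RFC 1191.  It can also be called through tcp_release_cb() if the
 * socket was owned by user space at the time tcp_v4_err() handled the
 * ICMP message.
 */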
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to go wrong... Remember the soft error
	 * for the case this connection is not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped.  This is the new "fast" path MTU
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

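/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */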
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);
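/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic".  When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */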
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq,
				   type == ICMP_PARAMETERPROB ||
				   type == ICMP_TIME_EXCEEDED ||
				   (type == ICMP_DEST_UNREACH &&
				    (code == ICMP_NET_UNREACH ||
				     code == ICMP_HOST_UNREACH)));

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of the PMTU discovery (RFC 1191) special case:
	 * we can receive locally delivered packets even if the socket
	 * is owned by the user.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);

	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC 1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always < 576 bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if this ICMP allows a revert of the RTO backoff
		 * (see draft-zimmermann-tcp-lcd)
		 */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_rtx_queue_head(sk);
		BUG_ON(!skb);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now.
			 */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * RFC 1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by PMTU discovery).
	 */
	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

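/* This routine computes an IPv4 TCP checksum. */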
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

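/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side. Anyway...
 *
 *	We do not send data with this segment.
 */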
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and the
	 * incoming skb was attached to this sk; otherwise only reply
	 * for packets that were addressed to us.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * The active side is lost.  Try to find a listening socket
		 * through the source port, and then find the MD5 key through
		 * the listening socket.  We do not loosen security here:
		 * the incoming packet is checked against the MD5 hash of the
		 * found key, and no RST is generated if the hash doesn't
		 * match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb),
					     tcp_v4_sdif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;

	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		trace_tcp_send_reset(sk, skb);
	}

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
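/* The code below sends ACKs in SYN-RECV and TIME-WAIT states
 * outside of the socket context.
 */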
static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
					     tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}
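/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request sock object, not on a big
 *	socket.
 */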
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq_opt_deref(ireq));
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}

#ifdef CONFIG_TCP_MD5SIG

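/*
 * RFC 2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */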
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);
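/* This can be called on a newly created socket, from other files */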
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	key->prefixlen = prefixlen;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
		   u8 prefixlen)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 prefixlen = 32;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 32)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET, prefixlen);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif
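/* Called with rcu_read_lock() */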
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}

static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = sock_net(sk_listener);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet_csk_route_req(sk, &fl->u.ip4, req);
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_seq,
	.init_ts_off	=	tcp_v4_init_ts_off,
	.send_synack	=	tcp_v4_send_synack,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
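/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */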
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = rcu_dereference(ireq->ireq_opt);
	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (likely(*own_req)) {
		tcp_move_syn(newtp, req);
		ireq->ireq_opt = NULL;
	} else {
		newinet->inet_opt = NULL;
	}
	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	newinet->inet_opt = NULL;
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}
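/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */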
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb));
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

int tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return 0;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return 0;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return 0;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif, inet_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
	return 0;
}

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;

	/* Only the socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);

int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;
	unsigned int eaten = skb->len;
	int err;

	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
	if (!err) {
		eaten -= skb->len;
		TCP_SKB_CB(skb)->end_seq -= eaten;
	}
	return err;
}
EXPORT_SYMBOL(tcp_filter);

static void tcp_v4_restore_cb(struct sk_buff *skb)
{
	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
		sizeof(struct inet_skb_parm));
}

static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
			   const struct tcphdr *th)
{
	/* This is tricky : We move IPCB at its correct location into
	 * TCP_SKB_CB(); barrier() makes sure the compiler won't play
	 * aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
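/*
 *	From tcp_input.c
 */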
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	int sdif = inet_sdif(skb);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer the checksum check.
	 */
	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
			       th->dest, sdif, &refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		/* We own a reference on the listener, increase it again
		 * as we might lose it too soon.
		 */
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			iph = ip_hdr(skb);
			tcp_v4_fill_cb(skb, iph, th);
			nsk = tcp_check_req(sk, skb, req, false);
		}
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v4_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v4_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	nf_reset(skb);

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
	tcp_v4_fill_cb(skb, iph, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v4_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo, skb,
							__tcp_hdrlen(th),
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb),
							sdif);
		if (sk2) {
			inet_twsk_deschedule_put(inet_twsk(sk));
			sk = sk2;
			tcp_v4_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v4_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

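/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */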
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	trace_tcp_destroy_sock(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 data structures */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_fastopen_destroy_cipher(sk);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS

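/* Proc filesystem TCP sock list dumping. */

/*
 * Get next listener socket following cur.  If cur is NULL, get the first
 * socket starting from the bucket given in st->bucket; when st->bucket is
 * zero the very first socket in the hash table is returned.
 */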
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	struct inet_listen_hashbucket *ilb;
	struct sock *sk = cur;

	if (!sk) {
get_head:
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock(&ilb->lock);
		sk = sk_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family)
			return sk;
	}
	spin_unlock(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE)
		goto get_head;
	return NULL;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}
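/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */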
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family	= afinfo->family;
	s->last_pos	= 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start	= tcp_seq_start;
	afinfo->seq_ops.next	= tcp_seq_next;
	afinfo->seq_ops.stop	= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	remove_proc_entry(afinfo->name, net->proc_net);
}
EXPORT_SYMBOL(tcp_proc_unregister);

static void get_openreq4(const struct request_sock *req,
			 struct seq_file *f, int i)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->rsk_timer.expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ireq->ir_num,
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f),
				 sock_i_uid(req->rsk_listener)),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		0,
		req);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;
	int state;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = sk_state_load(sk);
	if (state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		refcount_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		state == TCP_LISTEN ?
		    fastopenq->max_qlen :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}

static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	__be32 dest, src;
	__u16 destp, srcp;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		refcount_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	module_put(net->ipv4.tcp_congestion_control->owner);

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}

static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 0;

	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior.  */
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
	/* Default TSQ limit of four TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Reno is always built in */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}