// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}
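
/* Decide whether a TIME-WAIT socket occupying the desired 4-tuple may be
 * reused for a new outgoing connection.  Returns 1 (and holds a reference
 * on the timewait sock) when reuse is safe, 0 otherwise.
 */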
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);
	int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;

	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;
		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
			     (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
			     (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    tcptw->tw_ts_recent_stamp)))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 */
		if (likely(!tp->repair)) {
			tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
			if (tp->write_seq == 0)
				tp->write_seq = 1;
			tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v4_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined
 * in RFC 1191.  It can be called through tcp_release_cb() if the socket
 * was owned by user at the time tcp_v4_err() was called to handle the
 * ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return -ENOENT;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
				     type == ICMP_TIME_EXCEEDED ||
				     (type == ICMP_DEST_UNREACH &&
				      (code == ICMP_NET_UNREACH ||
				       code == ICMP_HOST_UNREACH)));
		return 0;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;

		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		skb = tcp_rtx_queue_head(sk);
		if (WARN_ON_ONCE(!skb))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		provided by packet caused RST.
 *	Exception: precedence violation. We do not implement it in any case.
 */
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	u64 transmit_time = 0;
	struct sock *ctl_sk;
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = sizeof(struct tcphdr) / 4;
	rep.th.rst = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb),
					     tcp_v4_sdif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;

	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
	if (sk) {
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
		transmit_time = tcp_transmit_time(sk);
	}
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */
static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;
	struct sock *ctl_sk;
	u64 transmit_time;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = arg.iov[0].iov_len / 4;
	rep.th.seq = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack = 1;
	rep.th.window = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
			   inet_twsk(sk)->tw_mark : sk->sk_mark;
	transmit_time = tcp_transmit_time(sk);
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}
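
/* A TIME-WAIT socket cannot carry data, so segments that arrive for it are
 * answered with a bare ACK built from the remembered sequence state (plus
 * timestamps and the MD5 signature when applicable).
 */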
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
					     tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		rcu_read_lock();
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    rcu_dereference(ireq->ireq_opt));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}

#ifdef CONFIG_TCP_MD5SIG

/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
					   const union tcp_md5_addr *addr,
					   int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(__tcp_md5_do_lookup);

static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	key->prefixlen = prefixlen;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
		   u8 prefixlen)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}
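
/* Handle the TCP_MD5SIG / TCP_MD5SIG_EXT setsockopt()s: copy the key
 * specification from user space and add or delete the corresponding
 * entry on the socket's MD5 key list.
 */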
static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 prefixlen = 32;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 32)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET, prefixlen);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}
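
/* Feed the RFC 2385 pseudo-header (saddr, daddr, protocol, segment length)
 * plus the TCP header with a zeroed checksum field into the MD5 hash.
 */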
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}

static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = sock_net(sk_listener);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet_csk_route_req(sk, &fl->u.ip4, req);
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct tcp_request_sock),
	.rtx_syn_ack	= tcp_rtx_synack,
	.send_ack	= tcp_v4_reqsk_send_ack,
	.destructor	= tcp_v4_reqsk_destructor,
	.send_reset	= tcp_v4_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	= TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
#endif
	.init_req	= tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq = cookie_v4_init_sequence,
#endif
	.route_req	= tcp_v4_route_req,
	.init_seq	= tcp_v4_init_seq,
	.init_ts_off	= tcp_v4_init_ts_off,
	.send_synack	= tcp_v4_send_synack,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs send to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr = ireq->ir_loc_addr;
	inet_opt = rcu_dereference(ireq->ireq_opt);
	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	newinet->rcv_tos = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (likely(*own_req)) {
		tcp_move_syn(newtp, req);
		ireq->ireq_opt = NULL;
	} else {
		newinet->inet_opt = NULL;
	}
	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	newinet->inet_opt = NULL;
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
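
/* With syncookies, an ACK arriving on a listener may complete a handshake
 * that has no matching request sock: validate the cookie and, if it checks
 * out, return the newly created child socket.
 */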
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}

/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles only
	 * as much as necessary here and does not touch the %ebx register.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
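
/* Early demux: look up the established socket while the packet is still at
 * the IP layer and attach it (and its cached rx dst) to the skb, avoiding
 * a full route lookup later in the receive path.
 */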
int tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return 0;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return 0;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return 0;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif, inet_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
	return 0;
}
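
/* Queue a segment on the backlog of a user-locked socket, first trying to
 * coalesce it with the backlog tail to cut per-skb overhead.  Returns true
 * if the segment was dropped; in that case the socket has already been
 * unlocked here.
 */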
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
	struct skb_shared_info *shinfo;
	const struct tcphdr *th;
	struct tcphdr *thtail;
	struct sk_buff *tail;
	unsigned int hdrlen;
	bool fragstolen;
	u32 gso_segs;
	int delta;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	skb_dst_drop(skb);

	if (unlikely(tcp_checksum_complete(skb))) {
		bh_unlock_sock(sk);
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
		return true;
	}

	/* Attempt coalescing to last skb in backlog, even if we are
	 * above the limits.
	 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
	 */
	th = (const struct tcphdr *)skb->data;
	hdrlen = th->doff * 4;
	shinfo = skb_shinfo(skb);

	if (!shinfo->gso_size)
		shinfo->gso_size = skb->len - hdrlen;

	if (!shinfo->gso_segs)
		shinfo->gso_segs = 1;

	tail = sk->sk_backlog.tail;
	if (!tail)
		goto no_coalesce;
	thtail = (struct tcphdr *)tail->data;

	if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
	    TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
	    ((TCP_SKB_CB(tail)->tcp_flags |
	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
	    !((TCP_SKB_CB(tail)->tcp_flags &
	      TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
	    ((TCP_SKB_CB(tail)->tcp_flags ^
	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
#ifdef CONFIG_TLS_DEVICE
	    tail->decrypted != skb->decrypted ||
#endif
	    thtail->doff != th->doff ||
	    memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
		goto no_coalesce;

	__skb_pull(skb, hdrlen);
	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
		thtail->window = th->window;

		TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;

		if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
			TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;

		/* We have to update both TCP_SKB_CB(tail)->tcp_flags and
		 * thtail->fin, so that the fast path in tcp_rcv_established()
		 * is not entered if we append a packet with a FIN.
		 * SYN, RST, URG are not present.
		 * ACK is set on both packets.
		 * PSH : we do not really care in TCP stack,
		 *       at least for 'GRO' packets.
		 */
		thtail->fin |= th->fin;
		TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;

		if (TCP_SKB_CB(skb)->has_rxtstamp) {
			TCP_SKB_CB(tail)->has_rxtstamp = true;
			tail->tstamp = skb->tstamp;
			skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
		}

		/* Not as strict as GRO. We only need to carry mss max value */
		skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
						 skb_shinfo(tail)->gso_size);

		gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
		skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);

		sk->sk_backlog.len += delta;
		__NET_INC_STATS(sock_net(sk),
				LINUX_MIB_TCPBACKLOGCOALESCE);
		kfree_skb_partial(skb, fragstolen);
		return false;
	}
	__skb_push(skb, hdrlen);

no_coalesce:
	/* Only socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);
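
/* Run the attached socket filter (if any) on the segment, never trimming
 * below the TCP header so later header accesses stay valid.
 */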
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;

	return sk_filter_trim_cap(sk, skb, th->doff * 4);
}
EXPORT_SYMBOL(tcp_filter);

static void tcp_v4_restore_cb(struct sk_buff *skb)
{
	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
		sizeof(struct inet_skb_parm));
}
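
/* Populate TCP_SKB_CB() from the IP and TCP headers, overlaying the area
 * that until now held IPCB().
 */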
static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
			   const struct tcphdr *th)
{
	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
	 * barrier() makes sure compiler wont play fool^Waliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sk_buff *skb_to_free;
	int sdif = inet_sdif(skb);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
			       th->dest, sdif, &refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		/* We own a reference on the listener, increase it again
		 * as we might lose it too soon.
		 */
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			iph = ip_hdr(skb);
			tcp_v4_fill_cb(skb, iph, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v4_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v4_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v4_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	nf_reset(skb);

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
	tcp_v4_fill_cb(skb, iph, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v4_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);

put_and_return:
	if (refcounted)
		sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo, skb,
							__tcp_hdrlen(th),
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb),
							sdif);
		if (sk2) {
			inet_twsk_deschedule_put(inet_twsk(sk));
			sk = sk2;
			tcp_v4_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v4_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};
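
/* Cache the incoming dst on the socket so the fast path in tcp_v4_do_rcv()
 * can reuse it instead of performing a route lookup per packet.
 */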
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	trace_tcp_destroy_sock(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_fastopen_destroy_cipher(sk);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	struct inet_listen_hashbucket *ilb;
	struct sock *sk = cur;

	if (!sk) {
get_head:
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock(&ilb->lock);
		sk = sk_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == afinfo->family)
			return sk;
	}
	spin_unlock(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE)
		goto get_head;
	return NULL;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != afinfo->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == afinfo->family &&
		    net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}
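
/* Resume iteration at the bucket/offset remembered from the previous read,
 * so a large /proc/net/tcp dump does not restart from scratch on every
 * sequential read() call.
 */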
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_start);

void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_next);

void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
EXPORT_SYMBOL(tcp_seq_stop);

static void get_openreq4(const struct request_sock *req,
			 struct seq_file *f, int i)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->rsk_timer.expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ireq->ir_num,
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f),
				 sock_i_uid(req->rsk_listener)),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		0,
		req);
}
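
/* Format one listening/established socket as a line of /proc/net/tcp:
 * local and remote address:port, state, queue sizes, timer state, uid,
 * inode and a few TCP details (rto, ato, cwnd, ssthresh).
 */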
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;
	int state;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active = 2;
		timer_expires = sk->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sk);
	if (state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		refcount_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
		tp->snd_cwnd,
		state == TCP_LISTEN ?
		    fastopenq->max_qlen :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
2461
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	__be32 dest, src;
	__u16 destp, srcp;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		refcount_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}

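/* seq_file drives the iterator as start() -> show()/next() ... -> stop().
 * tcp_seq_start() returns SEQ_START_TOKEN on the first pass so that
 * tcp4_seq_show() can print the header line before any socket entries.
 */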
static const struct seq_operations tcp4_seq_ops = {
	.show		= tcp4_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.family		= AF_INET,
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
			sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
		return -ENOMEM;
	return 0;
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	remove_proc_entry("tcp", net->proc_net);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

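/* Each namespace keeps one kernel-internal control socket per possible
 * CPU in net->ipv4.tcp_sk.  They back the stateless replies (RSTs and
 * ACKs sent on behalf of sockets we do not own, e.g. in TIME_WAIT or
 * SYN-RECV); tcp_sk_init() creates them and tcp_sk_exit() destroys them
 * on namespace teardown.
 */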
static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	if (net->ipv4.tcp_congestion_control)
		module_put(net->ipv4.tcp_congestion_control->owner);

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}

static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

		/* Please enforce IP_DF and IPID==0 for RST and
		 * ACK sent in SYN-RECV and TIME-WAIT state.
		 */
		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;

		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 2;

	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior.  */
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
	/* Default TSQ limit of 16 TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}
	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Reno is always built in */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}

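/* Called once per batch of dying namespaces: inet_twsk_purge() kills any
 * TIME_WAIT sockets still holding a reference to an exiting netns, and
 * each namespace's TCP fastopen context is released before the netns
 * itself is freed.
 */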
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
       .init	   = tcp_sk_init,
       .exit	   = tcp_sk_exit,
       .exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}
