53#define pr_fmt(fmt) "TCP: " fmt
54
55#include <linux/bottom_half.h>
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
64#include <linux/slab.h>
65
66#include <net/net_namespace.h>
67#include <net/icmp.h>
68#include <net/inet_hashtables.h>
69#include <net/tcp.h>
70#include <net/transp_v6.h>
71#include <net/ipv6.h>
72#include <net/inet_common.h>
73#include <net/timewait_sock.h>
74#include <net/xfrm.h>
75#include <net/secure_seq.h>
76#include <net/busy_poll.h>
77
78#include <linux/inet.h>
79#include <linux/ipv6.h>
80#include <linux/stddef.h>
81#include <linux/proc_fs.h>
82#include <linux/seq_file.h>
83#include <linux/inetdevice.h>
84
85#include <crypto/hash.h>
86#include <linux/scatterlist.h>
87
88#include <trace/events/tcp.h>
89
90#ifdef CONFIG_TCP_MD5SIG
91static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
92 __be32 daddr, __be32 saddr, const struct tcphdr *th);
93#endif
94
95struct inet_hashinfo tcp_hashinfo;
96EXPORT_SYMBOL(tcp_hashinfo);
97
98static u32 tcp_v4_init_seq(const struct sk_buff *skb)
99{
100 return secure_tcp_seq(ip_hdr(skb)->daddr,
101 ip_hdr(skb)->saddr,
102 tcp_hdr(skb)->dest,
103 tcp_hdr(skb)->source);
104}
105
106static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
107{
108 return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
109}
110
111int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
112{
113 const struct inet_timewait_sock *tw = inet_twsk(sktw);
114 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
115 struct tcp_sock *tp = tcp_sk(sk);
116 int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
117
118 if (reuse == 2) {
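		/* With tcp_tw_reuse == 2, reuse is only honored for loopback
		 * connections: a loopback source or destination address, or
		 * a socket bound to the loopback interface.
		 */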
123 bool loopback = false;
124 if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
125 loopback = true;
126#if IS_ENABLED(CONFIG_IPV6)
127 if (tw->tw_family == AF_INET6) {
128 if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
129 (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
130 (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
131 ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
132 (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
133 (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
134 loopback = true;
135 } else
136#endif
137 {
138 if (ipv4_is_loopback(tw->tw_daddr) ||
139 ipv4_is_loopback(tw->tw_rcv_saddr))
140 loopback = true;
141 }
142 if (!loopback)
143 reuse = 0;
144 }
145
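	/* Reuse is only considered safe when the peer's last timestamp is
	 * known (and, for the twp case, at least one second old), so that
	 * PAWS can reject stray segments from the earlier connection.
	 */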
157 if (tcptw->tw_ts_recent_stamp &&
158 (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
170 if (likely(!tp->repair)) {
171 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
172 if (tp->write_seq == 0)
173 tp->write_seq = 1;
174 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
175 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
176 }
177 sock_hold(sktw);
178 return 1;
179 }
180
181 return 0;
182}
183EXPORT_SYMBOL_GPL(tcp_twsk_unique);
184
185static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
186 int addr_len)
187{
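	/* Mirror the addr_len check done in tcp_v4_connect() so that the
	 * BPF connect hook below cannot read past the user-supplied
	 * sockaddr.
	 */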
192 if (addr_len < sizeof(struct sockaddr_in))
193 return -EINVAL;
194
195 sock_owned_by_me(sk);
196
197 return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
198}
199
200
201int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
202{
203 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
204 struct inet_sock *inet = inet_sk(sk);
205 struct tcp_sock *tp = tcp_sk(sk);
206 __be16 orig_sport, orig_dport;
207 __be32 daddr, nexthop;
208 struct flowi4 *fl4;
209 struct rtable *rt;
210 int err;
211 struct ip_options_rcu *inet_opt;
212 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
213
214 if (addr_len < sizeof(struct sockaddr_in))
215 return -EINVAL;
216
217 if (usin->sin_family != AF_INET)
218 return -EAFNOSUPPORT;
219
220 nexthop = daddr = usin->sin_addr.s_addr;
221 inet_opt = rcu_dereference_protected(inet->inet_opt,
222 lockdep_sock_is_held(sk));
223 if (inet_opt && inet_opt->opt.srr) {
224 if (!daddr)
225 return -EINVAL;
226 nexthop = inet_opt->opt.faddr;
227 }
228
229 orig_sport = inet->inet_sport;
230 orig_dport = usin->sin_port;
231 fl4 = &inet->cork.fl.u.ip4;
232 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
233 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
234 IPPROTO_TCP,
235 orig_sport, orig_dport, sk);
236 if (IS_ERR(rt)) {
237 err = PTR_ERR(rt);
238 if (err == -ENETUNREACH)
239 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
240 return err;
241 }
242
243 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
244 ip_rt_put(rt);
245 return -ENETUNREACH;
246 }
247
248 if (!inet_opt || !inet_opt->opt.srr)
249 daddr = fl4->daddr;
250
251 if (!inet->inet_saddr)
252 inet->inet_saddr = fl4->saddr;
253 sk_rcv_saddr_set(sk, inet->inet_saddr);
254
255 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
256
257 tp->rx_opt.ts_recent = 0;
258 tp->rx_opt.ts_recent_stamp = 0;
259 if (likely(!tp->repair))
260 tp->write_seq = 0;
261 }
262
263 inet->inet_dport = usin->sin_port;
264 sk_daddr_set(sk, daddr);
265
266 inet_csk(sk)->icsk_ext_hdr_len = 0;
267 if (inet_opt)
268 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
269
270 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
271
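	/* The socket identity is not yet complete (the source port may
	 * still be zero): move to SYN-SENT, let inet_hash_connect() pick a
	 * port and hash the socket, then finish initialization below.
	 */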
277 tcp_set_state(sk, TCP_SYN_SENT);
278 err = inet_hash_connect(tcp_death_row, sk);
279 if (err)
280 goto failure;
281
282 sk_set_txhash(sk);
283
284 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
285 inet->inet_sport, inet->inet_dport, sk);
286 if (IS_ERR(rt)) {
287 err = PTR_ERR(rt);
288 rt = NULL;
289 goto failure;
290 }
291
292 sk->sk_gso_type = SKB_GSO_TCPV4;
293 sk_setup_caps(sk, &rt->dst);
294 rt = NULL;
295
296 if (likely(!tp->repair)) {
297 if (!tp->write_seq)
298 tp->write_seq = secure_tcp_seq(inet->inet_saddr,
299 inet->inet_daddr,
300 inet->inet_sport,
301 usin->sin_port);
302 tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
303 inet->inet_saddr,
304 inet->inet_daddr);
305 }
306
307 inet->inet_id = tp->write_seq ^ jiffies;
308
309 if (tcp_fastopen_defer_connect(sk, &err))
310 return err;
311 if (err)
312 goto failure;
313
314 err = tcp_connect(sk);
315
316 if (err)
317 goto failure;
318
319 return 0;
320
321failure:
326 tcp_set_state(sk, TCP_CLOSE);
327 ip_rt_put(rt);
328 sk->sk_route_caps = 0;
329 inet->inet_dport = 0;
330 return err;
331}
332EXPORT_SYMBOL(tcp_v4_connect);
333
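/* Handle an ICMP_FRAG_NEEDED / path-MTU indication (RFC 1191). This can
 * also run from tcp_release_cb() when the socket was owned by the user at
 * the time tcp_v4_err() saw the ICMP message.
 */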
339void tcp_v4_mtu_reduced(struct sock *sk)
340{
341 struct inet_sock *inet = inet_sk(sk);
342 struct dst_entry *dst;
343 u32 mtu;
344
345 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
346 return;
347 mtu = tcp_sk(sk)->mtu_info;
348 dst = inet_csk_update_pmtu(sk, mtu);
349 if (!dst)
350 return;
351
352
353
354
355 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
356 sk->sk_err_soft = EMSGSIZE;
357
358 mtu = dst_mtu(dst);
359
360 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
361 ip_sk_accept_pmtu(sk) &&
362 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
363 tcp_sync_mss(sk, mtu);
364
365
366
367
368
369
370 tcp_simple_retransmit(sk);
371 }
372}
373EXPORT_SYMBOL(tcp_v4_mtu_reduced);
374
375static void do_redirect(struct sk_buff *skb, struct sock *sk)
376{
377 struct dst_entry *dst = __sk_dst_check(sk, 0);
378
379 if (dst)
380 dst->ops->redirect(dst, sk, skb);
381}
382
383
384
385void tcp_req_err(struct sock *sk, u32 seq, bool abort)
386{
387 struct request_sock *req = inet_reqsk(sk);
388 struct net *net = sock_net(sk);
389
390
391
392
393 if (seq != tcp_rsk(req)->snt_isn) {
394 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
395 } else if (abort) {
402 inet_csk_reqsk_queue_drop(req->rsk_listener, req);
403 tcp_listendrop(req->rsk_listener);
404 }
405 reqsk_put(req);
406}
407EXPORT_SYMBOL(tcp_req_err);
408
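/*
 * This routine is called by the ICMP module when it gets some sort of
 * error condition related to one of our TCP segments. The locking is
 * deliberately optimistic: if the socket is busy, the ICMP is mostly just
 * counted and error delivery falls back to sk_err_soft; only the path-MTU
 * update is deferred until the socket lock is released.
 */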
425void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
426{
427 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
428 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
429 struct inet_connection_sock *icsk;
430 struct tcp_sock *tp;
431 struct inet_sock *inet;
432 const int type = icmp_hdr(icmp_skb)->type;
433 const int code = icmp_hdr(icmp_skb)->code;
434 struct sock *sk;
435 struct sk_buff *skb;
436 struct request_sock *fastopen;
437 u32 seq, snd_una;
438 s32 remaining;
439 u32 delta_us;
440 int err;
441 struct net *net = dev_net(icmp_skb->dev);
442
443 sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
444 th->dest, iph->saddr, ntohs(th->source),
445 inet_iif(icmp_skb), 0);
446 if (!sk) {
447 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
448 return;
449 }
450 if (sk->sk_state == TCP_TIME_WAIT) {
451 inet_twsk_put(inet_twsk(sk));
452 return;
453 }
454 seq = ntohl(th->seq);
455 if (sk->sk_state == TCP_NEW_SYN_RECV)
456 return tcp_req_err(sk, seq,
457 type == ICMP_PARAMETERPROB ||
458 type == ICMP_TIME_EXCEEDED ||
459 (type == ICMP_DEST_UNREACH &&
460 (code == ICMP_NET_UNREACH ||
461 code == ICMP_HOST_UNREACH)));
462
463 bh_lock_sock(sk);
464
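	/* If the socket is owned by the user, only account the ICMP here;
	 * ICMP_FRAG_NEEDED is excluded from the counter because the PMTU
	 * update is deferred (see below) rather than lost.
	 */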
469 if (sock_owned_by_user(sk)) {
470 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
471 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
472 }
473 if (sk->sk_state == TCP_CLOSE)
474 goto out;
475
476 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
477 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
478 goto out;
479 }
480
481 icsk = inet_csk(sk);
482 tp = tcp_sk(sk);
483
484 fastopen = tp->fastopen_rsk;
485 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
486 if (sk->sk_state != TCP_LISTEN &&
487 !between(seq, snd_una, tp->snd_nxt)) {
488 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
489 goto out;
490 }
491
492 switch (type) {
493 case ICMP_REDIRECT:
494 if (!sock_owned_by_user(sk))
495 do_redirect(icmp_skb, sk);
496 goto out;
497 case ICMP_SOURCE_QUENCH:
498
499 goto out;
500 case ICMP_PARAMETERPROB:
501 err = EPROTO;
502 break;
503 case ICMP_DEST_UNREACH:
504 if (code > NR_ICMP_UNREACH)
505 goto out;
506
507 if (code == ICMP_FRAG_NEEDED) {
508
509
510
511
512 if (sk->sk_state == TCP_LISTEN)
513 goto out;
514
515 tp->mtu_info = info;
516 if (!sock_owned_by_user(sk)) {
517 tcp_v4_mtu_reduced(sk);
518 } else {
519 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
520 sock_hold(sk);
521 }
522 goto out;
523 }
524
525 err = icmp_err_convert[code].errno;
526
527
528 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
529 break;
530 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
531 !icsk->icsk_backoff || fastopen)
532 break;
533
534 if (sock_owned_by_user(sk))
535 break;
536
537 skb = tcp_rtx_queue_head(sk);
538 if (WARN_ON_ONCE(!skb))
539 break;
540
541 icsk->icsk_backoff--;
542 icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
543 TCP_TIMEOUT_INIT;
544 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
545
546
547 tcp_mstamp_refresh(tp);
548 delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
549 remaining = icsk->icsk_rto -
550 usecs_to_jiffies(delta_us);
551
552 if (remaining > 0) {
553 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
554 remaining, TCP_RTO_MAX);
555 } else {
556
557
558 tcp_retransmit_timer(sk);
559 }
560
561 break;
562 case ICMP_TIME_EXCEEDED:
563 err = EHOSTUNREACH;
564 break;
565 default:
566 goto out;
567 }
568
569 switch (sk->sk_state) {
570 case TCP_SYN_SENT:
571 case TCP_SYN_RECV:
572
573
574
575 if (fastopen && !fastopen->sk)
576 break;
577
578 if (!sock_owned_by_user(sk)) {
579 sk->sk_err = err;
580
581 sk->sk_error_report(sk);
582
583 tcp_done(sk);
584 } else {
585 sk->sk_err_soft = err;
586 }
587 goto out;
588 }
589
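	/* For established connections the error is only delivered as a hard
	 * error if the application enabled IP_RECVERR; otherwise it is
	 * recorded as a soft error and the connection keeps retrying.
	 */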
606 inet = inet_sk(sk);
607 if (!sock_owned_by_user(sk) && inet->recverr) {
608 sk->sk_err = err;
609 sk->sk_error_report(sk);
610 } else {
611 sk->sk_err_soft = err;
612 }
613
614out:
615 bh_unlock_sock(sk);
616 sock_put(sk);
617}
618
619void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
620{
621 struct tcphdr *th = tcp_hdr(skb);
622
623 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
624 skb->csum_start = skb_transport_header(skb) - skb->head;
625 skb->csum_offset = offsetof(struct tcphdr, check);
626}
627
628
629void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
630{
631 const struct inet_sock *inet = inet_sk(sk);
632
633 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
634}
635EXPORT_SYMBOL(tcp_v4_send_check);
636
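/*
 *	Send an RST in reply to a segment for which no (full) socket exists
 *	or that must be refused. The reply is built solely from fields of
 *	the offending segment, so no socket lock is taken; an MD5 signature
 *	option is added when a matching key is found.
 */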
650static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
651{
652 const struct tcphdr *th = tcp_hdr(skb);
653 struct {
654 struct tcphdr th;
655#ifdef CONFIG_TCP_MD5SIG
656 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
657#endif
658 } rep;
659 struct ip_reply_arg arg;
660#ifdef CONFIG_TCP_MD5SIG
661 struct tcp_md5sig_key *key = NULL;
662 const __u8 *hash_location = NULL;
663 unsigned char newhash[16];
664 int genhash;
665 struct sock *sk1 = NULL;
666#endif
667 struct net *net;
668 struct sock *ctl_sk;
669
670
671 if (th->rst)
672 return;
673
674
675
676
677 if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
678 return;
679
680
681 memset(&rep, 0, sizeof(rep));
682 rep.th.dest = th->source;
683 rep.th.source = th->dest;
684 rep.th.doff = sizeof(struct tcphdr) / 4;
685 rep.th.rst = 1;
686
687 if (th->ack) {
688 rep.th.seq = th->ack_seq;
689 } else {
690 rep.th.ack = 1;
691 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
692 skb->len - (th->doff << 2));
693 }
694
695 memset(&arg, 0, sizeof(arg));
696 arg.iov[0].iov_base = (unsigned char *)&rep;
697 arg.iov[0].iov_len = sizeof(rep.th);
698
699 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
700#ifdef CONFIG_TCP_MD5SIG
701 rcu_read_lock();
702 hash_location = tcp_parse_md5sig_option(th);
703 if (sk && sk_fullsock(sk)) {
704 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
705 &ip_hdr(skb)->saddr, AF_INET);
706 } else if (hash_location) {
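		/* No full socket, but the segment carries an MD5 option:
		 * find the key via a listener on the destination address and
		 * only send the RST if the segment's signature verifies.
		 */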
714 sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
715 ip_hdr(skb)->saddr,
716 th->source, ip_hdr(skb)->daddr,
717 ntohs(th->source), inet_iif(skb),
718 tcp_v4_sdif(skb));
719
720 if (!sk1)
721 goto out;
722
723 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
724 &ip_hdr(skb)->saddr, AF_INET);
725 if (!key)
726 goto out;
727
728
729 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
730 if (genhash || memcmp(hash_location, newhash, 16) != 0)
731 goto out;
732
733 }
734
735 if (key) {
736 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
737 (TCPOPT_NOP << 16) |
738 (TCPOPT_MD5SIG << 8) |
739 TCPOLEN_MD5SIG);
740
741 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
742 rep.th.doff = arg.iov[0].iov_len / 4;
743
744 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
745 key, ip_hdr(skb)->saddr,
746 ip_hdr(skb)->daddr, &rep.th);
747 }
748#endif
749 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
750 ip_hdr(skb)->saddr,
751 arg.iov[0].iov_len, IPPROTO_TCP, 0);
752 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
753 arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
754
755
756
757
758
759 if (sk) {
760 arg.bound_dev_if = sk->sk_bound_dev_if;
761 if (sk_fullsock(sk))
762 trace_tcp_send_reset(sk, skb);
763 }
764
765 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
766 offsetof(struct inet_timewait_sock, tw_bound_dev_if));
767
768 arg.tos = ip_hdr(skb)->tos;
769 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
770 local_bh_disable();
771 ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
772 if (sk)
773 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
774 inet_twsk(sk)->tw_mark : sk->sk_mark;
775 ip_send_unicast_reply(ctl_sk,
776 skb, &TCP_SKB_CB(skb)->header.h4.opt,
777 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
778 &arg, arg.iov[0].iov_len);
779
780 ctl_sk->sk_mark = 0;
781 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
782 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
783 local_bh_enable();
784
785#ifdef CONFIG_TCP_MD5SIG
786out:
787 rcu_read_unlock();
788#endif
789}
790
791
792
793
794
795static void tcp_v4_send_ack(const struct sock *sk,
796 struct sk_buff *skb, u32 seq, u32 ack,
797 u32 win, u32 tsval, u32 tsecr, int oif,
798 struct tcp_md5sig_key *key,
799 int reply_flags, u8 tos)
800{
801 const struct tcphdr *th = tcp_hdr(skb);
802 struct {
803 struct tcphdr th;
804 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
805#ifdef CONFIG_TCP_MD5SIG
806 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
807#endif
808 ];
809 } rep;
810 struct net *net = sock_net(sk);
811 struct ip_reply_arg arg;
812 struct sock *ctl_sk;
813
814 memset(&rep.th, 0, sizeof(struct tcphdr));
815 memset(&arg, 0, sizeof(arg));
816
817 arg.iov[0].iov_base = (unsigned char *)&rep;
818 arg.iov[0].iov_len = sizeof(rep.th);
819 if (tsecr) {
820 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
821 (TCPOPT_TIMESTAMP << 8) |
822 TCPOLEN_TIMESTAMP);
823 rep.opt[1] = htonl(tsval);
824 rep.opt[2] = htonl(tsecr);
825 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
826 }
827
828
829 rep.th.dest = th->source;
830 rep.th.source = th->dest;
831 rep.th.doff = arg.iov[0].iov_len / 4;
832 rep.th.seq = htonl(seq);
833 rep.th.ack_seq = htonl(ack);
834 rep.th.ack = 1;
835 rep.th.window = htons(win);
836
837#ifdef CONFIG_TCP_MD5SIG
838 if (key) {
839 int offset = (tsecr) ? 3 : 0;
840
841 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
842 (TCPOPT_NOP << 16) |
843 (TCPOPT_MD5SIG << 8) |
844 TCPOLEN_MD5SIG);
845 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
846 rep.th.doff = arg.iov[0].iov_len/4;
847
848 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
849 key, ip_hdr(skb)->saddr,
850 ip_hdr(skb)->daddr, &rep.th);
851 }
852#endif
853 arg.flags = reply_flags;
854 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
855 ip_hdr(skb)->saddr,
856 arg.iov[0].iov_len, IPPROTO_TCP, 0);
857 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
858 if (oif)
859 arg.bound_dev_if = oif;
860 arg.tos = tos;
861 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
862 local_bh_disable();
863 ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
864 if (sk)
865 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
866 inet_twsk(sk)->tw_mark : sk->sk_mark;
867 ip_send_unicast_reply(ctl_sk,
868 skb, &TCP_SKB_CB(skb)->header.h4.opt,
869 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
870 &arg, arg.iov[0].iov_len);
871
872 ctl_sk->sk_mark = 0;
873 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
874 local_bh_enable();
875}
876
877static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
878{
879 struct inet_timewait_sock *tw = inet_twsk(sk);
880 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
881
882 tcp_v4_send_ack(sk, skb,
883 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
884 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
885 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
886 tcptw->tw_ts_recent,
887 tw->tw_bound_dev_if,
888 tcp_twsk_md5_key(tcptw),
889 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
890 tw->tw_tos
891 );
892
893 inet_twsk_put(tw);
894}
895
896static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
897 struct request_sock *req)
898{
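	/* sk->sk_state == TCP_LISTEN   -> ACK for a regular SYN-RECV request;
	 * sk->sk_state == TCP_SYN_RECV -> ACK for a Fast Open socket.
	 */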
902 u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
903 tcp_sk(sk)->snd_nxt;
904
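	/* RFC 7323 2.3: the window field of every outgoing segment, with
	 * the exception of SYN segments, must be right-shifted by the
	 * receive window scale.
	 */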
910 tcp_v4_send_ack(sk, skb, seq,
911 tcp_rsk(req)->rcv_nxt,
912 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
913 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
914 req->ts_recent,
915 0,
916 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
917 AF_INET),
918 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
919 ip_hdr(skb)->tos);
920}
921
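/*
 *	Send a SYN-ACK after having received a SYN. This still operates on
 *	a request_sock only, not on a full socket.
 */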
927static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
928 struct flowi *fl,
929 struct request_sock *req,
930 struct tcp_fastopen_cookie *foc,
931 enum tcp_synack_type synack_type)
932{
933 const struct inet_request_sock *ireq = inet_rsk(req);
934 struct flowi4 fl4;
935 int err = -1;
936 struct sk_buff *skb;
937
938
939 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
940 return -1;
941
942 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
943
944 if (skb) {
945 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
946
947 rcu_read_lock();
948 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
949 ireq->ir_rmt_addr,
950 rcu_dereference(ireq->ireq_opt));
951 rcu_read_unlock();
952 err = net_xmit_eval(err);
953 }
954
955 return err;
956}
957
958
959
960
961static void tcp_v4_reqsk_destructor(struct request_sock *req)
962{
963 kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
964}
965
966#ifdef CONFIG_TCP_MD5SIG
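/* RFC 2385 TCP-MD5 support: each socket keeps a mapping from peer address
 * (or address prefix) to MD5 key.
 */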
974struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
975 const union tcp_md5_addr *addr,
976 int family)
977{
978 const struct tcp_sock *tp = tcp_sk(sk);
979 struct tcp_md5sig_key *key;
980 const struct tcp_md5sig_info *md5sig;
981 __be32 mask;
982 struct tcp_md5sig_key *best_match = NULL;
983 bool match;
984
985
986 md5sig = rcu_dereference_check(tp->md5sig_info,
987 lockdep_sock_is_held(sk));
988 if (!md5sig)
989 return NULL;
990
991 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
992 if (key->family != family)
993 continue;
994
995 if (family == AF_INET) {
996 mask = inet_make_mask(key->prefixlen);
997 match = (key->addr.a4.s_addr & mask) ==
998 (addr->a4.s_addr & mask);
999#if IS_ENABLED(CONFIG_IPV6)
1000 } else if (family == AF_INET6) {
1001 match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
1002 key->prefixlen);
1003#endif
1004 } else {
1005 match = false;
1006 }
1007
1008 if (match && (!best_match ||
1009 key->prefixlen > best_match->prefixlen))
1010 best_match = key;
1011 }
1012 return best_match;
1013}
1014EXPORT_SYMBOL(tcp_md5_do_lookup);
1015
1016static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
1017 const union tcp_md5_addr *addr,
1018 int family, u8 prefixlen)
1019{
1020 const struct tcp_sock *tp = tcp_sk(sk);
1021 struct tcp_md5sig_key *key;
1022 unsigned int size = sizeof(struct in_addr);
1023 const struct tcp_md5sig_info *md5sig;
1024
1025
1026 md5sig = rcu_dereference_check(tp->md5sig_info,
1027 lockdep_sock_is_held(sk));
1028 if (!md5sig)
1029 return NULL;
1030#if IS_ENABLED(CONFIG_IPV6)
1031 if (family == AF_INET6)
1032 size = sizeof(struct in6_addr);
1033#endif
1034 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
1035 if (key->family != family)
1036 continue;
1037 if (!memcmp(&key->addr, addr, size) &&
1038 key->prefixlen == prefixlen)
1039 return key;
1040 }
1041 return NULL;
1042}
1043
1044struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
1045 const struct sock *addr_sk)
1046{
1047 const union tcp_md5_addr *addr;
1048
1049 addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
1050 return tcp_md5_do_lookup(sk, addr, AF_INET);
1051}
1052EXPORT_SYMBOL(tcp_v4_md5_lookup);
1053
1054
1055int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
1056 int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
1057 gfp_t gfp)
1058{
1059
1060 struct tcp_md5sig_key *key;
1061 struct tcp_sock *tp = tcp_sk(sk);
1062 struct tcp_md5sig_info *md5sig;
1063
1064 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
1065 if (key) {
1066
1067 memcpy(key->key, newkey, newkeylen);
1068 key->keylen = newkeylen;
1069 return 0;
1070 }
1071
1072 md5sig = rcu_dereference_protected(tp->md5sig_info,
1073 lockdep_sock_is_held(sk));
1074 if (!md5sig) {
1075 md5sig = kmalloc(sizeof(*md5sig), gfp);
1076 if (!md5sig)
1077 return -ENOMEM;
1078
1079 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1080 INIT_HLIST_HEAD(&md5sig->head);
1081 rcu_assign_pointer(tp->md5sig_info, md5sig);
1082 }
1083
1084 key = sock_kmalloc(sk, sizeof(*key), gfp);
1085 if (!key)
1086 return -ENOMEM;
1087 if (!tcp_alloc_md5sig_pool()) {
1088 sock_kfree_s(sk, key, sizeof(*key));
1089 return -ENOMEM;
1090 }
1091
1092 memcpy(key->key, newkey, newkeylen);
1093 key->keylen = newkeylen;
1094 key->family = family;
1095 key->prefixlen = prefixlen;
1096 memcpy(&key->addr, addr,
1097 (family == AF_INET6) ? sizeof(struct in6_addr) :
1098 sizeof(struct in_addr));
1099 hlist_add_head_rcu(&key->node, &md5sig->head);
1100 return 0;
1101}
1102EXPORT_SYMBOL(tcp_md5_do_add);
1103
1104int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
1105 u8 prefixlen)
1106{
1107 struct tcp_md5sig_key *key;
1108
1109 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
1110 if (!key)
1111 return -ENOENT;
1112 hlist_del_rcu(&key->node);
1113 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1114 kfree_rcu(key, rcu);
1115 return 0;
1116}
1117EXPORT_SYMBOL(tcp_md5_do_del);
1118
1119static void tcp_clear_md5_list(struct sock *sk)
1120{
1121 struct tcp_sock *tp = tcp_sk(sk);
1122 struct tcp_md5sig_key *key;
1123 struct hlist_node *n;
1124 struct tcp_md5sig_info *md5sig;
1125
1126 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1127
1128 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1129 hlist_del_rcu(&key->node);
1130 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1131 kfree_rcu(key, rcu);
1132 }
1133}
1134
1135static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
1136 char __user *optval, int optlen)
1137{
1138 struct tcp_md5sig cmd;
1139 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1140 u8 prefixlen = 32;
1141
1142 if (optlen < sizeof(cmd))
1143 return -EINVAL;
1144
1145 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1146 return -EFAULT;
1147
1148 if (sin->sin_family != AF_INET)
1149 return -EINVAL;
1150
1151 if (optname == TCP_MD5SIG_EXT &&
1152 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
1153 prefixlen = cmd.tcpm_prefixlen;
1154 if (prefixlen > 32)
1155 return -EINVAL;
1156 }
1157
1158 if (!cmd.tcpm_keylen)
1159 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1160 AF_INET, prefixlen);
1161
1162 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1163 return -EINVAL;
1164
1165 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1166 AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
1167 GFP_KERNEL);
1168}
1169
1170static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1171 __be32 daddr, __be32 saddr,
1172 const struct tcphdr *th, int nbytes)
1173{
1174 struct tcp4_pseudohdr *bp;
1175 struct scatterlist sg;
1176 struct tcphdr *_th;
1177
1178 bp = hp->scratch;
1179 bp->saddr = saddr;
1180 bp->daddr = daddr;
1181 bp->pad = 0;
1182 bp->protocol = IPPROTO_TCP;
1183 bp->len = cpu_to_be16(nbytes);
1184
1185 _th = (struct tcphdr *)(bp + 1);
1186 memcpy(_th, th, sizeof(*th));
1187 _th->check = 0;
1188
1189 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1190 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1191 sizeof(*bp) + sizeof(*th));
1192 return crypto_ahash_update(hp->md5_req);
1193}
1194
1195static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1196 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1197{
1198 struct tcp_md5sig_pool *hp;
1199 struct ahash_request *req;
1200
1201 hp = tcp_get_md5sig_pool();
1202 if (!hp)
1203 goto clear_hash_noput;
1204 req = hp->md5_req;
1205
1206 if (crypto_ahash_init(req))
1207 goto clear_hash;
1208 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
1209 goto clear_hash;
1210 if (tcp_md5_hash_key(hp, key))
1211 goto clear_hash;
1212 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1213 if (crypto_ahash_final(req))
1214 goto clear_hash;
1215
1216 tcp_put_md5sig_pool();
1217 return 0;
1218
1219clear_hash:
1220 tcp_put_md5sig_pool();
1221clear_hash_noput:
1222 memset(md5_hash, 0, 16);
1223 return 1;
1224}
1225
1226int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1227 const struct sock *sk,
1228 const struct sk_buff *skb)
1229{
1230 struct tcp_md5sig_pool *hp;
1231 struct ahash_request *req;
1232 const struct tcphdr *th = tcp_hdr(skb);
1233 __be32 saddr, daddr;
1234
1235 if (sk) {
1236 saddr = sk->sk_rcv_saddr;
1237 daddr = sk->sk_daddr;
1238 } else {
1239 const struct iphdr *iph = ip_hdr(skb);
1240 saddr = iph->saddr;
1241 daddr = iph->daddr;
1242 }
1243
1244 hp = tcp_get_md5sig_pool();
1245 if (!hp)
1246 goto clear_hash_noput;
1247 req = hp->md5_req;
1248
1249 if (crypto_ahash_init(req))
1250 goto clear_hash;
1251
1252 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
1253 goto clear_hash;
1254 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1255 goto clear_hash;
1256 if (tcp_md5_hash_key(hp, key))
1257 goto clear_hash;
1258 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1259 if (crypto_ahash_final(req))
1260 goto clear_hash;
1261
1262 tcp_put_md5sig_pool();
1263 return 0;
1264
1265clear_hash:
1266 tcp_put_md5sig_pool();
1267clear_hash_noput:
1268 memset(md5_hash, 0, 16);
1269 return 1;
1270}
1271EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1272
1273#endif
1274
1275
1276static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1277 const struct sk_buff *skb)
1278{
1279#ifdef CONFIG_TCP_MD5SIG
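	/* Called for every incoming segment, so keep it cheap. Drop when an
	 * MD5 option is expected but absent, present but unexpected, or
	 * present but its signature does not verify.
	 */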
1288 const __u8 *hash_location = NULL;
1289 struct tcp_md5sig_key *hash_expected;
1290 const struct iphdr *iph = ip_hdr(skb);
1291 const struct tcphdr *th = tcp_hdr(skb);
1292 int genhash;
1293 unsigned char newhash[16];
1294
1295 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1296 AF_INET);
1297 hash_location = tcp_parse_md5sig_option(th);
1298
1299
1300 if (!hash_expected && !hash_location)
1301 return false;
1302
1303 if (hash_expected && !hash_location) {
1304 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1305 return true;
1306 }
1307
1308 if (!hash_expected && hash_location) {
1309 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1310 return true;
1311 }
1312
1313
1314
1315
1316 genhash = tcp_v4_md5_hash_skb(newhash,
1317 hash_expected,
1318 NULL, skb);
1319
1320 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1321 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
1322 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1323 &iph->saddr, ntohs(th->source),
1324 &iph->daddr, ntohs(th->dest),
1325 genhash ? " tcp_v4_calc_md5_hash failed"
1326 : "");
1327 return true;
1328 }
1329 return false;
1330#endif
1331 return false;
1332}
1333
1334static void tcp_v4_init_req(struct request_sock *req,
1335 const struct sock *sk_listener,
1336 struct sk_buff *skb)
1337{
1338 struct inet_request_sock *ireq = inet_rsk(req);
1339 struct net *net = sock_net(sk_listener);
1340
1341 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1342 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1343 RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
1344}
1345
1346static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1347 struct flowi *fl,
1348 const struct request_sock *req)
1349{
1350 return inet_csk_route_req(sk, &fl->u.ip4, req);
1351}
1352
1353struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1354 .family = PF_INET,
1355 .obj_size = sizeof(struct tcp_request_sock),
1356 .rtx_syn_ack = tcp_rtx_synack,
1357 .send_ack = tcp_v4_reqsk_send_ack,
1358 .destructor = tcp_v4_reqsk_destructor,
1359 .send_reset = tcp_v4_send_reset,
1360 .syn_ack_timeout = tcp_syn_ack_timeout,
1361};
1362
1363static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1364 .mss_clamp = TCP_MSS_DEFAULT,
1365#ifdef CONFIG_TCP_MD5SIG
1366 .req_md5_lookup = tcp_v4_md5_lookup,
1367 .calc_md5_hash = tcp_v4_md5_hash_skb,
1368#endif
1369 .init_req = tcp_v4_init_req,
1370#ifdef CONFIG_SYN_COOKIES
1371 .cookie_init_seq = cookie_v4_init_sequence,
1372#endif
1373 .route_req = tcp_v4_route_req,
1374 .init_seq = tcp_v4_init_seq,
1375 .init_ts_off = tcp_v4_init_ts_off,
1376 .send_synack = tcp_v4_send_synack,
1377};
1378
1379int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1380{
1381
1382 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1383 goto drop;
1384
1385 return tcp_conn_request(&tcp_request_sock_ops,
1386 &tcp_request_sock_ipv4_ops, sk, skb);
1387
1388drop:
1389 tcp_listendrop(sk);
1390 return 0;
1391}
1392EXPORT_SYMBOL(tcp_v4_conn_request);
1393
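/*
 * The three-way handshake has completed - we got a valid ACK of our
 * SYN-ACK - so create the new (child) socket.
 */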
1399struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1400 struct request_sock *req,
1401 struct dst_entry *dst,
1402 struct request_sock *req_unhash,
1403 bool *own_req)
1404{
1405 struct inet_request_sock *ireq;
1406 struct inet_sock *newinet;
1407 struct tcp_sock *newtp;
1408 struct sock *newsk;
1409#ifdef CONFIG_TCP_MD5SIG
1410 struct tcp_md5sig_key *key;
1411#endif
1412 struct ip_options_rcu *inet_opt;
1413
1414 if (sk_acceptq_is_full(sk))
1415 goto exit_overflow;
1416
1417 newsk = tcp_create_openreq_child(sk, req, skb);
1418 if (!newsk)
1419 goto exit_nonewsk;
1420
1421 newsk->sk_gso_type = SKB_GSO_TCPV4;
1422 inet_sk_rx_dst_set(newsk, skb);
1423
1424 newtp = tcp_sk(newsk);
1425 newinet = inet_sk(newsk);
1426 ireq = inet_rsk(req);
1427 sk_daddr_set(newsk, ireq->ir_rmt_addr);
1428 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1429 newsk->sk_bound_dev_if = ireq->ir_iif;
1430 newinet->inet_saddr = ireq->ir_loc_addr;
1431 inet_opt = rcu_dereference(ireq->ireq_opt);
1432 RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
1433 newinet->mc_index = inet_iif(skb);
1434 newinet->mc_ttl = ip_hdr(skb)->ttl;
1435 newinet->rcv_tos = ip_hdr(skb)->tos;
1436 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1437 if (inet_opt)
1438 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1439 newinet->inet_id = newtp->write_seq ^ jiffies;
1440
1441 if (!dst) {
1442 dst = inet_csk_route_child_sock(sk, newsk, req);
1443 if (!dst)
1444 goto put_and_exit;
1445 } else {
1446
1447 }
1448 sk_setup_caps(newsk, dst);
1449
1450 tcp_ca_openreq_child(newsk, dst);
1451
1452 tcp_sync_mss(newsk, dst_mtu(dst));
1453 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1454
1455 tcp_initialize_rcv_mss(newsk);
1456
1457#ifdef CONFIG_TCP_MD5SIG
1458
1459 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1460 AF_INET);
1461 if (key) {
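		/* The listener has a key for this peer, so install a copy on
		 * the child. If the copy fails under memory pressure the key
		 * is simply absent on the child; that is not fatal.
		 */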
1468 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1469 AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
1470 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1471 }
1472#endif
1473
1474 if (__inet_inherit_port(sk, newsk) < 0)
1475 goto put_and_exit;
1476 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1477 if (likely(*own_req)) {
1478 tcp_move_syn(newtp, req);
1479 ireq->ireq_opt = NULL;
1480 } else {
1481 newinet->inet_opt = NULL;
1482 }
1483 return newsk;
1484
1485exit_overflow:
1486 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1487exit_nonewsk:
1488 dst_release(dst);
1489exit:
1490 tcp_listendrop(sk);
1491 return NULL;
1492put_and_exit:
1493 newinet->inet_opt = NULL;
1494 inet_csk_prepare_forced_close(newsk);
1495 tcp_done(newsk);
1496 goto exit;
1497}
1498EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1499
1500static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1501{
1502#ifdef CONFIG_SYN_COOKIES
1503 const struct tcphdr *th = tcp_hdr(skb);
1504
1505 if (!th->syn)
1506 sk = cookie_v4_check(sk, skb);
1507#endif
1508 return sk;
1509}
1510
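/* Except for TCP_LISTEN sockets, the caller must hold the socket spinlock
 * when calling this; backlog processing therefore uses the BH locking
 * scheme.
 */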
1519int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1520{
1521 struct sock *rsk;
1522
1523 if (sk->sk_state == TCP_ESTABLISHED) {
1524 struct dst_entry *dst = sk->sk_rx_dst;
1525
1526 sock_rps_save_rxhash(sk, skb);
1527 sk_mark_napi_id(sk, skb);
1528 if (dst) {
1529 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1530 !dst->ops->check(dst, 0)) {
1531 dst_release(dst);
1532 sk->sk_rx_dst = NULL;
1533 }
1534 }
1535 tcp_rcv_established(sk, skb);
1536 return 0;
1537 }
1538
1539 if (tcp_checksum_complete(skb))
1540 goto csum_err;
1541
1542 if (sk->sk_state == TCP_LISTEN) {
1543 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1544
1545 if (!nsk)
1546 goto discard;
1547 if (nsk != sk) {
1548 if (tcp_child_process(sk, nsk, skb)) {
1549 rsk = nsk;
1550 goto reset;
1551 }
1552 return 0;
1553 }
1554 } else
1555 sock_rps_save_rxhash(sk, skb);
1556
1557 if (tcp_rcv_state_process(sk, skb)) {
1558 rsk = sk;
1559 goto reset;
1560 }
1561 return 0;
1562
1563reset:
1564 tcp_v4_send_reset(rsk, skb);
1565discard:
1566 kfree_skb(skb);
1567
1572 return 0;
1573
1574csum_err:
1575 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1576 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1577 goto discard;
1578}
1579EXPORT_SYMBOL(tcp_v4_do_rcv);
1580
1581int tcp_v4_early_demux(struct sk_buff *skb)
1582{
1583 const struct iphdr *iph;
1584 const struct tcphdr *th;
1585 struct sock *sk;
1586
1587 if (skb->pkt_type != PACKET_HOST)
1588 return 0;
1589
1590 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1591 return 0;
1592
1593 iph = ip_hdr(skb);
1594 th = tcp_hdr(skb);
1595
1596 if (th->doff < sizeof(struct tcphdr) / 4)
1597 return 0;
1598
1599 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1600 iph->saddr, th->source,
1601 iph->daddr, ntohs(th->dest),
1602 skb->skb_iif, inet_sdif(skb));
1603 if (sk) {
1604 skb->sk = sk;
1605 skb->destructor = sock_edemux;
1606 if (sk_fullsock(sk)) {
1607 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1608
1609 if (dst)
1610 dst = dst_check(dst, 0);
1611 if (dst &&
1612 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1613 skb_dst_set_noref(skb, dst);
1614 }
1615 }
1616 return 0;
1617}
1618
1619bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
1620{
1621 u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
1622
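	/* Only the socket owner can collapse or prune the receive queues to
	 * cut memory use, so allow a little headroom on top of the rcvbuf
	 * and sndbuf limits.
	 */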
1627 limit += 64*1024;
1628
1635 skb_condense(skb);
1636
1637 if (unlikely(sk_add_backlog(sk, skb, limit))) {
1638 bh_unlock_sock(sk);
1639 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
1640 return true;
1641 }
1642 return false;
1643}
1644EXPORT_SYMBOL(tcp_add_backlog);
1645
1646int tcp_filter(struct sock *sk, struct sk_buff *skb)
1647{
1648 struct tcphdr *th = (struct tcphdr *)skb->data;
1649
1650 return sk_filter_trim_cap(sk, skb, th->doff * 4);
1651}
1652EXPORT_SYMBOL(tcp_filter);
1653
1654static void tcp_v4_restore_cb(struct sk_buff *skb)
1655{
1656 memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
1657 sizeof(struct inet_skb_parm));
1658}
1659
1660static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
1661 const struct tcphdr *th)
1662{
1663
1664
1665
1666 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1667 sizeof(struct inet_skb_parm));
1668 barrier();
1669
1670 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1671 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1672 skb->len - th->doff * 4);
1673 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1674 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1675 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1676 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1677 TCP_SKB_CB(skb)->sacked = 0;
1678 TCP_SKB_CB(skb)->has_rxtstamp =
1679 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1680}
1681
1682
1683
1684
1685
1686int tcp_v4_rcv(struct sk_buff *skb)
1687{
1688 struct net *net = dev_net(skb->dev);
1689 int sdif = inet_sdif(skb);
1690 const struct iphdr *iph;
1691 const struct tcphdr *th;
1692 bool refcounted;
1693 struct sock *sk;
1694 int ret;
1695
1696 if (skb->pkt_type != PACKET_HOST)
1697 goto discard_it;
1698
1699
1700 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1701
1702 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1703 goto discard_it;
1704
1705 th = (const struct tcphdr *)skb->data;
1706
1707 if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1708 goto bad_packet;
1709 if (!pskb_may_pull(skb, th->doff * 4))
1710 goto discard_it;
1711
1712
1713
1714
1715
1716
1717 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1718 goto csum_error;
1719
1720 th = (const struct tcphdr *)skb->data;
1721 iph = ip_hdr(skb);
1722lookup:
1723 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1724 th->dest, sdif, &refcounted);
1725 if (!sk)
1726 goto no_tcp_socket;
1727
1728process:
1729 if (sk->sk_state == TCP_TIME_WAIT)
1730 goto do_time_wait;
1731
1732 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1733 struct request_sock *req = inet_reqsk(sk);
1734 bool req_stolen = false;
1735 struct sock *nsk;
1736
1737 sk = req->rsk_listener;
1738 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1739 sk_drops_add(sk, skb);
1740 reqsk_put(req);
1741 goto discard_it;
1742 }
1743 if (tcp_checksum_complete(skb)) {
1744 reqsk_put(req);
1745 goto csum_error;
1746 }
1747 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1748 inet_csk_reqsk_queue_drop_and_put(sk, req);
1749 goto lookup;
1750 }
1751
1752
1753
1754 sock_hold(sk);
1755 refcounted = true;
1756 nsk = NULL;
1757 if (!tcp_filter(sk, skb)) {
1758 th = (const struct tcphdr *)skb->data;
1759 iph = ip_hdr(skb);
1760 tcp_v4_fill_cb(skb, iph, th);
1761 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1762 }
1763 if (!nsk) {
1764 reqsk_put(req);
1765 if (req_stolen) {
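			/* Another CPU got exclusive access to the request and
			 * created a full socket from it: look the socket up
			 * again and feed it this segment instead of dropping
			 * it.
			 */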
1771 tcp_v4_restore_cb(skb);
1772 sock_put(sk);
1773 goto lookup;
1774 }
1775 goto discard_and_relse;
1776 }
1777 if (nsk == sk) {
1778 reqsk_put(req);
1779 tcp_v4_restore_cb(skb);
1780 } else if (tcp_child_process(sk, nsk, skb)) {
1781 tcp_v4_send_reset(nsk, skb);
1782 goto discard_and_relse;
1783 } else {
1784 sock_put(sk);
1785 return 0;
1786 }
1787 }
1788 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1789 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1790 goto discard_and_relse;
1791 }
1792
1793 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1794 goto discard_and_relse;
1795
1796 if (tcp_v4_inbound_md5_hash(sk, skb))
1797 goto discard_and_relse;
1798
1799 nf_reset(skb);
1800
1801 if (tcp_filter(sk, skb))
1802 goto discard_and_relse;
1803 th = (const struct tcphdr *)skb->data;
1804 iph = ip_hdr(skb);
1805 tcp_v4_fill_cb(skb, iph, th);
1806
1807 skb->dev = NULL;
1808
1809 if (sk->sk_state == TCP_LISTEN) {
1810 ret = tcp_v4_do_rcv(sk, skb);
1811 goto put_and_return;
1812 }
1813
1814 sk_incoming_cpu_update(sk);
1815
1816 bh_lock_sock_nested(sk);
1817 tcp_segs_in(tcp_sk(sk), skb);
1818 ret = 0;
1819 if (!sock_owned_by_user(sk)) {
1820 ret = tcp_v4_do_rcv(sk, skb);
1821 } else if (tcp_add_backlog(sk, skb)) {
1822 goto discard_and_relse;
1823 }
1824 bh_unlock_sock(sk);
1825
1826put_and_return:
1827 if (refcounted)
1828 sock_put(sk);
1829
1830 return ret;
1831
1832no_tcp_socket:
1833 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1834 goto discard_it;
1835
1836 tcp_v4_fill_cb(skb, iph, th);
1837
1838 if (tcp_checksum_complete(skb)) {
1839csum_error:
1840 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1841bad_packet:
1842 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1843 } else {
1844 tcp_v4_send_reset(NULL, skb);
1845 }
1846
1847discard_it:
1848
1849 kfree_skb(skb);
1850 return 0;
1851
1852discard_and_relse:
1853 sk_drops_add(sk, skb);
1854 if (refcounted)
1855 sock_put(sk);
1856 goto discard_it;
1857
1858do_time_wait:
1859 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1860 inet_twsk_put(inet_twsk(sk));
1861 goto discard_it;
1862 }
1863
1864 tcp_v4_fill_cb(skb, iph, th);
1865
1866 if (tcp_checksum_complete(skb)) {
1867 inet_twsk_put(inet_twsk(sk));
1868 goto csum_error;
1869 }
1870 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1871 case TCP_TW_SYN: {
1872 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1873 &tcp_hashinfo, skb,
1874 __tcp_hdrlen(th),
1875 iph->saddr, th->source,
1876 iph->daddr, th->dest,
1877 inet_iif(skb),
1878 sdif);
1879 if (sk2) {
1880 inet_twsk_deschedule_put(inet_twsk(sk));
1881 sk = sk2;
1882 tcp_v4_restore_cb(skb);
1883 refcounted = false;
1884 goto process;
1885 }
1886 }
1887
1888
1889 case TCP_TW_ACK:
1890 tcp_v4_timewait_ack(sk, skb);
1891 break;
1892 case TCP_TW_RST:
1893 tcp_v4_send_reset(sk, skb);
1894 inet_twsk_deschedule_put(inet_twsk(sk));
1895 goto discard_it;
1896 case TCP_TW_SUCCESS:;
1897 }
1898 goto discard_it;
1899}
1900
1901static struct timewait_sock_ops tcp_timewait_sock_ops = {
1902 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1903 .twsk_unique = tcp_twsk_unique,
1904 .twsk_destructor= tcp_twsk_destructor,
1905};
1906
1907void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1908{
1909 struct dst_entry *dst = skb_dst(skb);
1910
1911 if (dst && dst_hold_safe(dst)) {
1912 sk->sk_rx_dst = dst;
1913 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1914 }
1915}
1916EXPORT_SYMBOL(inet_sk_rx_dst_set);
1917
1918const struct inet_connection_sock_af_ops ipv4_specific = {
1919 .queue_xmit = ip_queue_xmit,
1920 .send_check = tcp_v4_send_check,
1921 .rebuild_header = inet_sk_rebuild_header,
1922 .sk_rx_dst_set = inet_sk_rx_dst_set,
1923 .conn_request = tcp_v4_conn_request,
1924 .syn_recv_sock = tcp_v4_syn_recv_sock,
1925 .net_header_len = sizeof(struct iphdr),
1926 .setsockopt = ip_setsockopt,
1927 .getsockopt = ip_getsockopt,
1928 .addr2sockaddr = inet_csk_addr2sockaddr,
1929 .sockaddr_len = sizeof(struct sockaddr_in),
1930#ifdef CONFIG_COMPAT
1931 .compat_setsockopt = compat_ip_setsockopt,
1932 .compat_getsockopt = compat_ip_getsockopt,
1933#endif
1934 .mtu_reduced = tcp_v4_mtu_reduced,
1935};
1936EXPORT_SYMBOL(ipv4_specific);
1937
1938#ifdef CONFIG_TCP_MD5SIG
1939static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1940 .md5_lookup = tcp_v4_md5_lookup,
1941 .calc_md5_hash = tcp_v4_md5_hash_skb,
1942 .md5_parse = tcp_v4_parse_md5_keys,
1943};
1944#endif
1945
1946
1947
1948
1949static int tcp_v4_init_sock(struct sock *sk)
1950{
1951 struct inet_connection_sock *icsk = inet_csk(sk);
1952
1953 tcp_init_sock(sk);
1954
1955 icsk->icsk_af_ops = &ipv4_specific;
1956
1957#ifdef CONFIG_TCP_MD5SIG
1958 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1959#endif
1960
1961 return 0;
1962}
1963
1964void tcp_v4_destroy_sock(struct sock *sk)
1965{
1966 struct tcp_sock *tp = tcp_sk(sk);
1967
1968 trace_tcp_destroy_sock(sk);
1969
1970 tcp_clear_xmit_timers(sk);
1971
1972 tcp_cleanup_congestion_control(sk);
1973
1974 tcp_cleanup_ulp(sk);
1975
1976
1977 tcp_write_queue_purge(sk);
1978
1979
1980 tcp_fastopen_active_disable_ofo_check(sk);
1981
1982
1983 skb_rbtree_purge(&tp->out_of_order_queue);
1984
1985#ifdef CONFIG_TCP_MD5SIG
1986
1987 if (tp->md5sig_info) {
1988 tcp_clear_md5_list(sk);
1989 kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
1990 tp->md5sig_info = NULL;
1991 }
1992#endif
1993
1994
1995 if (inet_csk(sk)->icsk_bind_hash)
1996 inet_put_port(sk);
1997
1998 BUG_ON(tp->fastopen_rsk);
1999
2000
2001 tcp_free_fastopen_req(tp);
2002 tcp_fastopen_destroy_cipher(sk);
2003 tcp_saved_syn_free(tp);
2004
2005 sk_sockets_allocated_dec(sk);
2006}
2007EXPORT_SYMBOL(tcp_v4_destroy_sock);
2008
2009#ifdef CONFIG_PROC_FS
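/* /proc/net/tcp dumping: iterate the listening hash first, then the
 * established hash. listening_get_next() returns the listener after cur,
 * or the first listener in st->bucket when cur is NULL.
 */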
2017static void *listening_get_next(struct seq_file *seq, void *cur)
2018{
2019 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2020 struct tcp_iter_state *st = seq->private;
2021 struct net *net = seq_file_net(seq);
2022 struct inet_listen_hashbucket *ilb;
2023 struct sock *sk = cur;
2024
2025 if (!sk) {
2026get_head:
2027 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2028 spin_lock(&ilb->lock);
2029 sk = sk_head(&ilb->head);
2030 st->offset = 0;
2031 goto get_sk;
2032 }
2033 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2034 ++st->num;
2035 ++st->offset;
2036
2037 sk = sk_next(sk);
2038get_sk:
2039 sk_for_each_from(sk) {
2040 if (!net_eq(sock_net(sk), net))
2041 continue;
2042 if (sk->sk_family == afinfo->family)
2043 return sk;
2044 }
2045 spin_unlock(&ilb->lock);
2046 st->offset = 0;
2047 if (++st->bucket < INET_LHTABLE_SIZE)
2048 goto get_head;
2049 return NULL;
2050}
2051
2052static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2053{
2054 struct tcp_iter_state *st = seq->private;
2055 void *rc;
2056
2057 st->bucket = 0;
2058 st->offset = 0;
2059 rc = listening_get_next(seq, NULL);
2060
2061 while (rc && *pos) {
2062 rc = listening_get_next(seq, rc);
2063 --*pos;
2064 }
2065 return rc;
2066}
2067
2068static inline bool empty_bucket(const struct tcp_iter_state *st)
2069{
2070 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
2071}
2072
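/* Get the first established socket, starting from the bucket given in
 * st->bucket; when st->bucket is zero the very first socket in the hash
 * is returned.
 */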
2077static void *established_get_first(struct seq_file *seq)
2078{
2079 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2080 struct tcp_iter_state *st = seq->private;
2081 struct net *net = seq_file_net(seq);
2082 void *rc = NULL;
2083
2084 st->offset = 0;
2085 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2086 struct sock *sk;
2087 struct hlist_nulls_node *node;
2088 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2089
2090
2091 if (empty_bucket(st))
2092 continue;
2093
2094 spin_lock_bh(lock);
2095 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2096 if (sk->sk_family != afinfo->family ||
2097 !net_eq(sock_net(sk), net)) {
2098 continue;
2099 }
2100 rc = sk;
2101 goto out;
2102 }
2103 spin_unlock_bh(lock);
2104 }
2105out:
2106 return rc;
2107}
2108
2109static void *established_get_next(struct seq_file *seq, void *cur)
2110{
2111 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2112 struct sock *sk = cur;
2113 struct hlist_nulls_node *node;
2114 struct tcp_iter_state *st = seq->private;
2115 struct net *net = seq_file_net(seq);
2116
2117 ++st->num;
2118 ++st->offset;
2119
2120 sk = sk_nulls_next(sk);
2121
2122 sk_nulls_for_each_from(sk, node) {
2123 if (sk->sk_family == afinfo->family &&
2124 net_eq(sock_net(sk), net))
2125 return sk;
2126 }
2127
2128 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2129 ++st->bucket;
2130 return established_get_first(seq);
2131}
2132
2133static void *established_get_idx(struct seq_file *seq, loff_t pos)
2134{
2135 struct tcp_iter_state *st = seq->private;
2136 void *rc;
2137
2138 st->bucket = 0;
2139 rc = established_get_first(seq);
2140
2141 while (rc && pos) {
2142 rc = established_get_next(seq, rc);
2143 --pos;
2144 }
2145 return rc;
2146}
2147
2148static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2149{
2150 void *rc;
2151 struct tcp_iter_state *st = seq->private;
2152
2153 st->state = TCP_SEQ_STATE_LISTENING;
2154 rc = listening_get_idx(seq, &pos);
2155
2156 if (!rc) {
2157 st->state = TCP_SEQ_STATE_ESTABLISHED;
2158 rc = established_get_idx(seq, pos);
2159 }
2160
2161 return rc;
2162}
2163
2164static void *tcp_seek_last_pos(struct seq_file *seq)
2165{
2166 struct tcp_iter_state *st = seq->private;
2167 int offset = st->offset;
2168 int orig_num = st->num;
2169 void *rc = NULL;
2170
2171 switch (st->state) {
2172 case TCP_SEQ_STATE_LISTENING:
2173 if (st->bucket >= INET_LHTABLE_SIZE)
2174 break;
2175 st->state = TCP_SEQ_STATE_LISTENING;
2176 rc = listening_get_next(seq, NULL);
2177 while (offset-- && rc)
2178 rc = listening_get_next(seq, rc);
2179 if (rc)
2180 break;
2181 st->bucket = 0;
2182 st->state = TCP_SEQ_STATE_ESTABLISHED;
2183
2184 case TCP_SEQ_STATE_ESTABLISHED:
2185 if (st->bucket > tcp_hashinfo.ehash_mask)
2186 break;
2187 rc = established_get_first(seq);
2188 while (offset-- && rc)
2189 rc = established_get_next(seq, rc);
2190 }
2191
2192 st->num = orig_num;
2193
2194 return rc;
2195}
2196
2197void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2198{
2199 struct tcp_iter_state *st = seq->private;
2200 void *rc;
2201
2202 if (*pos && *pos == st->last_pos) {
2203 rc = tcp_seek_last_pos(seq);
2204 if (rc)
2205 goto out;
2206 }
2207
2208 st->state = TCP_SEQ_STATE_LISTENING;
2209 st->num = 0;
2210 st->bucket = 0;
2211 st->offset = 0;
2212 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2213
2214out:
2215 st->last_pos = *pos;
2216 return rc;
2217}
2218EXPORT_SYMBOL(tcp_seq_start);
2219
2220void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2221{
2222 struct tcp_iter_state *st = seq->private;
2223 void *rc = NULL;
2224
2225 if (v == SEQ_START_TOKEN) {
2226 rc = tcp_get_idx(seq, 0);
2227 goto out;
2228 }
2229
2230 switch (st->state) {
2231 case TCP_SEQ_STATE_LISTENING:
2232 rc = listening_get_next(seq, v);
2233 if (!rc) {
2234 st->state = TCP_SEQ_STATE_ESTABLISHED;
2235 st->bucket = 0;
2236 st->offset = 0;
2237 rc = established_get_first(seq);
2238 }
2239 break;
2240 case TCP_SEQ_STATE_ESTABLISHED:
2241 rc = established_get_next(seq, v);
2242 break;
2243 }
2244out:
2245 ++*pos;
2246 st->last_pos = *pos;
2247 return rc;
2248}
2249EXPORT_SYMBOL(tcp_seq_next);
2250
2251void tcp_seq_stop(struct seq_file *seq, void *v)
2252{
2253 struct tcp_iter_state *st = seq->private;
2254
2255 switch (st->state) {
2256 case TCP_SEQ_STATE_LISTENING:
2257 if (v != SEQ_START_TOKEN)
2258 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
2259 break;
2260 case TCP_SEQ_STATE_ESTABLISHED:
2261 if (v)
2262 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2263 break;
2264 }
2265}
2266EXPORT_SYMBOL(tcp_seq_stop);
2267
2268static void get_openreq4(const struct request_sock *req,
2269 struct seq_file *f, int i)
2270{
2271 const struct inet_request_sock *ireq = inet_rsk(req);
2272 long delta = req->rsk_timer.expires - jiffies;
2273
2274 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2275 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2276 i,
2277 ireq->ir_loc_addr,
2278 ireq->ir_num,
2279 ireq->ir_rmt_addr,
2280 ntohs(ireq->ir_rmt_port),
2281 TCP_SYN_RECV,
2282 0, 0,
2283 1,
2284 jiffies_delta_to_clock_t(delta),
2285 req->num_timeout,
2286 from_kuid_munged(seq_user_ns(f),
2287 sock_i_uid(req->rsk_listener)),
2288 0,
2289 0,
2290 0,
2291 req);
2292}
2293
2294static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2295{
2296 int timer_active;
2297 unsigned long timer_expires;
2298 const struct tcp_sock *tp = tcp_sk(sk);
2299 const struct inet_connection_sock *icsk = inet_csk(sk);
2300 const struct inet_sock *inet = inet_sk(sk);
2301 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2302 __be32 dest = inet->inet_daddr;
2303 __be32 src = inet->inet_rcv_saddr;
2304 __u16 destp = ntohs(inet->inet_dport);
2305 __u16 srcp = ntohs(inet->inet_sport);
2306 int rx_queue;
2307 int state;
2308
2309 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2310 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2311 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2312 timer_active = 1;
2313 timer_expires = icsk->icsk_timeout;
2314 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2315 timer_active = 4;
2316 timer_expires = icsk->icsk_timeout;
2317 } else if (timer_pending(&sk->sk_timer)) {
2318 timer_active = 2;
2319 timer_expires = sk->sk_timer.expires;
2320 } else {
2321 timer_active = 0;
2322 timer_expires = jiffies;
2323 }
2324
2325 state = inet_sk_state_load(sk);
2326 if (state == TCP_LISTEN)
2327 rx_queue = sk->sk_ack_backlog;
2328 else
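		/* The socket lock is not held here, so rcv_nxt - copied_seq
		 * can transiently go negative; clamp it at zero.
		 */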
2332 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2333
2334 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2335 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2336 i, src, srcp, dest, destp, state,
2337 tp->write_seq - tp->snd_una,
2338 rx_queue,
2339 timer_active,
2340 jiffies_delta_to_clock_t(timer_expires - jiffies),
2341 icsk->icsk_retransmits,
2342 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2343 icsk->icsk_probes_out,
2344 sock_i_ino(sk),
2345 refcount_read(&sk->sk_refcnt), sk,
2346 jiffies_to_clock_t(icsk->icsk_rto),
2347 jiffies_to_clock_t(icsk->icsk_ack.ato),
2348 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2349 tp->snd_cwnd,
2350 state == TCP_LISTEN ?
2351 fastopenq->max_qlen :
2352 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2353}
2354
2355static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2356 struct seq_file *f, int i)
2357{
2358 long delta = tw->tw_timer.expires - jiffies;
2359 __be32 dest, src;
2360 __u16 destp, srcp;
2361
2362 dest = tw->tw_daddr;
2363 src = tw->tw_rcv_saddr;
2364 destp = ntohs(tw->tw_dport);
2365 srcp = ntohs(tw->tw_sport);
2366
2367 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2368 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2369 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2370 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2371 refcount_read(&tw->tw_refcnt), tw);
2372}
2373
2374#define TMPSZ 150
2375
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}

static const struct seq_operations tcp4_seq_ops = {
	.show		= tcp4_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.family		= AF_INET,
};

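/* Register /proc/net/tcp for this network namespace; each open file
 * gets its own struct tcp_iter_state for the iterator.
 */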
static int __net_init tcp4_proc_init_net(struct net *net)
{
	if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
			sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
		return -ENOMEM;
	return 0;
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	remove_proc_entry("tcp", net->proc_net);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

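/* Protocol method table for IPv4 TCP sockets: the generic socket and
 * inet layers call through these hooks for connect(), sendmsg(),
 * setsockopt() and friends.
 */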
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

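/* Per-namespace teardown: release the congestion control module picked
 * in tcp_sk_init() and destroy the per-cpu control sockets.
 */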
static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	if (net->ipv4.tcp_congestion_control)
		module_put(net->ipv4.tcp_congestion_control->owner);

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}

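/* Per-namespace setup: create one control socket per possible CPU
 * (used to send RSTs and ACKs that are not tied to a user socket) and
 * initialise this namespace's TCP sysctl defaults.
 */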
static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

		/* Please enforce IP_DF and IPID==0 for RST and
		 * ACK sent in SYN-RECV and TIME-WAIT state.
		 */
		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;

		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 2;

	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1;
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
	/* Default TSQ limit of 16 TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}
	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Reno is always built in */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}

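/* Batched namespace teardown: purge IPv4 TIME-WAIT sockets once for
 * the whole batch, then free each namespace's TCP fastopen context.
 */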
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

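/* Called at boot to register the per-namespace init/exit operations
 * above; TCP is mandatory, so any failure here is fatal.
 */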
void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}
