/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);
	int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;

	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;

		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
			     (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
			     (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    tcptw->tw_ts_recent_stamp)))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 *
		 * Without this check re-using a TIME-WAIT socket with TCP
		 * repair would accumulate a -1 on the repair assigned
		 * sequence number. The first time it is reused the sequence
		 * is -1, the second time -2, etc. This fixes that issue
		 * without affecting the validity.
		 */
		if (likely(!tp->repair)) {
			tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
			if (tp->write_seq == 0)
				tp->write_seq = 1;
			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
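
/* A hedged aside on the "+ 65535 + 2" above (not original kernel text):
 * the reused connection's first sequence number is pushed past the old
 * TIME-WAIT socket's snd_nxt by more than the largest unscaled TCP window
 * (64KB), so stray segments from the previous incarnation can never fall
 * inside the new connection's sequence space.  E.g. with old snd_nxt ==
 * 1000, anything the peer could still accept for the old flow ends by
 * 1000 + 65535, while the new flow starts at 1000 + 65537.
 */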

static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v4_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
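
/* Hedged call-flow recap (a reading of tcp_v4_connect(), not original
 * kernel text): connect(2) on an IPv4 TCP socket arrives here through
 * inet_stream_connect() after tcp_v4_pre_connect().  The function routes
 * the destination, moves the socket to SYN-SENT, lets inet_hash_connect()
 * pick a source port (possibly recycling a TIME-WAIT port via
 * tcp_twsk_unique() above), seeds write_seq and tsoffset, and finally
 * emits the SYN in tcp_connect().
 */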

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Retransmit what no longer fits the path MTU; a hedged
		 * summary: tcp_simple_retransmit() re-sends the queued
		 * segments larger than the new MSS without mucking with
		 * the congestion window or the RTO backoff.
		 */
		tcp_simple_retransmit(sk);
	}
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq,
				   type == ICMP_PARAMETERPROB ||
				   type == ICMP_TIME_EXCEEDED ||
				   (type == ICMP_DEST_UNREACH &&
				    (code == ICMP_NET_UNREACH ||
				     code == ICMP_HOST_UNREACH)));

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is not locked.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);

	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_rtx_queue_head(sk);
		BUG_ON(!skb);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else { /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}
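
/* A hedged note on the CHECKSUM_PARTIAL setup above (not original kernel
 * text): only the folded pseudo-header sum is stored in th->check, i.e.
 * roughly
 *
 *	th->check = ~csum_tcpudp_magic(saddr, daddr, skb->len,
 *				       IPPROTO_TCP, 0);
 *
 * csum_start/csum_offset then tell the NIC (or skb_checksum_help() as the
 * software fallback) where to fold in the one's-complement sum of the TCP
 * header and payload before transmission.
 */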

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: a reset is sent on behalf of a connection that does not
 *		exist in our system, so there are no socket parameters we
 *		could legitimately use for it.
 *
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;
	struct sock *ctl_sk;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}
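
	/* Illustrative restatement of the RFC 793 reset rules encoded
	 * above (not original kernel text): if the offending segment had
	 * an ACK, the RST is sent with SEQ = SEG.ACK and no ACK bit;
	 * otherwise SEQ stays 0 and we ACK SEG.SEQ + SEG.LEN, where SYN
	 * and FIN each count as one octet of sequence space -- hence the
	 * th->syn + th->fin + payload-length arithmetic.
	 */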

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb),
					     tcp_v4_sdif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
	if (sk)
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
801 struct {
802 struct tcphdr th;
803 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
804#ifdef CONFIG_TCP_MD5SIG
805 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
806#endif
807 ];
808 } rep;
809 struct net *net = sock_net(sk);
810 struct ip_reply_arg arg;
811 struct sock *ctl_sk;
812
813 memset(&rep.th, 0, sizeof(struct tcphdr));
814 memset(&arg, 0, sizeof(arg));
815
816 arg.iov[0].iov_base = (unsigned char *)&rep;
817 arg.iov[0].iov_len = sizeof(rep.th);
818 if (tsecr) {
819 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
820 (TCPOPT_TIMESTAMP << 8) |
821 TCPOLEN_TIMESTAMP);
822 rep.opt[1] = htonl(tsval);
823 rep.opt[2] = htonl(tsecr);
824 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
825 }

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
831 rep.th.seq = htonl(seq);
832 rep.th.ack_seq = htonl(ack);
833 rep.th.ack = 1;
834 rep.th.window = htons(win);
835
836#ifdef CONFIG_TCP_MD5SIG
837 if (key) {
838 int offset = (tsecr) ? 3 : 0;
839
840 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
841 (TCPOPT_NOP << 16) |
842 (TCPOPT_MD5SIG << 8) |
843 TCPOLEN_MD5SIG);
844 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
845 rep.th.doff = arg.iov[0].iov_len/4;
846
847 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
848 key, ip_hdr(skb)->saddr,
849 ip_hdr(skb)->daddr, &rep.th);
850 }
851#endif
852 arg.flags = reply_flags;
853 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
854 ip_hdr(skb)->saddr,
855 arg.iov[0].iov_len, IPPROTO_TCP, 0);
856 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
857 if (oif)
858 arg.bound_dev_if = oif;
859 arg.tos = tos;
860 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
861 local_bh_disable();
862 ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
863 if (sk)
864 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
865 inet_twsk(sk)->tw_mark : sk->sk_mark;
866 ip_send_unicast_reply(ctl_sk,
867 skb, &TCP_SKB_CB(skb)->header.h4.opt,
868 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
869 &arg, arg.iov[0].iov_len);
870
871 ctl_sk->sk_mark = 0;
872 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
873 local_bh_enable();
874}
875
876static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
877{
878 struct inet_timewait_sock *tw = inet_twsk(sk);
879 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
880
881 tcp_v4_send_ack(sk, skb,
882 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
883 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
884 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
885 tcptw->tw_ts_recent,
886 tw->tw_bound_dev_if,
887 tcp_twsk_md5_key(tcptw),
888 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
889 tw->tw_tos
890 );
891
892 inet_twsk_put(tw);
893}
894
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
					     tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}
920

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		rcu_read_lock();
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    rcu_dereference(ireq->ireq_opt));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
964
#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);
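
/* Hedged usage sketch (hypothetical addresses, not from the original
 * source): with keys installed for 10.0.0.0/8 and 10.1.0.0/16, a lookup
 * for 10.1.2.3 matches both entries and the loop above keeps the /16 key
 * because the longer prefixlen wins; a lookup for 10.2.0.1 matches only
 * the /8 key and returns that.
 */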
1014
1015static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
1016 const union tcp_md5_addr *addr,
1017 int family, u8 prefixlen)
1018{
1019 const struct tcp_sock *tp = tcp_sk(sk);
1020 struct tcp_md5sig_key *key;
1021 unsigned int size = sizeof(struct in_addr);
1022 const struct tcp_md5sig_info *md5sig;
1023
1024
1025 md5sig = rcu_dereference_check(tp->md5sig_info,
1026 lockdep_sock_is_held(sk));
1027 if (!md5sig)
1028 return NULL;
1029#if IS_ENABLED(CONFIG_IPV6)
1030 if (family == AF_INET6)
1031 size = sizeof(struct in6_addr);
1032#endif
1033 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
1034 if (key->family != family)
1035 continue;
1036 if (!memcmp(&key->addr, addr, size) &&
1037 key->prefixlen == prefixlen)
1038 return key;
1039 }
1040 return NULL;
1041}
1042
1043struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
1044 const struct sock *addr_sk)
1045{
1046 const union tcp_md5_addr *addr;
1047
1048 addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
1049 return tcp_md5_do_lookup(sk, addr, AF_INET);
1050}
1051EXPORT_SYMBOL(tcp_v4_md5_lookup);
1052

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	key->prefixlen = prefixlen;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);
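
/* For illustration only -- a hedged sketch of how a key typically gets
 * here from user space; the uapi struct and option are real, the address
 * and key below are made up:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *a = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	a->sin_family = AF_INET;
 *	a->sin_addr.s_addr = inet_addr("192.0.2.1");
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * That call is parsed by tcp_v4_parse_md5_keys() below, which lands in
 * tcp_md5_do_add() above.
 */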
1102
1103int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
1104 u8 prefixlen)
1105{
1106 struct tcp_md5sig_key *key;
1107
1108 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
1109 if (!key)
1110 return -ENOENT;
1111 hlist_del_rcu(&key->node);
1112 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1113 kfree_rcu(key, rcu);
1114 return 0;
1115}
1116EXPORT_SYMBOL(tcp_md5_do_del);
1117
1118static void tcp_clear_md5_list(struct sock *sk)
1119{
1120 struct tcp_sock *tp = tcp_sk(sk);
1121 struct tcp_md5sig_key *key;
1122 struct hlist_node *n;
1123 struct tcp_md5sig_info *md5sig;
1124
1125 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1126
1127 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1128 hlist_del_rcu(&key->node);
1129 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1130 kfree_rcu(key, rcu);
1131 }
1132}
1133
1134static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
1135 char __user *optval, int optlen)
1136{
1137 struct tcp_md5sig cmd;
1138 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1139 u8 prefixlen = 32;
1140
1141 if (optlen < sizeof(cmd))
1142 return -EINVAL;
1143
1144 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1145 return -EFAULT;
1146
1147 if (sin->sin_family != AF_INET)
1148 return -EINVAL;
1149
1150 if (optname == TCP_MD5SIG_EXT &&
1151 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
1152 prefixlen = cmd.tcpm_prefixlen;
1153 if (prefixlen > 32)
1154 return -EINVAL;
1155 }
1156
1157 if (!cmd.tcpm_keylen)
1158 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1159 AF_INET, prefixlen);
1160
1161 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1162 return -EINVAL;
1163
1164 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1165 AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
1166 GFP_KERNEL);
1167}
1168
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}
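
/* A hedged recap of the RFC 2385 digest layout (restated, not original
 * kernel text): the MD5 hash covers, in order,
 *	1. the IPv4 pseudo-header (saddr, daddr, zero pad, IPPROTO_TCP, len),
 *	2. the TCP header with its checksum field forced to zero,
 *	3. the TCP segment data, and
 *	4. the configured key itself.
 * The helper above feeds items 1 and 2 into the ahash request; segment
 * data and the key are hashed by the callers below.
 */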
1193
1194static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1195 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1196{
1197 struct tcp_md5sig_pool *hp;
1198 struct ahash_request *req;
1199
1200 hp = tcp_get_md5sig_pool();
1201 if (!hp)
1202 goto clear_hash_noput;
1203 req = hp->md5_req;
1204
1205 if (crypto_ahash_init(req))
1206 goto clear_hash;
1207 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
1208 goto clear_hash;
1209 if (tcp_md5_hash_key(hp, key))
1210 goto clear_hash;
1211 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1212 if (crypto_ahash_final(req))
1213 goto clear_hash;
1214
1215 tcp_put_md5sig_pool();
1216 return 0;
1217
1218clear_hash:
1219 tcp_put_md5sig_pool();
1220clear_hash_noput:
1221 memset(md5_hash, 0, 16);
1222 return 1;
1223}
1224
1225int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1226 const struct sock *sk,
1227 const struct sk_buff *skb)
1228{
1229 struct tcp_md5sig_pool *hp;
1230 struct ahash_request *req;
1231 const struct tcphdr *th = tcp_hdr(skb);
1232 __be32 saddr, daddr;
1233
1234 if (sk) {
1235 saddr = sk->sk_rcv_saddr;
1236 daddr = sk->sk_daddr;
1237 } else {
1238 const struct iphdr *iph = ip_hdr(skb);
1239 saddr = iph->saddr;
1240 daddr = iph->daddr;
1241 }
1242
1243 hp = tcp_get_md5sig_pool();
1244 if (!hp)
1245 goto clear_hash_noput;
1246 req = hp->md5_req;
1247
1248 if (crypto_ahash_init(req))
1249 goto clear_hash;
1250
1251 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
1252 goto clear_hash;
1253 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1254 goto clear_hash;
1255 if (tcp_md5_hash_key(hp, key))
1256 goto clear_hash;
1257 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1258 if (crypto_ahash_final(req))
1259 goto clear_hash;
1260
1261 tcp_put_md5sig_pool();
1262 return 0;
1263
1264clear_hash:
1265 tcp_put_md5sig_pool();
1266clear_hash_noput:
1267 memset(md5_hash, 0, 16);
1268 return 1;
1269}
1270EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1271
1272#endif
1273

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}
1332
1333static void tcp_v4_init_req(struct request_sock *req,
1334 const struct sock *sk_listener,
1335 struct sk_buff *skb)
1336{
1337 struct inet_request_sock *ireq = inet_rsk(req);
1338 struct net *net = sock_net(sk_listener);
1339
1340 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1341 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1342 RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
1343}
1344
1345static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1346 struct flowi *fl,
1347 const struct request_sock *req)
1348{
1349 return inet_csk_route_req(sk, &fl->u.ip4, req);
1350}
1351
1352struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1353 .family = PF_INET,
1354 .obj_size = sizeof(struct tcp_request_sock),
1355 .rtx_syn_ack = tcp_rtx_synack,
1356 .send_ack = tcp_v4_reqsk_send_ack,
1357 .destructor = tcp_v4_reqsk_destructor,
1358 .send_reset = tcp_v4_send_reset,
1359 .syn_ack_timeout = tcp_syn_ack_timeout,
1360};
1361
1362static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1363 .mss_clamp = TCP_MSS_DEFAULT,
1364#ifdef CONFIG_TCP_MD5SIG
1365 .req_md5_lookup = tcp_v4_md5_lookup,
1366 .calc_md5_hash = tcp_v4_md5_hash_skb,
1367#endif
1368 .init_req = tcp_v4_init_req,
1369#ifdef CONFIG_SYN_COOKIES
1370 .cookie_init_seq = cookie_v4_init_sequence,
1371#endif
1372 .route_req = tcp_v4_route_req,
1373 .init_seq = tcp_v4_init_seq,
1374 .init_ts_off = tcp_v4_init_ts_off,
1375 .send_synack = tcp_v4_send_synack,
1376};
1377
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs send to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
1398struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1399 struct request_sock *req,
1400 struct dst_entry *dst,
1401 struct request_sock *req_unhash,
1402 bool *own_req)
1403{
1404 struct inet_request_sock *ireq;
1405 struct inet_sock *newinet;
1406 struct tcp_sock *newtp;
1407 struct sock *newsk;
1408#ifdef CONFIG_TCP_MD5SIG
1409 struct tcp_md5sig_key *key;
1410#endif
1411 struct ip_options_rcu *inet_opt;
1412
1413 if (sk_acceptq_is_full(sk))
1414 goto exit_overflow;
1415
1416 newsk = tcp_create_openreq_child(sk, req, skb);
1417 if (!newsk)
1418 goto exit_nonewsk;
1419
1420 newsk->sk_gso_type = SKB_GSO_TCPV4;
1421 inet_sk_rx_dst_set(newsk, skb);
1422
1423 newtp = tcp_sk(newsk);
1424 newinet = inet_sk(newsk);
1425 ireq = inet_rsk(req);
1426 sk_daddr_set(newsk, ireq->ir_rmt_addr);
1427 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1428 newsk->sk_bound_dev_if = ireq->ir_iif;
1429 newinet->inet_saddr = ireq->ir_loc_addr;
1430 inet_opt = rcu_dereference(ireq->ireq_opt);
1431 RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
1432 newinet->mc_index = inet_iif(skb);
1433 newinet->mc_ttl = ip_hdr(skb)->ttl;
1434 newinet->rcv_tos = ip_hdr(skb)->tos;
1435 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1436 if (inet_opt)
1437 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1438 newinet->inet_id = newtp->write_seq ^ jiffies;
1439
	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif
1472
1473 if (__inet_inherit_port(sk, newsk) < 0)
1474 goto put_and_exit;
1475 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1476 if (likely(*own_req)) {
1477 tcp_move_syn(newtp, req);
1478 ireq->ireq_opt = NULL;
1479 } else {
1480 newinet->inet_opt = NULL;
1481 }
1482 return newsk;
1483
1484exit_overflow:
1485 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1486exit_nonewsk:
1487 dst_release(dst);
1488exit:
1489 tcp_listendrop(sk);
1490 return NULL;
1491put_and_exit:
1492 newinet->inet_opt = NULL;
1493 inet_csk_prepare_forced_close(newsk);
1494 tcp_done(newsk);
1495 goto exit;
1496}
1497EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1498
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}
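
/* Hedged background note (not original kernel text): a pure ACK hitting a
 * listener here may be the third step of a handshake whose SYN queue entry
 * was never created because syncookies were in effect.  cookie_v4_check()
 * recomputes the cookie from the ACK's addresses, ports and (ack_seq - 1)
 * and, if it validates, reconstructs the request sock on the spot, letting
 * the connection complete without any stored state.
 */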
1509

/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1519{
1520 struct sock *rsk;
1521
1522 if (sk->sk_state == TCP_ESTABLISHED) {
1523 struct dst_entry *dst = sk->sk_rx_dst;
1524
1525 sock_rps_save_rxhash(sk, skb);
1526 sk_mark_napi_id(sk, skb);
1527 if (dst) {
1528 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1529 !dst->ops->check(dst, 0)) {
1530 dst_release(dst);
1531 sk->sk_rx_dst = NULL;
1532 }
1533 }
1534 tcp_rcv_established(sk, skb);
1535 return 0;
1536 }
1537
1538 if (tcp_checksum_complete(skb))
1539 goto csum_err;
1540
1541 if (sk->sk_state == TCP_LISTEN) {
1542 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1543
1544 if (!nsk)
1545 goto discard;
1546 if (nsk != sk) {
1547 if (tcp_child_process(sk, nsk, skb)) {
1548 rsk = nsk;
1549 goto reset;
1550 }
1551 return 0;
1552 }
1553 } else
1554 sock_rps_save_rxhash(sk, skb);
1555
1556 if (tcp_rcv_state_process(sk, skb)) {
1557 rsk = sk;
1558 goto reset;
1559 }
1560 return 0;
1561
1562reset:
1563 tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles only
	 * if twsk multiplexing is enabled.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
1579
1580int tcp_v4_early_demux(struct sk_buff *skb)
1581{
1582 const struct iphdr *iph;
1583 const struct tcphdr *th;
1584 struct sock *sk;
1585
1586 if (skb->pkt_type != PACKET_HOST)
1587 return 0;
1588
1589 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1590 return 0;
1591
1592 iph = ip_hdr(skb);
1593 th = tcp_hdr(skb);
1594
1595 if (th->doff < sizeof(struct tcphdr) / 4)
1596 return 0;
1597
1598 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1599 iph->saddr, th->source,
1600 iph->daddr, ntohs(th->dest),
1601 skb->skb_iif, inet_sdif(skb));
1602 if (sk) {
1603 skb->sk = sk;
1604 skb->destructor = sock_edemux;
1605 if (sk_fullsock(sk)) {
1606 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1607
1608 if (dst)
1609 dst = dst_check(dst, 0);
1610 if (dst &&
1611 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1612 skb_dst_set_noref(skb, dst);
1613 }
1614 }
1615 return 0;
1616}
1617
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;

	/* Only socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);
1644
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;
	unsigned int eaten = skb->len;
	int err;

	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
	if (!err) {
		eaten -= skb->len;
		TCP_SKB_CB(skb)->end_seq -= eaten;
	}
	return err;
}
EXPORT_SYMBOL(tcp_filter);
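
/* Illustrative note (a hedged reading, not original kernel text): a BPF
 * socket filter attached to sk may accept only a prefix of the segment,
 * in which case sk_filter_trim_cap() trims the tail but never below the
 * TCP header length (th->doff * 4).  The bytes actually eaten are then
 * subtracted from end_seq so later sequence accounting matches the
 * shortened payload.
 */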
1659
static void tcp_v4_restore_cb(struct sk_buff *skb)
{
	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
		sizeof(struct inet_skb_parm));
}

static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
			   const struct tcphdr *th)
{
	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
	 * barrier() makes sure compiler wont play fool^Waliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
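
/* Worked example for the end_seq arithmetic above (made-up numbers, not
 * original kernel text): a segment with seq = 1000 carrying 100 bytes of
 * payload plus a FIN gets end_seq = 1000 + 0 (syn) + 1 (fin) + 100 = 1101,
 * since SYN and FIN each consume one unit of sequence space despite
 * carrying no data.
 */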
1687

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	int sdif = inet_sdif(skb);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
			       th->dest, sdif, &refcounted);
	if (!sk)
		goto no_tcp_socket;
1733
1734process:
1735 if (sk->sk_state == TCP_TIME_WAIT)
1736 goto do_time_wait;
1737
1738 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1739 struct request_sock *req = inet_reqsk(sk);
1740 bool req_stolen = false;
1741 struct sock *nsk;
1742
1743 sk = req->rsk_listener;
1744 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1745 sk_drops_add(sk, skb);
1746 reqsk_put(req);
1747 goto discard_it;
1748 }
1749 if (tcp_checksum_complete(skb)) {
1750 reqsk_put(req);
1751 goto csum_error;
1752 }
1753 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1754 inet_csk_reqsk_queue_drop_and_put(sk, req);
1755 goto lookup;
1756 }
		/* We own a reference on the listener, increase it again
		 * as we might lose it too soon.
		 */
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			iph = ip_hdr(skb);
			tcp_v4_fill_cb(skb, iph, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v4_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
1781 goto discard_and_relse;
1782 }
1783 if (nsk == sk) {
1784 reqsk_put(req);
1785 tcp_v4_restore_cb(skb);
1786 } else if (tcp_child_process(sk, nsk, skb)) {
1787 tcp_v4_send_reset(nsk, skb);
1788 goto discard_and_relse;
1789 } else {
1790 sock_put(sk);
1791 return 0;
1792 }
1793 }
1794 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1795 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1796 goto discard_and_relse;
1797 }
1798
1799 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1800 goto discard_and_relse;
1801
1802 if (tcp_v4_inbound_md5_hash(sk, skb))
1803 goto discard_and_relse;
1804
1805 nf_reset(skb);
1806
1807 if (tcp_filter(sk, skb))
1808 goto discard_and_relse;
1809 th = (const struct tcphdr *)skb->data;
1810 iph = ip_hdr(skb);
1811 tcp_v4_fill_cb(skb, iph, th);
1812
1813 skb->dev = NULL;
1814
1815 if (sk->sk_state == TCP_LISTEN) {
1816 ret = tcp_v4_do_rcv(sk, skb);
1817 goto put_and_return;
1818 }
1819
1820 sk_incoming_cpu_update(sk);
1821
1822 bh_lock_sock_nested(sk);
1823 tcp_segs_in(tcp_sk(sk), skb);
1824 ret = 0;
1825 if (!sock_owned_by_user(sk)) {
1826 ret = tcp_v4_do_rcv(sk, skb);
1827 } else if (tcp_add_backlog(sk, skb)) {
1828 goto discard_and_relse;
1829 }
1830 bh_unlock_sock(sk);
1831
1832put_and_return:
1833 if (refcounted)
1834 sock_put(sk);
1835
1836 return ret;
1837
1838no_tcp_socket:
1839 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1840 goto discard_it;
1841
1842 tcp_v4_fill_cb(skb, iph, th);
1843
1844 if (tcp_checksum_complete(skb)) {
1845csum_error:
1846 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1847bad_packet:
1848 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1849 } else {
1850 tcp_v4_send_reset(NULL, skb);
1851 }
1852
discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;
1857
1858discard_and_relse:
1859 sk_drops_add(sk, skb);
1860 if (refcounted)
1861 sock_put(sk);
1862 goto discard_it;
1863
1864do_time_wait:
1865 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1866 inet_twsk_put(inet_twsk(sk));
1867 goto discard_it;
1868 }
1869
1870 tcp_v4_fill_cb(skb, iph, th);
1871
1872 if (tcp_checksum_complete(skb)) {
1873 inet_twsk_put(inet_twsk(sk));
1874 goto csum_error;
1875 }
1876 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1877 case TCP_TW_SYN: {
1878 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1879 &tcp_hashinfo, skb,
1880 __tcp_hdrlen(th),
1881 iph->saddr, th->source,
1882 iph->daddr, th->dest,
1883 inet_iif(skb),
1884 sdif);
1885 if (sk2) {
1886 inet_twsk_deschedule_put(inet_twsk(sk));
1887 sk = sk2;
1888 tcp_v4_restore_cb(skb);
1889 refcounted = false;
1890 goto process;
1891 }
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
1898 case TCP_TW_RST:
1899 tcp_v4_send_reset(sk, skb);
1900 inet_twsk_deschedule_put(inet_twsk(sk));
1901 goto discard_it;
1902 case TCP_TW_SUCCESS:;
1903 }
1904 goto discard_it;
1905}
1906
1907static struct timewait_sock_ops tcp_timewait_sock_ops = {
1908 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1909 .twsk_unique = tcp_twsk_unique,
1910 .twsk_destructor= tcp_twsk_destructor,
1911};
1912
1913void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1914{
1915 struct dst_entry *dst = skb_dst(skb);
1916
1917 if (dst && dst_hold_safe(dst)) {
1918 sk->sk_rx_dst = dst;
1919 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1920 }
1921}
1922EXPORT_SYMBOL(inet_sk_rx_dst_set);
1923
1924const struct inet_connection_sock_af_ops ipv4_specific = {
1925 .queue_xmit = ip_queue_xmit,
1926 .send_check = tcp_v4_send_check,
1927 .rebuild_header = inet_sk_rebuild_header,
1928 .sk_rx_dst_set = inet_sk_rx_dst_set,
1929 .conn_request = tcp_v4_conn_request,
1930 .syn_recv_sock = tcp_v4_syn_recv_sock,
1931 .net_header_len = sizeof(struct iphdr),
1932 .setsockopt = ip_setsockopt,
1933 .getsockopt = ip_getsockopt,
1934 .addr2sockaddr = inet_csk_addr2sockaddr,
1935 .sockaddr_len = sizeof(struct sockaddr_in),
1936#ifdef CONFIG_COMPAT
1937 .compat_setsockopt = compat_ip_setsockopt,
1938 .compat_getsockopt = compat_ip_getsockopt,
1939#endif
1940 .mtu_reduced = tcp_v4_mtu_reduced,
1941};
1942EXPORT_SYMBOL(ipv4_specific);
1943
1944#ifdef CONFIG_TCP_MD5SIG
1945static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1946 .md5_lookup = tcp_v4_md5_lookup,
1947 .calc_md5_hash = tcp_v4_md5_hash_skb,
1948 .md5_parse = tcp_v4_parse_md5_keys,
1949};
1950#endif
1951

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}
1969
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	trace_tcp_destroy_sock(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_fastopen_destroy_cipher(sk);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
2013EXPORT_SYMBOL(tcp_v4_destroy_sock);
2014
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
2024{
2025 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2026 struct tcp_iter_state *st = seq->private;
2027 struct net *net = seq_file_net(seq);
2028 struct inet_listen_hashbucket *ilb;
2029 struct sock *sk = cur;
2030
2031 if (!sk) {
2032get_head:
2033 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2034 spin_lock(&ilb->lock);
2035 sk = sk_head(&ilb->head);
2036 st->offset = 0;
2037 goto get_sk;
2038 }
2039 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2040 ++st->num;
2041 ++st->offset;
2042
2043 sk = sk_next(sk);
2044get_sk:
2045 sk_for_each_from(sk) {
2046 if (!net_eq(sock_net(sk), net))
2047 continue;
2048 if (sk->sk_family == afinfo->family)
2049 return sk;
2050 }
2051 spin_unlock(&ilb->lock);
2052 st->offset = 0;
2053 if (++st->bucket < INET_LHTABLE_SIZE)
2054 goto get_head;
2055 return NULL;
2056}
2057
2058static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2059{
2060 struct tcp_iter_state *st = seq->private;
2061 void *rc;
2062
2063 st->bucket = 0;
2064 st->offset = 0;
2065 rc = listening_get_next(seq, NULL);
2066
2067 while (rc && *pos) {
2068 rc = listening_get_next(seq, rc);
2069 --*pos;
2070 }
2071 return rc;
2072}
2073
2074static inline bool empty_bucket(const struct tcp_iter_state *st)
2075{
2076 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
2077}
2078

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
2101 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2102 if (sk->sk_family != afinfo->family ||
2103 !net_eq(sock_net(sk), net)) {
2104 continue;
2105 }
2106 rc = sk;
2107 goto out;
2108 }
2109 spin_unlock_bh(lock);
2110 }
2111out:
2112 return rc;
2113}
2114
2115static void *established_get_next(struct seq_file *seq, void *cur)
2116{
2117 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
2118 struct sock *sk = cur;
2119 struct hlist_nulls_node *node;
2120 struct tcp_iter_state *st = seq->private;
2121 struct net *net = seq_file_net(seq);
2122
2123 ++st->num;
2124 ++st->offset;
2125
2126 sk = sk_nulls_next(sk);
2127
2128 sk_nulls_for_each_from(sk, node) {
2129 if (sk->sk_family == afinfo->family &&
2130 net_eq(sock_net(sk), net))
2131 return sk;
2132 }
2133
2134 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2135 ++st->bucket;
2136 return established_get_first(seq);
2137}
2138
2139static void *established_get_idx(struct seq_file *seq, loff_t pos)
2140{
2141 struct tcp_iter_state *st = seq->private;
2142 void *rc;
2143
2144 st->bucket = 0;
2145 rc = established_get_first(seq);
2146
2147 while (rc && pos) {
2148 rc = established_get_next(seq, rc);
2149 --pos;
2150 }
2151 return rc;
2152}
2153
2154static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2155{
2156 void *rc;
2157 struct tcp_iter_state *st = seq->private;
2158
2159 st->state = TCP_SEQ_STATE_LISTENING;
2160 rc = listening_get_idx(seq, &pos);
2161
2162 if (!rc) {
2163 st->state = TCP_SEQ_STATE_ESTABLISHED;
2164 rc = established_get_idx(seq, pos);
2165 }
2166
2167 return rc;
2168}
2169
2170static void *tcp_seek_last_pos(struct seq_file *seq)
2171{
2172 struct tcp_iter_state *st = seq->private;
2173 int offset = st->offset;
2174 int orig_num = st->num;
2175 void *rc = NULL;
2176
2177 switch (st->state) {
2178 case TCP_SEQ_STATE_LISTENING:
2179 if (st->bucket >= INET_LHTABLE_SIZE)
2180 break;
2181 st->state = TCP_SEQ_STATE_LISTENING;
2182 rc = listening_get_next(seq, NULL);
2183 while (offset-- && rc)
2184 rc = listening_get_next(seq, rc);
2185 if (rc)
2186 break;
2187 st->bucket = 0;
2188 st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
2190 case TCP_SEQ_STATE_ESTABLISHED:
2191 if (st->bucket > tcp_hashinfo.ehash_mask)
2192 break;
2193 rc = established_get_first(seq);
2194 while (offset-- && rc)
2195 rc = established_get_next(seq, rc);
2196 }
2197
2198 st->num = orig_num;
2199
2200 return rc;
2201}
2202
2203void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2204{
2205 struct tcp_iter_state *st = seq->private;
2206 void *rc;
2207
2208 if (*pos && *pos == st->last_pos) {
2209 rc = tcp_seek_last_pos(seq);
2210 if (rc)
2211 goto out;
2212 }
2213
2214 st->state = TCP_SEQ_STATE_LISTENING;
2215 st->num = 0;
2216 st->bucket = 0;
2217 st->offset = 0;
2218 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2219
2220out:
2221 st->last_pos = *pos;
2222 return rc;
2223}
2224EXPORT_SYMBOL(tcp_seq_start);
2225
2226void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2227{
2228 struct tcp_iter_state *st = seq->private;
2229 void *rc = NULL;
2230
2231 if (v == SEQ_START_TOKEN) {
2232 rc = tcp_get_idx(seq, 0);
2233 goto out;
2234 }
2235
2236 switch (st->state) {
2237 case TCP_SEQ_STATE_LISTENING:
2238 rc = listening_get_next(seq, v);
2239 if (!rc) {
2240 st->state = TCP_SEQ_STATE_ESTABLISHED;
2241 st->bucket = 0;
2242 st->offset = 0;
2243 rc = established_get_first(seq);
2244 }
2245 break;
2246 case TCP_SEQ_STATE_ESTABLISHED:
2247 rc = established_get_next(seq, v);
2248 break;
2249 }
2250out:
2251 ++*pos;
2252 st->last_pos = *pos;
2253 return rc;
2254}
2255EXPORT_SYMBOL(tcp_seq_next);
2256
2257void tcp_seq_stop(struct seq_file *seq, void *v)
2258{
2259 struct tcp_iter_state *st = seq->private;
2260
2261 switch (st->state) {
2262 case TCP_SEQ_STATE_LISTENING:
2263 if (v != SEQ_START_TOKEN)
2264 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
2265 break;
2266 case TCP_SEQ_STATE_ESTABLISHED:
2267 if (v)
2268 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2269 break;
2270 }
2271}
2272EXPORT_SYMBOL(tcp_seq_stop);
2273
2274static void get_openreq4(const struct request_sock *req,
2275 struct seq_file *f, int i)
2276{
2277 const struct inet_request_sock *ireq = inet_rsk(req);
2278 long delta = req->rsk_timer.expires - jiffies;
2279
2280 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2281 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2282 i,
2283 ireq->ir_loc_addr,
2284 ireq->ir_num,
2285 ireq->ir_rmt_addr,
2286 ntohs(ireq->ir_rmt_port),
2287 TCP_SYN_RECV,
2288 0, 0,
2289 1,
2290 jiffies_delta_to_clock_t(delta),
2291 req->num_timeout,
2292 from_kuid_munged(seq_user_ns(f),
2293 sock_i_uid(req->rsk_listener)),
2294 0,
2295 0,
2296 0,
2297 req);
2298}
2299
2300static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2301{
2302 int timer_active;
2303 unsigned long timer_expires;
2304 const struct tcp_sock *tp = tcp_sk(sk);
2305 const struct inet_connection_sock *icsk = inet_csk(sk);
2306 const struct inet_sock *inet = inet_sk(sk);
2307 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2308 __be32 dest = inet->inet_daddr;
2309 __be32 src = inet->inet_rcv_saddr;
2310 __u16 destp = ntohs(inet->inet_dport);
2311 __u16 srcp = ntohs(inet->inet_sport);
2312 int rx_queue;
2313 int state;
2314
2315 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2316 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2317 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2318 timer_active = 1;
2319 timer_expires = icsk->icsk_timeout;
2320 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2321 timer_active = 4;
2322 timer_expires = icsk->icsk_timeout;
2323 } else if (timer_pending(&sk->sk_timer)) {
2324 timer_active = 2;
2325 timer_expires = sk->sk_timer.expires;
2326 } else {
2327 timer_active = 0;
2328 timer_expires = jiffies;
2329 }
2330
2331 state = inet_sk_state_load(sk);
2332 if (state == TCP_LISTEN)
2333 rx_queue = sk->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2339
2340 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2341 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2342 i, src, srcp, dest, destp, state,
2343 tp->write_seq - tp->snd_una,
2344 rx_queue,
2345 timer_active,
2346 jiffies_delta_to_clock_t(timer_expires - jiffies),
2347 icsk->icsk_retransmits,
2348 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2349 icsk->icsk_probes_out,
2350 sock_i_ino(sk),
2351 refcount_read(&sk->sk_refcnt), sk,
2352 jiffies_to_clock_t(icsk->icsk_rto),
2353 jiffies_to_clock_t(icsk->icsk_ack.ato),
2354 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2355 tp->snd_cwnd,
2356 state == TCP_LISTEN ?
2357 fastopenq->max_qlen :
2358 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2359}
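
/* Hedged sample of the resulting /proc/net/tcp row (made-up values):
 *
 *   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000
 *   00000000     0        0 12345 1 ffff888000000000 100 0 0 10 0
 *
 * i.e. 127.0.0.1:22 in state 0A (TCP_LISTEN), empty queues, no timer
 * pending, no retransmits, uid 0, inode 12345.
 */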
2360
2361static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2362 struct seq_file *f, int i)
2363{
2364 long delta = tw->tw_timer.expires - jiffies;
2365 __be32 dest, src;
2366 __u16 destp, srcp;
2367
2368 dest = tw->tw_daddr;
2369 src = tw->tw_rcv_saddr;
2370 destp = ntohs(tw->tw_dport);
2371 srcp = ntohs(tw->tw_sport);
2372
2373 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2374 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2375 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2376 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2377 refcount_read(&tw->tw_refcnt), tw);
2378}
2379
#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}
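
/* Illustrative output (hypothetical values for a socket listening on
 * 127.0.0.1:22; addresses and ports are hexadecimal, state 0A is
 * TCP_LISTEN):
 *
 *   sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
 *    0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 0000000000000000 100 0 0 10 0
 */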

static const struct seq_operations tcp4_seq_ops = {
	.show		= tcp4_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.family		= AF_INET,
};

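/* Each network namespace gets its own /proc/net/tcp entry, created and
 * removed together with the namespace through pernet_operations.
 */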
static int __net_init tcp4_proc_init_net(struct net *net)
{
	if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
			sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
		return -ENOMEM;
	return 0;
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	remove_proc_entry("tcp", net->proc_net);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

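/* The IPv4 TCP protocol interface: the method table the core socket
 * layer dispatches through for every AF_INET SOCK_STREAM socket.
 */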
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

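/* Per-namespace teardown: drop the reference on the congestion control
 * module and destroy the per-cpu control sockets the stack uses to
 * send RSTs and ACKs on behalf of sockets it does not own.
 */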
static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	module_put(net->ipv4.tcp_congestion_control->owner);

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}

static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

		/* Please enforce IP_DF and IPID==0 for RST and
		 * ACK sent in SYN-RECV and TIME-WAIT state.
		 */
		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;

		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

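	/* Per-namespace sysctl defaults: a fresh namespace starts from
	 * these values rather than inheriting the parent's runtime
	 * settings (except tcp_rmem/tcp_wmem, copied from init_net
	 * further below).
	 */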
	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 2;	/* 2: reuse TIME_WAIT ports for loopback connections only */

	cnt = tcp_hashinfo.ehash_mask + 1;
	/* Scale the TIME_WAIT bucket and SYN backlog limits with the
	 * size of the established hash table.
	 */
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior.  */
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;

	/* Default TSQ limit of four TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 262144;

	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}
	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Reno is always built in */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}

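/* Batched namespace teardown: flush any TIME_WAIT sockets still bound
 * to the dying namespaces, then free their TCP fastopen contexts.
 */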
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}
