// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 *		IPv4 specific functions
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);
	int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;

	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;
		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
			     (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
			     (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as all
	   other states, the check number 1.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 *
		 * Without this check re-using a TIME-WAIT socket with TCP
		 * repair would accumulate a -1 on the repair assigned
		 * sequence number. The first time it is reused the sequence
		 * is -1, the second time -2, etc. This fixes that issue
		 * without affecting the validity.
		 */
		if (likely(!tp->repair)) {
			tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
			if (tp->write_seq == 0)
				tp->write_seq = 1;
			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

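/* For reference: the net.ipv4.tcp_tw_reuse sysctl consumed above is
 * tri-state (see Documentation/networking/ip-sysctl.txt): 0 disables
 * TIME-WAIT reuse, 1 enables it globally, and 2 enables it for loopback
 * traffic only, which is what the reuse == 2 branch implements. A minimal
 * sketch of selecting the loopback-only mode from userspace, assuming the
 * usual procps sysctl tool:
 *
 *	sysctl -w net.ipv4.tcp_tw_reuse=2
 */
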
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v4_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq,
				   type == ICMP_PARAMETERPROB ||
				   type == ICMP_TIME_EXCEEDED ||
				   (type == ICMP_DEST_UNREACH &&
				    (code == ICMP_NET_UNREACH ||
				     code == ICMP_HOST_UNREACH)));

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_rtx_queue_head(sk);
		BUG_ON(!skb);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PARAM_PROB.
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */
	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);
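
/* For context: __tcp_v4_send_check() above only seeds th->check with the
 * complemented pseudo-header sum and points csum_start/csum_offset at the
 * TCP checksum field; the device (or skb_checksum_help()) folds in the
 * header and payload later (CHECKSUM_PARTIAL). A minimal sketch of the
 * equivalent all-software computation, assuming th->check was first zeroed
 * and "len" covers the TCP header plus payload:
 *
 *	__wsum csum = csum_partial(th, len, 0);
 *	th->check = csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, csum);
 */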

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side. Anyway...
 *
 *	But... Doctor Watson did the following in "The Adventure of the Second Stain":
 *	     "Where there is no imagination there is no horror".
 */
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;
	struct sock *ctl_sk;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb),
					     tcp_v4_sdif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;

	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
	if (sk)
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */
static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;
	struct sock *ctl_sk;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
	if (sk)
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
					     tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq_opt_deref(ireq));
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);
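
/* Illustration (not from the original source): with two keys configured,
 * one for 10.0.0.0 with prefixlen 8 and one for 10.1.2.3 with prefixlen 32,
 * a lookup for peer 10.1.2.3 matches both, and the loop above returns the
 * /32 key because the longest matching prefix wins; a lookup for 10.9.9.9
 * matches only the /8 key.
 */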

static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	key->prefixlen = prefixlen;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
		   u8 prefixlen)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 prefixlen = 32;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 32)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET, prefixlen);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}
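
/* For reference: the userspace side of the setsockopt parsed above. A
 * minimal sketch (error handling omitted; the peer address is assumed to
 * already be in a struct sockaddr_in named "peer") of installing an
 * RFC 2385 key on a connected or listening TCP socket:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *
 *	memcpy(&md5.tcpm_addr, &peer, sizeof(peer));
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * TCP_MD5SIG_EXT with tcpm_flags = TCP_MD5SIG_FLAG_PREFIX additionally
 * honours tcpm_prefixlen, matching the prefixlen handling above.
 */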

static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}

static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = sock_net(sk_listener);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet_csk_route_req(sk, &fl->u.ip4, req);
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_seq,
	.init_ts_off	=	tcp_v4_init_ts_off,
	.send_synack	=	tcp_v4_send_synack,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs send to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = rcu_dereference(ireq->ireq_opt);
	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (likely(*own_req)) {
		tcp_move_syn(newtp, req);
		ireq->ireq_opt = NULL;
	} else {
		newinet->inet_opt = NULL;
	}
	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	newinet->inet_opt = NULL;
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}

/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

int tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return 0;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return 0;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return 0;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif, inet_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
	return 0;
}

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;

	/* Only socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);
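
/* Worked example (illustrative, assuming the common defaults of
 * tcp_rmem[1] = 87380 and tcp_wmem[1] = 16384 before any auto-tuning):
 * limit = 87380 + 16384 + 65536 = 169300 bytes of truesize that the
 * backlog may hold before sk_add_backlog() starts refusing segments.
 */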

int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;
	unsigned int eaten = skb->len;
	int err;

	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
	if (!err) {
		eaten -= skb->len;
		TCP_SKB_CB(skb)->end_seq -= eaten;
	}
	return err;
}
EXPORT_SYMBOL(tcp_filter);

static void tcp_v4_restore_cb(struct sk_buff *skb)
{
	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
		sizeof(struct inet_skb_parm));
}

static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
			   const struct tcphdr *th)
{
	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
	 * barrier() makes sure compiler wont play fool^Waliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
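
/* Worked example for the end_seq computation above (illustrative): SYN and
 * FIN each occupy one unit of sequence space, so a pure SYN with seq = S
 * and no payload yields end_seq = S + 1, while a data segment carrying
 * 100 bytes at seq = S yields end_seq = S + 100.
 */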

/*
 *	From tcp_input.c
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	int sdif = inet_sdif(skb);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
			       th->dest, sdif, &refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		/* We own a reference on the listener, increase it again
		 * as we might lose it too soon.
		 */
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			iph = ip_hdr(skb);
			tcp_v4_fill_cb(skb, iph, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v4_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v4_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v4_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	nf_reset(skb);

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
	tcp_v4_fill_cb(skb, iph, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v4_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo, skb,
							__tcp_hdrlen(th),
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb),
							sdif);
		if (sk2) {
			inet_twsk_deschedule_put(inet_twsk(sk));
			sk = sk2;
			tcp_v4_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v4_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	trace_tcp_destroy_sock(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 data, if present. */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_fastopen_destroy_cipher(sk);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	struct inet_listen_hashbucket *ilb;
	struct sock *sk = cur;

	if (!sk) {
get_head:
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock(&ilb->lock);
		sk = sk_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == afinfo->family)
			return sk;
	}
	spin_unlock(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE)
		goto get_head;
	return NULL;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != afinfo->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == afinfo->family &&
		    net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fall through */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_start);

void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_next);

void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
EXPORT_SYMBOL(tcp_seq_stop);

static void get_openreq4(const struct request_sock *req,
			 struct seq_file *f, int i)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->rsk_timer.expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ireq->ir_num,
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f),
				 sock_i_uid(req->rsk_listener)),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		0,
		req);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;
	int state;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sk);
	if (state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		refcount_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		state == TCP_LISTEN ?
		    fastopenq->max_qlen :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
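
/* Example of a /proc/net/tcp row produced by the format string above
 * (all values illustrative only): a listener on 127.0.0.1:22 could show as
 *
 *   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 0000000000000000 100 0 0 10 0
 *
 * where 0A is TCP_LISTEN, ports are hex via the ntohs() conversions above,
 * and addresses are the raw __be32 printed in hex (hence 0100007F for
 * 127.0.0.1 on a little-endian machine).
 */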

static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	__be32 dest, src;
	__u16 destp, srcp;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		refcount_read(&tw->tw_refcnt), tw);
}
2376
#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}
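
/* A quick way to see the result from userspace; the sample record
 * below is illustrative only (a loopback listener on port 22), not
 * captured output:
 *
 *   $ head -2 /proc/net/tcp
 *     sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
 *      0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 0000000000000000 100 0 0 10 0
 */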
static const struct seq_operations tcp4_seq_ops = {
	.show		= tcp4_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.family		= AF_INET,
};

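/* Register /proc/net/tcp in each network namespace; the iterator
 * state (struct tcp_iter_state) is allocated per open file by
 * proc_create_net_data(), with tcp4_seq_afinfo as its private data.
 */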
static int __net_init tcp4_proc_init_net(struct net *net)
{
	if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
			sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
		return -ENOMEM;
	return 0;
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	remove_proc_entry("tcp", net->proc_net);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

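/* The protocol operations behind every AF_INET/SOCK_STREAM socket:
 * this table wires the generic socket and inet layers to the TCP
 * entry points defined in this file and in the rest of net/ipv4.
 */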
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	module_put(net->ipv4.tcp_congestion_control->owner);

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}

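/* Per-namespace setup: create one hidden control socket per possible
 * CPU (used for sending RSTs and ACKs on behalf of sockets we do not
 * own a reference to), then seed the namespace-local defaults for
 * every TCP sysctl.
 */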
static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 2;

	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior.  */
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
	/* Default TSQ limit of four TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}
	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Reno is always built in */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}

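/* Called once per batch of exiting namespaces: purge any IPv4
 * timewait sockets that still reference them, then release each
 * namespace's TCP fastopen context.
 */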
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
       .init	   = tcp_sk_init,
       .exit	   = tcp_sk_exit,
       .exit_batch = tcp_sk_exit_batch,
};

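/* Entry point called from inet_init(); TCP cannot operate without its
 * per-netns control sockets, so failure here is fatal.
 */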
void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}