/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

int sysctl_tcp_low_latency __read_mostly;

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

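/* The initial sequence number and timestamp offset are derived from the
 * connection 4-tuple, so they are hard to predict off-path yet stable
 * for a given flow.
 */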
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

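/* Decide whether a TIME-WAIT socket may be reused for a new outgoing
 * connection to the same 4-tuple.  With timestamps (PAWS) this is safe
 * for data integrity: the timestamp state cached in the TIME-WAIT
 * bucket is carried over, so the peer can tell the old and the new
 * incarnation of the connection apart.
 */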
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
		      get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

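/* This will initiate an outgoing connection. */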
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

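/* An ICMP redirect was received for this flow: have the routing layer
 * update the cached destination, if the socket still holds one.
 */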
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb));
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq,
				   type == ICMP_PARAMETERPROB ||
				   type == ICMP_TIME_EXCEEDED ||
				   (type == ICMP_DEST_UNREACH &&
				    (code == ICMP_NET_UNREACH ||
				     code == ICMP_HOST_UNREACH)));

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always < 576 bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if this ICMP message allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);
			/* Wake people up to see the error
			 * (see connect in sock.c)
			 */
			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		provided by the arriving segment, not on socket state.
 */
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;

	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */
static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
					     tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

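/* Exact-match variant of the lookup above: address, family and prefix
 * length must all match, so add/del operate on a single entry.
 */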
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	key->prefixlen = prefixlen;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
		   u8 prefixlen)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 prefixlen = 32;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 32)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET, prefixlen);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

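/* Feed the TCP pseudo-header (addresses, protocol, segment length) and a
 * copy of the TCP header with a zeroed checksum field into the MD5 hash,
 * as RFC 2385 requires.
 */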
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
				     : "");
		return true;
	}
	return false;
#endif
	return false;
}

static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	ireq->opt = tcp_v4_save_options(skb);
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet_csk_route_req(sk, &fl->u.ip4, req);
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_seq,
	.init_ts_off	=	tcp_v4_init_ts_off,
	.send_synack	=	tcp_v4_send_synack,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shame.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req)
		tcp_move_syn(newtp, req);

	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

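/* For a non-SYN segment that reached a listener (typically the final ACK
 * of a handshake whose request state was dropped under SYN flood), try
 * to validate it as a SYN cookie; on success a child socket is returned.
 */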
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles only
	 * as much as necessary to avoid that.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

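/* Early demux: look up an established socket already at the IP layer,
 * before routing, so the socket's cached rx dst can be reused and the
 * second hash lookup in tcp_v4_rcv() is avoided.
 */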
void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8)				  --ANK
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	/* Before escaping RCU protected region, we need to take care of skb
	 * dst. Prequeue is only enabled for established sockets.
	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
	 * Instead of doing full sk_rx_dst validity here, let's perform
	 * an optimistic check.
	 */
	if (likely(sk->sk_rx_dst))
		skb_dst_drop(skb);
	else
		skb_dst_force_safe(skb);

	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
	    tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));
		__NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
				skb_queue_len(&tp->ucopy.prequeue));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
			sk_backlog_rcv(sk, skb1);

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_sync_poll(sk_sleep(sk),
					   POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;

	/* Only socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);

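/* Run the socket filter and, if it trimmed the packet, shrink end_seq so
 * the TCP sequence space stays consistent with the shortened payload.
 * The filter may not trim below the TCP header (th->doff * 4 bytes).
 */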
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;
	unsigned int eaten = skb->len;
	int err;

	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
	if (!err) {
		eaten -= skb->len;
		TCP_SKB_CB(skb)->end_seq -= eaten;
	}
	return err;
}
EXPORT_SYMBOL(tcp_filter);

/*
 *	From tcp_input.c
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
	/* This is tricky : We move IPCB at its correct location into
	 * TCP_SKB_CB(); barrier() makes sure compiler won't play
	 * aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;

lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
			       th->dest, &refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		/* We own a reference on the listener, increase it again
		 * as we might lose it too soon.
		 */
		sock_hold(sk);
		refcounted = true;
		if (tcp_filter(sk, skb))
			goto discard_and_relse;
		nsk = tcp_check_req(sk, skb, req, false);
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v4_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	nf_reset(skb);

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo, skb,
							__tcp_hdrlen(th),
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule_put(inet_twsk(sk));
			sk = sk2;
			refcounted = false;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v4_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	struct inet_listen_hashbucket *ilb;
	struct sock *sk = cur;

	if (!sk) {
get_head:
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock(&ilb->lock);
		sk = sk_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family)
			return sk;
	}
	spin_unlock(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE)
		goto get_head;
	return NULL;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family	= afinfo->family;
	s->last_pos	= 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start	= tcp_seq_start;
	afinfo->seq_ops.next	= tcp_seq_next;
	afinfo->seq_ops.stop	= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	remove_proc_entry(afinfo->name, net->proc_net);
}
EXPORT_SYMBOL(tcp_proc_unregister);

static void get_openreq4(const struct request_sock *req,
			 struct seq_file *f, int i)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->rsk_timer.expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ireq->ir_num,
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f),
				 sock_i_uid(req->rsk_listener)),
		0,  /* non standard timer */
		0,  /* open_requests have no inode */
		0,
		req);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;
	int state;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = sk_state_load(sk);
	if (state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		refcount_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		state == TCP_LISTEN ?
		    fastopenq->max_qlen :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}

static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	__be32 dest, src;
	__u16 destp, srcp;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		refcount_read(&tw->tw_refcnt), tw);
}

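/* Fixed width of one /proc/net/tcp record: seq_setwidth()/seq_pad()
 * below pad shorter rows so every line occupies the same number of bytes.
 */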
#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

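/* Each netns keeps one kernel control socket per possible CPU; these are
 * what tcp_v4_send_reset()/tcp_v4_send_ack() use, via
 * ip_send_unicast_reply(), to emit replies without a full socket context.
 */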
static void __net_exit tcp_sk_exit(struct net *net)
{
	int cpu;

	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
	free_percpu(net->ipv4.tcp_sk);
}

static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	net->ipv4.sysctl_tcp_tw_reuse = 0;

	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;

	return 0;
fail:
	tcp_sk_exit(net);

	return res;
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}