/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the primary means of communication with the user
 *		level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

int sysctl_tcp_low_latency __read_mostly;

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
{
        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
                                          ip_hdr(skb)->saddr,
                                          tcp_hdr(skb)->dest,
                                          tcp_hdr(skb)->source, tsoff);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
        struct tcp_sock *tp = tcp_sk(sk);

        /* With PAWS, it is safe from the viewpoint of data integrity to
         * reuse a TIME-WAIT socket for a new outgoing connection, provided
         * the peer's timestamps keep moving forward.  The TIME-WAIT bucket
         * works as a per port-pair timestamp cache: if it still holds a
         * recent timestamp, inherit it and pick a write_seq beyond the old
         * connection's sequence space.
         */
        if (tcptw->tw_ts_recent_stamp &&
            (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
                      get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                if (tp->write_seq == 0)
                        tp->write_seq = 1;
                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                sock_hold(sktw);
                return 1;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        __be16 orig_sport, orig_dport;
        __be32 daddr, nexthop;
        struct flowi4 *fl4;
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;

        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;

        if (usin->sin_family != AF_INET)
                return -EAFNOSUPPORT;

        nexthop = daddr = usin->sin_addr.s_addr;
        inet_opt = rcu_dereference_protected(inet->inet_opt,
                                             lockdep_sock_is_held(sk));
        if (inet_opt && inet_opt->opt.srr) {
                if (!daddr)
                        return -EINVAL;
                nexthop = inet_opt->opt.faddr;
        }

        orig_sport = inet->inet_sport;
        orig_dport = usin->sin_port;
        fl4 = &inet->cork.fl.u.ip4;
        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                              IPPROTO_TCP,
                              orig_sport, orig_dport, sk);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                if (err == -ENETUNREACH)
                        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
                return err;
        }

        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
                ip_rt_put(rt);
                return -ENETUNREACH;
        }

        if (!inet_opt || !inet_opt->opt.srr)
                daddr = fl4->daddr;

        if (!inet->inet_saddr)
                inet->inet_saddr = fl4->saddr;
        sk_rcv_saddr_set(sk, inet->inet_saddr);

        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
                /* Reset inherited state */
                tp->rx_opt.ts_recent       = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                if (likely(!tp->repair))
                        tp->write_seq      = 0;
        }

        if (tcp_death_row.sysctl_tw_recycle &&
            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
                tcp_fetch_timewait_stamp(sk, &rt->dst);

        inet->inet_dport = usin->sin_port;
        sk_daddr_set(sk, daddr);

        inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

        /* Socket identity is still unknown (sport may be zero).
         * However we set state to SYN-SENT and not releasing socket
         * lock select source port, enter ourselves into the hash tables and
         * complete initialization after this.
         */
        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet_hash_connect(&tcp_death_row, sk);
        if (err)
                goto failure;

        sk_set_txhash(sk);

        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
                               inet->inet_sport, inet->inet_dport, sk);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                rt = NULL;
                goto failure;
        }
        /* OK, now commit destination to socket.  */
        sk->sk_gso_type = SKB_GSO_TCPV4;
        sk_setup_caps(sk, &rt->dst);

        if (!tp->write_seq && likely(!tp->repair))
                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
                                                           inet->inet_daddr,
                                                           inet->inet_sport,
                                                           usin->sin_port,
                                                           &tp->tsoffset);

        inet->inet_id = tp->write_seq ^ jiffies;

        err = tcp_connect(sk);

        rt = NULL;
        if (err)
                goto failure;

        return 0;

failure:
        /*
         * This unhashes the socket and releases the local port,
         * if necessary.
         */
        tcp_set_state(sk, TCP_CLOSE);
        ip_rt_put(rt);
        sk->sk_route_caps = 0;
        inet->inet_dport = 0;
        return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined
 * in RFC 1191.  It can be called through tcp_release_cb() if the socket
 * was owned by the user at the time tcp_v4_err() handled the ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        struct inet_sock *inet = inet_sk(sk);
        u32 mtu = tcp_sk(sk)->mtu_info;

        dst = inet_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        /* Something is about to be wrong... Remember soft error
         * for the case, if this connection will not be able to recover.
         */
        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
                sk->sk_err_soft = EMSGSIZE;

        mtu = dst_mtu(dst);

        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
            ip_sk_accept_pmtu(sk) &&
            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
                tcp_sync_mss(sk, mtu);

                /* Resend the TCP packet because it's
                 * clear that the old packet has been
                 * dropped. This is the new "fast" path mtu
                 * discovery.
                 */
                tcp_simple_retransmit(sk);
        } /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
        struct dst_entry *dst = __sk_dst_check(sk, 0);

        if (dst)
                dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
        struct request_sock *req = inet_reqsk(sk);
        struct net *net = sock_net(sk);

        /* ICMPs are not backlogged, hence we cannot get
         * an established socket here.
         */
        if (seq != tcp_rsk(req)->snt_isn) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
        } else if (abort) {
                /*
                 * Still in SYN_RECV, just remove it silently.
                 * There is no good way to pass the error to the newly
                 * created socket, and POSIX does not want network
                 * errors returned from accept().
                 */
                inet_csk_reqsk_queue_drop(req->rsk_listener, req);
                tcp_listendrop(req->rsk_listener);
        }
        reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some sort of
 * error condition.  If err < 0 then the socket should be closed and the
 * error returned to the user.  If err > 0 it's just the icmp type << 8
 * | icmp code.  After adjustment header points to the first 8 bytes of
 * the tcp header.  We need to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic".  When someone else
 * accesses the socket the ICMP is just dropped and for some paths there
 * is no check at all.  A more general error queue to queue errors for
 * later handling is probably better.
 */
353void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
354{
355 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
356 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
357 struct inet_connection_sock *icsk;
358 struct tcp_sock *tp;
359 struct inet_sock *inet;
360 const int type = icmp_hdr(icmp_skb)->type;
361 const int code = icmp_hdr(icmp_skb)->code;
362 struct sock *sk;
363 struct sk_buff *skb;
364 struct request_sock *fastopen;
365 __u32 seq, snd_una;
366 __u32 remaining;
367 int err;
368 struct net *net = dev_net(icmp_skb->dev);
369
370 sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
371 th->dest, iph->saddr, ntohs(th->source),
372 inet_iif(icmp_skb));
373 if (!sk) {
374 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
375 return;
376 }
377 if (sk->sk_state == TCP_TIME_WAIT) {
378 inet_twsk_put(inet_twsk(sk));
379 return;
380 }
381 seq = ntohl(th->seq);
382 if (sk->sk_state == TCP_NEW_SYN_RECV)
383 return tcp_req_err(sk, seq,
384 type == ICMP_PARAMETERPROB ||
385 type == ICMP_TIME_EXCEEDED ||
386 (type == ICMP_DEST_UNREACH &&
387 (code == ICMP_NET_UNREACH ||
388 code == ICMP_HOST_UNREACH)));
389
390 bh_lock_sock(sk);
        /* If too many ICMPs get dropped on busy
         * servers this needs to be solved differently.
         * We do take care of PMTU discovery (RFC1191) special case :
         * we can receive locally generated ICMP messages while socket is held.
         */
396 if (sock_owned_by_user(sk)) {
397 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
398 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
399 }
400 if (sk->sk_state == TCP_CLOSE)
401 goto out;
402
403 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
404 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
405 goto out;
406 }
407
408 icsk = inet_csk(sk);
409 tp = tcp_sk(sk);
410
411 fastopen = tp->fastopen_rsk;
412 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
413 if (sk->sk_state != TCP_LISTEN &&
414 !between(seq, snd_una, tp->snd_nxt)) {
415 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
416 goto out;
417 }
418
419 switch (type) {
420 case ICMP_REDIRECT:
421 do_redirect(icmp_skb, sk);
422 goto out;
423 case ICMP_SOURCE_QUENCH:
                /* Just silently ignore these. */
425 goto out;
426 case ICMP_PARAMETERPROB:
427 err = EPROTO;
428 break;
429 case ICMP_DEST_UNREACH:
430 if (code > NR_ICMP_UNREACH)
431 goto out;
432
433 if (code == ICMP_FRAG_NEEDED) {
                        /* We are not interested in TCP_LISTEN and open_requests
                         * (SYN-ACKs sent out by Linux are always < 576 bytes so
                         * they should go through unfragmented).
                         */
438 if (sk->sk_state == TCP_LISTEN)
439 goto out;
440
441 tp->mtu_info = info;
442 if (!sock_owned_by_user(sk)) {
443 tcp_v4_mtu_reduced(sk);
444 } else {
445 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
446 sock_hold(sk);
447 }
448 goto out;
449 }
450
451 err = icmp_err_convert[code].errno;
                /* check if icmp_skb allows revert of backoff
                 * (see draft-zimmermann-tcp-lcd) */
454 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
455 break;
456 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
457 !icsk->icsk_backoff || fastopen)
458 break;
459
460 if (sock_owned_by_user(sk))
461 break;
462
463 icsk->icsk_backoff--;
464 icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
465 TCP_TIMEOUT_INIT;
466 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
467
468 skb = tcp_write_queue_head(sk);
469 BUG_ON(!skb);
470
471 remaining = icsk->icsk_rto -
472 min(icsk->icsk_rto,
473 tcp_time_stamp - tcp_skb_timestamp(skb));
474
475 if (remaining) {
476 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
477 remaining, TCP_RTO_MAX);
478 } else {
                        /* RTO revert clocked out retransmission.
                         * Will retransmit now */
481 tcp_retransmit_timer(sk);
482 }
483
484 break;
485 case ICMP_TIME_EXCEEDED:
486 err = EHOSTUNREACH;
487 break;
488 default:
489 goto out;
490 }
491
492 switch (sk->sk_state) {
493 case TCP_SYN_SENT:
494 case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
498 if (fastopen && !fastopen->sk)
499 break;
500
501 if (!sock_owned_by_user(sk)) {
502 sk->sk_err = err;
503
504 sk->sk_error_report(sk);
505
506 tcp_done(sk);
507 } else {
508 sk->sk_err_soft = err;
509 }
510 goto out;
511 }

        /* If we've already connected we will keep trying
         * until we time out, or the user gives up.
         *
         * rfc1122 4.2.3.9 allows to consider as hard errors
         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
         * but it is obsoleted by pmtu discovery).
         *
         * Note, that in modern internet, where routing is unreliable
         * and in each dark corner broken firewalls sit, sending random
         * errors ordered by their masters even these two messages finally lose
         * their original sense (even Linux sends invalid PORT_UNREACHs).
         *
         * Now we are in compliance with RFCs.
         */
529 inet = inet_sk(sk);
530 if (!sock_owned_by_user(sk) && inet->recverr) {
531 sk->sk_err = err;
532 sk->sk_error_report(sk);
533 } else {
534 sk->sk_err_soft = err;
535 }
536
537out:
538 bh_unlock_sock(sk);
539 sock_put(sk);
540}

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct tcphdr *th = tcp_hdr(skb);

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
                skb->csum_start = skb_transport_header(skb) - skb->head;
                skb->csum_offset = offsetof(struct tcphdr, check);
        } else {
                th->check = tcp_v4_check(skb->len, saddr, daddr,
                                         csum_partial(th,
                                                      th->doff << 2,
                                                      skb->csum));
        }
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
        const struct inet_sock *inet = inet_sk(sk);

        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *      This routine will send an RST to the other tcp.
 *
 *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *                    for reset.
 *      Answer: if a packet caused RST, it is not for a socket
 *              existing in our system, if it is matched to a socket,
 *              it is just duplicate segment or bug in other side's TCP.
 *              So that we build reply only basing on parameters
 *              provided by a packet, and its TIME_WAIT state info.
 */
580static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
581{
582 const struct tcphdr *th = tcp_hdr(skb);
583 struct {
584 struct tcphdr th;
585#ifdef CONFIG_TCP_MD5SIG
586 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
587#endif
588 } rep;
589 struct ip_reply_arg arg;
590#ifdef CONFIG_TCP_MD5SIG
591 struct tcp_md5sig_key *key = NULL;
592 const __u8 *hash_location = NULL;
593 unsigned char newhash[16];
594 int genhash;
595 struct sock *sk1 = NULL;
596#endif
597 struct net *net;
598
        /* Never send a reset in response to a reset. */
600 if (th->rst)
601 return;
602
        /* If sk not NULL, it means we did a successful lookup and the incoming
         * route had to be correct. prequeue might have dropped our dst.
         */
606 if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
607 return;
608
        /* Swap the send and the receive. */
610 memset(&rep, 0, sizeof(rep));
611 rep.th.dest = th->source;
612 rep.th.source = th->dest;
613 rep.th.doff = sizeof(struct tcphdr) / 4;
614 rep.th.rst = 1;
615
616 if (th->ack) {
617 rep.th.seq = th->ack_seq;
618 } else {
619 rep.th.ack = 1;
620 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
621 skb->len - (th->doff << 2));
622 }
623
624 memset(&arg, 0, sizeof(arg));
625 arg.iov[0].iov_base = (unsigned char *)&rep;
626 arg.iov[0].iov_len = sizeof(rep.th);
627
628 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
629#ifdef CONFIG_TCP_MD5SIG
630 rcu_read_lock();
631 hash_location = tcp_parse_md5sig_option(th);
632 if (sk && sk_fullsock(sk)) {
633 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
634 &ip_hdr(skb)->saddr, AF_INET);
635 } else if (hash_location) {
                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket.  We do not lose security here:
                 * the incoming packet is checked against the md5 hash of the
                 * found key, and no RST is generated if the hash doesn't match.
                 */
643 sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
644 ip_hdr(skb)->saddr,
645 th->source, ip_hdr(skb)->daddr,
646 ntohs(th->source), inet_iif(skb));
647
648 if (!sk1)
649 goto out;
650
651 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
652 &ip_hdr(skb)->saddr, AF_INET);
653 if (!key)
654 goto out;
655
656
657 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
658 if (genhash || memcmp(hash_location, newhash, 16) != 0)
659 goto out;
660
661 }
662
663 if (key) {
664 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
665 (TCPOPT_NOP << 16) |
666 (TCPOPT_MD5SIG << 8) |
667 TCPOLEN_MD5SIG);
668
669 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
670 rep.th.doff = arg.iov[0].iov_len / 4;
671
672 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
673 key, ip_hdr(skb)->saddr,
674 ip_hdr(skb)->daddr, &rep.th);
675 }
676#endif
677 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
678 ip_hdr(skb)->saddr,
679 arg.iov[0].iov_len, IPPROTO_TCP, 0);
680 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
681 arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
682
        /* When socket is gone, all binding information is lost.
         * routing might fail in this case. No choice here, if we choose to force
         * input interface, we will misroute in case of asymmetric route.
         */
687 if (sk)
688 arg.bound_dev_if = sk->sk_bound_dev_if;
689
690 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
691 offsetof(struct inet_timewait_sock, tw_bound_dev_if));
692
693 arg.tos = ip_hdr(skb)->tos;
694 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
695 local_bh_disable();
696 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
697 skb, &TCP_SKB_CB(skb)->header.h4.opt,
698 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
699 &arg, arg.iov[0].iov_len);
700
701 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
702 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
703 local_bh_enable();
704
705#ifdef CONFIG_TCP_MD5SIG
706out:
707 rcu_read_unlock();
708#endif
709}
710
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 * outside socket context is ugly, certainly. What can I do?
 */

715static void tcp_v4_send_ack(const struct sock *sk,
716 struct sk_buff *skb, u32 seq, u32 ack,
717 u32 win, u32 tsval, u32 tsecr, int oif,
718 struct tcp_md5sig_key *key,
719 int reply_flags, u8 tos)
720{
721 const struct tcphdr *th = tcp_hdr(skb);
722 struct {
723 struct tcphdr th;
724 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
725#ifdef CONFIG_TCP_MD5SIG
726 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
727#endif
728 ];
729 } rep;
730 struct net *net = sock_net(sk);
731 struct ip_reply_arg arg;
732
733 memset(&rep.th, 0, sizeof(struct tcphdr));
734 memset(&arg, 0, sizeof(arg));
735
736 arg.iov[0].iov_base = (unsigned char *)&rep;
737 arg.iov[0].iov_len = sizeof(rep.th);
738 if (tsecr) {
739 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
740 (TCPOPT_TIMESTAMP << 8) |
741 TCPOLEN_TIMESTAMP);
742 rep.opt[1] = htonl(tsval);
743 rep.opt[2] = htonl(tsecr);
744 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
745 }
746
        /* Swap the send and the receive. */
748 rep.th.dest = th->source;
749 rep.th.source = th->dest;
750 rep.th.doff = arg.iov[0].iov_len / 4;
751 rep.th.seq = htonl(seq);
752 rep.th.ack_seq = htonl(ack);
753 rep.th.ack = 1;
754 rep.th.window = htons(win);
755
756#ifdef CONFIG_TCP_MD5SIG
757 if (key) {
758 int offset = (tsecr) ? 3 : 0;
759
760 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
761 (TCPOPT_NOP << 16) |
762 (TCPOPT_MD5SIG << 8) |
763 TCPOLEN_MD5SIG);
764 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
765 rep.th.doff = arg.iov[0].iov_len/4;
766
767 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
768 key, ip_hdr(skb)->saddr,
769 ip_hdr(skb)->daddr, &rep.th);
770 }
771#endif
772 arg.flags = reply_flags;
773 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
774 ip_hdr(skb)->saddr,
775 arg.iov[0].iov_len, IPPROTO_TCP, 0);
776 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
777 if (oif)
778 arg.bound_dev_if = oif;
779 arg.tos = tos;
780 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
781 local_bh_disable();
782 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
783 skb, &TCP_SKB_CB(skb)->header.h4.opt,
784 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
785 &arg, arg.iov[0].iov_len);
786
787 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
788 local_bh_enable();
789}
790
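/* Send an ACK on behalf of a TIME-WAIT socket, using the state remembered
 * in the tw bucket (next sequence numbers, window, timestamps).
 */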
791static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
792{
793 struct inet_timewait_sock *tw = inet_twsk(sk);
794 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
795
796 tcp_v4_send_ack(sk, skb,
797 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
798 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
799 tcp_time_stamp + tcptw->tw_ts_offset,
800 tcptw->tw_ts_recent,
801 tw->tw_bound_dev_if,
802 tcp_twsk_md5_key(tcptw),
803 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
804 tw->tw_tos
805 );
806
807 inet_twsk_put(tw);
808}
809
810static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
811 struct request_sock *req)
812{
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
                                                 tcp_sk(sk)->snd_nxt;

        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
824 tcp_v4_send_ack(sk, skb, seq,
825 tcp_rsk(req)->rcv_nxt,
826 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
827 tcp_time_stamp + tcp_rsk(req)->ts_off,
828 req->ts_recent,
829 0,
830 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
831 AF_INET),
832 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
833 ip_hdr(skb)->tos);
834}
835
/*
 *      Send a SYN-ACK after having received a SYN.
 *      This still operates on a request_sock only, not on a big
 *      socket.
 */
841static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
842 struct flowi *fl,
843 struct request_sock *req,
844 struct tcp_fastopen_cookie *foc,
845 enum tcp_synack_type synack_type)
846{
847 const struct inet_request_sock *ireq = inet_rsk(req);
848 struct flowi4 fl4;
849 int err = -1;
850 struct sk_buff *skb;
851
        /* First, grab a route. */
853 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
854 return -1;
855
856 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
857
858 if (skb) {
859 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
860
861 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
862 ireq->ir_rmt_addr,
863 ireq->opt);
864 err = net_xmit_eval(err);
865 }
866
867 return err;
868}
869
/*
 *      IPv4 request_sock destructor.
 */
873static void tcp_v4_reqsk_destructor(struct request_sock *req)
874{
875 kfree(inet_rsk(req)->opt);
876}
877
878#ifdef CONFIG_TCP_MD5SIG

/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
886struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
887 const union tcp_md5_addr *addr,
888 int family)
889{
890 const struct tcp_sock *tp = tcp_sk(sk);
891 struct tcp_md5sig_key *key;
892 unsigned int size = sizeof(struct in_addr);
893 const struct tcp_md5sig_info *md5sig;
894
        /* caller either holds rcu_read_lock() or socket lock */
896 md5sig = rcu_dereference_check(tp->md5sig_info,
897 lockdep_sock_is_held(sk));
898 if (!md5sig)
899 return NULL;
900#if IS_ENABLED(CONFIG_IPV6)
901 if (family == AF_INET6)
902 size = sizeof(struct in6_addr);
903#endif
904 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
905 if (key->family != family)
906 continue;
907 if (!memcmp(&key->addr, addr, size))
908 return key;
909 }
910 return NULL;
911}
912EXPORT_SYMBOL(tcp_md5_do_lookup);
913
914struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
915 const struct sock *addr_sk)
916{
917 const union tcp_md5_addr *addr;
918
919 addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
920 return tcp_md5_do_lookup(sk, addr, AF_INET);
921}
922EXPORT_SYMBOL(tcp_v4_md5_lookup);
923
/* This can be called on a newly created socket, from other files */
925int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
926 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
927{
928
929 struct tcp_md5sig_key *key;
930 struct tcp_sock *tp = tcp_sk(sk);
931 struct tcp_md5sig_info *md5sig;
932
933 key = tcp_md5_do_lookup(sk, addr, family);
934 if (key) {
                /* Pre-existing entry - just update that one. */
936 memcpy(key->key, newkey, newkeylen);
937 key->keylen = newkeylen;
938 return 0;
939 }
940
941 md5sig = rcu_dereference_protected(tp->md5sig_info,
942 lockdep_sock_is_held(sk));
943 if (!md5sig) {
944 md5sig = kmalloc(sizeof(*md5sig), gfp);
945 if (!md5sig)
946 return -ENOMEM;
947
948 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
949 INIT_HLIST_HEAD(&md5sig->head);
950 rcu_assign_pointer(tp->md5sig_info, md5sig);
951 }
952
953 key = sock_kmalloc(sk, sizeof(*key), gfp);
954 if (!key)
955 return -ENOMEM;
956 if (!tcp_alloc_md5sig_pool()) {
957 sock_kfree_s(sk, key, sizeof(*key));
958 return -ENOMEM;
959 }
960
961 memcpy(key->key, newkey, newkeylen);
962 key->keylen = newkeylen;
963 key->family = family;
964 memcpy(&key->addr, addr,
965 (family == AF_INET6) ? sizeof(struct in6_addr) :
966 sizeof(struct in_addr));
967 hlist_add_head_rcu(&key->node, &md5sig->head);
968 return 0;
969}
970EXPORT_SYMBOL(tcp_md5_do_add);
971
972int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
973{
974 struct tcp_md5sig_key *key;
975
976 key = tcp_md5_do_lookup(sk, addr, family);
977 if (!key)
978 return -ENOENT;
979 hlist_del_rcu(&key->node);
980 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
981 kfree_rcu(key, rcu);
982 return 0;
983}
984EXPORT_SYMBOL(tcp_md5_do_del);
985
986static void tcp_clear_md5_list(struct sock *sk)
987{
988 struct tcp_sock *tp = tcp_sk(sk);
989 struct tcp_md5sig_key *key;
990 struct hlist_node *n;
991 struct tcp_md5sig_info *md5sig;
992
993 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
994
995 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
996 hlist_del_rcu(&key->node);
997 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
998 kfree_rcu(key, rcu);
999 }
1000}
1001
1002static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1003 int optlen)
1004{
1005 struct tcp_md5sig cmd;
1006 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1007
1008 if (optlen < sizeof(cmd))
1009 return -EINVAL;
1010
1011 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1012 return -EFAULT;
1013
1014 if (sin->sin_family != AF_INET)
1015 return -EINVAL;
1016
1017 if (!cmd.tcpm_keylen)
1018 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1019 AF_INET);
1020
1021 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1022 return -EINVAL;
1023
1024 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1025 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1026 GFP_KERNEL);
1027}
1028
1029static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1030 __be32 daddr, __be32 saddr,
1031 const struct tcphdr *th, int nbytes)
1032{
1033 struct tcp4_pseudohdr *bp;
1034 struct scatterlist sg;
1035 struct tcphdr *_th;
1036
1037 bp = hp->scratch;
1038 bp->saddr = saddr;
1039 bp->daddr = daddr;
1040 bp->pad = 0;
1041 bp->protocol = IPPROTO_TCP;
1042 bp->len = cpu_to_be16(nbytes);
1043
1044 _th = (struct tcphdr *)(bp + 1);
1045 memcpy(_th, th, sizeof(*th));
1046 _th->check = 0;
1047
1048 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1049 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1050 sizeof(*bp) + sizeof(*th));
1051 return crypto_ahash_update(hp->md5_req);
1052}
1053
1054static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1055 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1056{
1057 struct tcp_md5sig_pool *hp;
1058 struct ahash_request *req;
1059
1060 hp = tcp_get_md5sig_pool();
1061 if (!hp)
1062 goto clear_hash_noput;
1063 req = hp->md5_req;
1064
1065 if (crypto_ahash_init(req))
1066 goto clear_hash;
1067 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
1068 goto clear_hash;
1069 if (tcp_md5_hash_key(hp, key))
1070 goto clear_hash;
1071 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1072 if (crypto_ahash_final(req))
1073 goto clear_hash;
1074
1075 tcp_put_md5sig_pool();
1076 return 0;
1077
1078clear_hash:
1079 tcp_put_md5sig_pool();
1080clear_hash_noput:
1081 memset(md5_hash, 0, 16);
1082 return 1;
1083}
1084
1085int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1086 const struct sock *sk,
1087 const struct sk_buff *skb)
1088{
1089 struct tcp_md5sig_pool *hp;
1090 struct ahash_request *req;
1091 const struct tcphdr *th = tcp_hdr(skb);
1092 __be32 saddr, daddr;
1093
1094 if (sk) {
1095 saddr = sk->sk_rcv_saddr;
1096 daddr = sk->sk_daddr;
1097 } else {
1098 const struct iphdr *iph = ip_hdr(skb);
1099 saddr = iph->saddr;
1100 daddr = iph->daddr;
1101 }
1102
1103 hp = tcp_get_md5sig_pool();
1104 if (!hp)
1105 goto clear_hash_noput;
1106 req = hp->md5_req;
1107
1108 if (crypto_ahash_init(req))
1109 goto clear_hash;
1110
1111 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
1112 goto clear_hash;
1113 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1114 goto clear_hash;
1115 if (tcp_md5_hash_key(hp, key))
1116 goto clear_hash;
1117 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1118 if (crypto_ahash_final(req))
1119 goto clear_hash;
1120
1121 tcp_put_md5sig_pool();
1122 return 0;
1123
1124clear_hash:
1125 tcp_put_md5sig_pool();
1126clear_hash_noput:
1127 memset(md5_hash, 0, 16);
1128 return 1;
1129}
1130EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1131
1132#endif
1133
1134
1135static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1136 const struct sk_buff *skb)
1137{
1138#ifdef CONFIG_TCP_MD5SIG
        /*
         * This gets called for each TCP segment that arrives
         * so we want to be efficient.
         * We have 3 drop cases:
         * o No MD5 hash and one expected.
         * o MD5 hash and we're not expecting one.
         * o MD5 hash and it's wrong.
         */
1147 const __u8 *hash_location = NULL;
1148 struct tcp_md5sig_key *hash_expected;
1149 const struct iphdr *iph = ip_hdr(skb);
1150 const struct tcphdr *th = tcp_hdr(skb);
1151 int genhash;
1152 unsigned char newhash[16];
1153
1154 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1155 AF_INET);
1156 hash_location = tcp_parse_md5sig_option(th);
1157
1158
1159 if (!hash_expected && !hash_location)
1160 return false;
1161
1162 if (hash_expected && !hash_location) {
1163 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1164 return true;
1165 }
1166
1167 if (!hash_expected && hash_location) {
1168 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1169 return true;
1170 }
1171
1172
1173
1174
1175 genhash = tcp_v4_md5_hash_skb(newhash,
1176 hash_expected,
1177 NULL, skb);
1178
1179 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1180 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
1181 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1182 &iph->saddr, ntohs(th->source),
1183 &iph->daddr, ntohs(th->dest),
1184 genhash ? " tcp_v4_calc_md5_hash failed"
1185 : "");
1186 return true;
1187 }
1188 return false;
1189#endif
1190 return false;
1191}
1192
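/* Fill in the IPv4-specific part of a freshly allocated request sock:
 * addresses copied from the incoming SYN plus any IP options to echo back.
 */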
1193static void tcp_v4_init_req(struct request_sock *req,
1194 const struct sock *sk_listener,
1195 struct sk_buff *skb)
1196{
1197 struct inet_request_sock *ireq = inet_rsk(req);
1198
1199 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1200 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1201 ireq->opt = tcp_v4_save_options(skb);
1202}
1203
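/* Route the SYN-ACK for a request sock.  When @strict is non-NULL it reports
 * whether the chosen route still points at the peer recorded in the request.
 */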
1204static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1205 struct flowi *fl,
1206 const struct request_sock *req,
1207 bool *strict)
1208{
1209 struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1210
1211 if (strict) {
1212 if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1213 *strict = true;
1214 else
1215 *strict = false;
1216 }
1217
1218 return dst;
1219}
1220
1221struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1222 .family = PF_INET,
1223 .obj_size = sizeof(struct tcp_request_sock),
1224 .rtx_syn_ack = tcp_rtx_synack,
1225 .send_ack = tcp_v4_reqsk_send_ack,
1226 .destructor = tcp_v4_reqsk_destructor,
1227 .send_reset = tcp_v4_send_reset,
1228 .syn_ack_timeout = tcp_syn_ack_timeout,
1229};
1230
1231static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1232 .mss_clamp = TCP_MSS_DEFAULT,
1233#ifdef CONFIG_TCP_MD5SIG
1234 .req_md5_lookup = tcp_v4_md5_lookup,
1235 .calc_md5_hash = tcp_v4_md5_hash_skb,
1236#endif
1237 .init_req = tcp_v4_init_req,
1238#ifdef CONFIG_SYN_COOKIES
1239 .cookie_init_seq = cookie_v4_init_sequence,
1240#endif
1241 .route_req = tcp_v4_route_req,
1242 .init_seq = tcp_v4_init_sequence,
1243 .send_synack = tcp_v4_send_synack,
1244};
1245
1246int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1247{
        /* Never answer to SYNs sent to broadcast or multicast */
1249 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1250 goto drop;
1251
1252 return tcp_conn_request(&tcp_request_sock_ops,
1253 &tcp_request_sock_ipv4_ops, sk, skb);
1254
1255drop:
1256 tcp_listendrop(sk);
1257 return 0;
1258}
1259EXPORT_SYMBOL(tcp_v4_conn_request);
1260

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
1266struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1267 struct request_sock *req,
1268 struct dst_entry *dst,
1269 struct request_sock *req_unhash,
1270 bool *own_req)
1271{
1272 struct inet_request_sock *ireq;
1273 struct inet_sock *newinet;
1274 struct tcp_sock *newtp;
1275 struct sock *newsk;
1276#ifdef CONFIG_TCP_MD5SIG
1277 struct tcp_md5sig_key *key;
1278#endif
1279 struct ip_options_rcu *inet_opt;
1280
1281 if (sk_acceptq_is_full(sk))
1282 goto exit_overflow;
1283
1284 newsk = tcp_create_openreq_child(sk, req, skb);
1285 if (!newsk)
1286 goto exit_nonewsk;
1287
1288 newsk->sk_gso_type = SKB_GSO_TCPV4;
1289 inet_sk_rx_dst_set(newsk, skb);
1290
1291 newtp = tcp_sk(newsk);
1292 newinet = inet_sk(newsk);
1293 ireq = inet_rsk(req);
1294 sk_daddr_set(newsk, ireq->ir_rmt_addr);
1295 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1296 newsk->sk_bound_dev_if = ireq->ir_iif;
1297 newinet->inet_saddr = ireq->ir_loc_addr;
1298 inet_opt = ireq->opt;
1299 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1300 ireq->opt = NULL;
1301 newinet->mc_index = inet_iif(skb);
1302 newinet->mc_ttl = ip_hdr(skb)->ttl;
1303 newinet->rcv_tos = ip_hdr(skb)->tos;
1304 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1305 if (inet_opt)
1306 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1307 newinet->inet_id = newtp->write_seq ^ jiffies;
1308
1309 if (!dst) {
1310 dst = inet_csk_route_child_sock(sk, newsk, req);
1311 if (!dst)
1312 goto put_and_exit;
1313 } else {
                /* syncookie case : see end of cookie_v4_check() */
1315 }
1316 sk_setup_caps(newsk, dst);
1317
1318 tcp_ca_openreq_child(newsk, dst);
1319
1320 tcp_sync_mss(newsk, dst_mtu(dst));
1321 newtp->advmss = dst_metric_advmss(dst);
1322 if (tcp_sk(sk)->rx_opt.user_mss &&
1323 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1324 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1325
1326 tcp_initialize_rcv_mss(newsk);
1327
1328#ifdef CONFIG_TCP_MD5SIG
        /* Copy over the MD5 key from the original socket */
1330 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1331 AF_INET);
1332 if (key) {
                /*
                 * We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
1339 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1340 AF_INET, key->key, key->keylen, GFP_ATOMIC);
1341 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1342 }
1343#endif
1344
1345 if (__inet_inherit_port(sk, newsk) < 0)
1346 goto put_and_exit;
1347 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1348 if (*own_req)
1349 tcp_move_syn(newtp, req);
1350
1351 return newsk;
1352
1353exit_overflow:
1354 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1355exit_nonewsk:
1356 dst_release(dst);
1357exit:
1358 tcp_listendrop(sk);
1359 return NULL;
1360put_and_exit:
1361 inet_csk_prepare_forced_close(newsk);
1362 tcp_done(newsk);
1363 goto exit;
1364}
1365EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1366
1367static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1368{
1369#ifdef CONFIG_SYN_COOKIES
1370 const struct tcphdr *th = tcp_hdr(skb);
1371
1372 if (!th->syn)
1373 sk = cookie_v4_check(sk, skb);
1374#endif
1375 return sk;
1376}
1377
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1386int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1387{
1388 struct sock *rsk;
1389
1390 if (sk->sk_state == TCP_ESTABLISHED) {
1391 struct dst_entry *dst = sk->sk_rx_dst;
1392
1393 sock_rps_save_rxhash(sk, skb);
1394 sk_mark_napi_id(sk, skb);
1395 if (dst) {
1396 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1397 !dst->ops->check(dst, 0)) {
1398 dst_release(dst);
1399 sk->sk_rx_dst = NULL;
1400 }
1401 }
1402 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1403 return 0;
1404 }
1405
1406 if (tcp_checksum_complete(skb))
1407 goto csum_err;
1408
1409 if (sk->sk_state == TCP_LISTEN) {
1410 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1411
1412 if (!nsk)
1413 goto discard;
1414 if (nsk != sk) {
1415 sock_rps_save_rxhash(nsk, skb);
1416 sk_mark_napi_id(nsk, skb);
1417 if (tcp_child_process(sk, nsk, skb)) {
1418 rsk = nsk;
1419 goto reset;
1420 }
1421 return 0;
1422 }
1423 } else
1424 sock_rps_save_rxhash(sk, skb);
1425
1426 if (tcp_rcv_state_process(sk, skb)) {
1427 rsk = sk;
1428 goto reset;
1429 }
1430 return 0;
1431
1432reset:
1433 tcp_v4_send_reset(rsk, skb);
1434discard:
1435 kfree_skb(skb);
        /* Be careful here. If this function gets more complicated and
         * gcc suffers from register pressure on the x86, sk (in %ebx)
         * might be destroyed here. This current version compiles well
         * and does not make the code look like a complete mess.
         */
1441 return 0;
1442
1443csum_err:
1444 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1445 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1446 goto discard;
1447}
1448EXPORT_SYMBOL(tcp_v4_do_rcv);
1449
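/* Early demux: look up an established socket while the packet is still in the
 * IP receive path, so the cached socket dst can be reused for this skb.
 */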
1450void tcp_v4_early_demux(struct sk_buff *skb)
1451{
1452 const struct iphdr *iph;
1453 const struct tcphdr *th;
1454 struct sock *sk;
1455
1456 if (skb->pkt_type != PACKET_HOST)
1457 return;
1458
1459 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1460 return;
1461
1462 iph = ip_hdr(skb);
1463 th = tcp_hdr(skb);
1464
1465 if (th->doff < sizeof(struct tcphdr) / 4)
1466 return;
1467
1468 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1469 iph->saddr, th->source,
1470 iph->daddr, ntohs(th->dest),
1471 skb->skb_iif);
1472 if (sk) {
1473 skb->sk = sk;
1474 skb->destructor = sock_edemux;
1475 if (sk_fullsock(sk)) {
1476 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1477
1478 if (dst)
1479 dst = dst_check(dst, 0);
1480 if (dst &&
1481 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1482 skb_dst_set_noref(skb, dst);
1483 }
1484 }
1485}
1486

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see why it failed.
 */
1494bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1495{
1496 struct tcp_sock *tp = tcp_sk(sk);
1497
1498 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1499 return false;
1500
1501 if (skb->len <= tcp_hdrlen(skb) &&
1502 skb_queue_len(&tp->ucopy.prequeue) == 0)
1503 return false;
1504
        /* Before escaping the RCU protected region, we need to take care of
         * the skb dst. Prequeue is only enabled for established sockets.
         * For such sockets, we might need the skb dst only to set
         * sk->sk_rx_dst during a retransmit, as the socket might have been
         * closed already.
         */
1511 if (likely(sk->sk_rx_dst))
1512 skb_dst_drop(skb);
1513 else
1514 skb_dst_force_safe(skb);
1515
1516 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1517 tp->ucopy.memory += skb->truesize;
1518 if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
1519 tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
1520 struct sk_buff *skb1;
1521
1522 BUG_ON(sock_owned_by_user(sk));
1523 __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
1524 skb_queue_len(&tp->ucopy.prequeue));
1525
1526 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1527 sk_backlog_rcv(sk, skb1);
1528
1529 tp->ucopy.memory = 0;
1530 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1531 wake_up_interruptible_sync_poll(sk_sleep(sk),
1532 POLLIN | POLLRDNORM | POLLRDBAND);
1533 if (!inet_csk_ack_scheduled(sk))
1534 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1535 (3 * tcp_rto_min(sk)) / 4,
1536 TCP_RTO_MAX);
1537 }
1538 return true;
1539}
1540EXPORT_SYMBOL(tcp_prequeue);
1541
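/* Queue a segment on the socket backlog while the socket is owned by the
 * user; returns true (segment dropped) when the backlog limit is exceeded.
 */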
1542bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
1543{
1544 u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
1545
        /* Only the socket owner can try to collapse/prune rx queues
         * to reduce memory overhead, so add a little headroom here.
         * Few socket backlogs are possibly concurrently non empty.
         */
        limit += 64*1024;

        /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
         * we can fix skb->truesize to its best value, removing any dependency
         * on the size of skb->head.  This is valid because the skb is not yet
         * charged to the socket.
         */
1558 if (!skb->data_len)
1559 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
1560
1561 if (unlikely(sk_add_backlog(sk, skb, limit))) {
1562 bh_unlock_sock(sk);
1563 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
1564 return true;
1565 }
1566 return false;
1567}
1568EXPORT_SYMBOL(tcp_add_backlog);
1569
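/* Run the socket filter; if it trims the segment, shrink end_seq by the
 * number of payload bytes that were removed.
 */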
1570int tcp_filter(struct sock *sk, struct sk_buff *skb)
1571{
1572 struct tcphdr *th = (struct tcphdr *)skb->data;
1573 unsigned int eaten = skb->len;
1574 int err;
1575
1576 err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1577 if (!err) {
1578 eaten -= skb->len;
1579 TCP_SKB_CB(skb)->end_seq -= eaten;
1580 }
1581 return err;
1582}
1583EXPORT_SYMBOL(tcp_filter);
1584

/*
 *      From tcp_input.c
 */
1589int tcp_v4_rcv(struct sk_buff *skb)
1590{
1591 struct net *net = dev_net(skb->dev);
1592 const struct iphdr *iph;
1593 const struct tcphdr *th;
1594 bool refcounted;
1595 struct sock *sk;
1596 int ret;
1597
1598 if (skb->pkt_type != PACKET_HOST)
1599 goto discard_it;
1600
        /* Count it even if it's bad */
1602 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1603
1604 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1605 goto discard_it;
1606
1607 th = (const struct tcphdr *)skb->data;
1608
1609 if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1610 goto bad_packet;
1611 if (!pskb_may_pull(skb, th->doff * 4))
1612 goto discard_it;
1613
        /* An explanation is required here, I think.
         * Packet length and doff are validated by header prediction,
         * provided the case of th->doff == 0 is eliminated.
         * So, we defer the checks. */

1619 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1620 goto csum_error;
1621
1622 th = (const struct tcphdr *)skb->data;
1623 iph = ip_hdr(skb);
1624
1625
1626
1627 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1628 sizeof(struct inet_skb_parm));
1629 barrier();
1630
1631 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1632 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1633 skb->len - th->doff * 4);
1634 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1635 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1636 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1637 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1638 TCP_SKB_CB(skb)->sacked = 0;
1639
1640lookup:
1641 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1642 th->dest, &refcounted);
1643 if (!sk)
1644 goto no_tcp_socket;
1645
1646process:
1647 if (sk->sk_state == TCP_TIME_WAIT)
1648 goto do_time_wait;
1649
1650 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1651 struct request_sock *req = inet_reqsk(sk);
1652 struct sock *nsk;
1653
1654 sk = req->rsk_listener;
1655 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1656 sk_drops_add(sk, skb);
1657 reqsk_put(req);
1658 goto discard_it;
1659 }
1660 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1661 inet_csk_reqsk_queue_drop_and_put(sk, req);
1662 goto lookup;
1663 }
                /* We own a reference on the listener, increase it again
                 * as we might lose it too soon.
                 */
1667 sock_hold(sk);
1668 refcounted = true;
1669 nsk = tcp_check_req(sk, skb, req, false);
1670 if (!nsk) {
1671 reqsk_put(req);
1672 goto discard_and_relse;
1673 }
1674 if (nsk == sk) {
1675 reqsk_put(req);
1676 } else if (tcp_child_process(sk, nsk, skb)) {
1677 tcp_v4_send_reset(nsk, skb);
1678 goto discard_and_relse;
1679 } else {
1680 sock_put(sk);
1681 return 0;
1682 }
1683 }
1684 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1685 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1686 goto discard_and_relse;
1687 }
1688
1689 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1690 goto discard_and_relse;
1691
1692 if (tcp_v4_inbound_md5_hash(sk, skb))
1693 goto discard_and_relse;
1694
1695 nf_reset(skb);
1696
1697 if (tcp_filter(sk, skb))
1698 goto discard_and_relse;
1699 th = (const struct tcphdr *)skb->data;
1700 iph = ip_hdr(skb);
1701
1702 skb->dev = NULL;
1703
1704 if (sk->sk_state == TCP_LISTEN) {
1705 ret = tcp_v4_do_rcv(sk, skb);
1706 goto put_and_return;
1707 }
1708
1709 sk_incoming_cpu_update(sk);
1710
1711 bh_lock_sock_nested(sk);
1712 tcp_segs_in(tcp_sk(sk), skb);
1713 ret = 0;
1714 if (!sock_owned_by_user(sk)) {
1715 if (!tcp_prequeue(sk, skb))
1716 ret = tcp_v4_do_rcv(sk, skb);
1717 } else if (tcp_add_backlog(sk, skb)) {
1718 goto discard_and_relse;
1719 }
1720 bh_unlock_sock(sk);
1721
1722put_and_return:
1723 if (refcounted)
1724 sock_put(sk);
1725
1726 return ret;
1727
1728no_tcp_socket:
1729 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1730 goto discard_it;
1731
1732 if (tcp_checksum_complete(skb)) {
1733csum_error:
1734 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1735bad_packet:
1736 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1737 } else {
1738 tcp_v4_send_reset(NULL, skb);
1739 }
1740
1741discard_it:
1742
1743 kfree_skb(skb);
1744 return 0;
1745
1746discard_and_relse:
1747 sk_drops_add(sk, skb);
1748 if (refcounted)
1749 sock_put(sk);
1750 goto discard_it;
1751
1752do_time_wait:
1753 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1754 inet_twsk_put(inet_twsk(sk));
1755 goto discard_it;
1756 }
1757
1758 if (tcp_checksum_complete(skb)) {
1759 inet_twsk_put(inet_twsk(sk));
1760 goto csum_error;
1761 }
1762 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1763 case TCP_TW_SYN: {
1764 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1765 &tcp_hashinfo, skb,
1766 __tcp_hdrlen(th),
1767 iph->saddr, th->source,
1768 iph->daddr, th->dest,
1769 inet_iif(skb));
1770 if (sk2) {
1771 inet_twsk_deschedule_put(inet_twsk(sk));
1772 sk = sk2;
1773 refcounted = false;
1774 goto process;
1775 }
                /* Fall through to ACK */
1777 }
1778 case TCP_TW_ACK:
1779 tcp_v4_timewait_ack(sk, skb);
1780 break;
1781 case TCP_TW_RST:
1782 tcp_v4_send_reset(sk, skb);
1783 inet_twsk_deschedule_put(inet_twsk(sk));
1784 goto discard_it;
1785 case TCP_TW_SUCCESS:;
1786 }
1787 goto discard_it;
1788}
1789
1790static struct timewait_sock_ops tcp_timewait_sock_ops = {
1791 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1792 .twsk_unique = tcp_twsk_unique,
1793 .twsk_destructor= tcp_twsk_destructor,
1794};
1795
1796void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1797{
1798 struct dst_entry *dst = skb_dst(skb);
1799
1800 if (dst && dst_hold_safe(dst)) {
1801 sk->sk_rx_dst = dst;
1802 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1803 }
1804}
1805EXPORT_SYMBOL(inet_sk_rx_dst_set);
1806
1807const struct inet_connection_sock_af_ops ipv4_specific = {
1808 .queue_xmit = ip_queue_xmit,
1809 .send_check = tcp_v4_send_check,
1810 .rebuild_header = inet_sk_rebuild_header,
1811 .sk_rx_dst_set = inet_sk_rx_dst_set,
1812 .conn_request = tcp_v4_conn_request,
1813 .syn_recv_sock = tcp_v4_syn_recv_sock,
1814 .net_header_len = sizeof(struct iphdr),
1815 .setsockopt = ip_setsockopt,
1816 .getsockopt = ip_getsockopt,
1817 .addr2sockaddr = inet_csk_addr2sockaddr,
1818 .sockaddr_len = sizeof(struct sockaddr_in),
1819 .bind_conflict = inet_csk_bind_conflict,
1820#ifdef CONFIG_COMPAT
1821 .compat_setsockopt = compat_ip_setsockopt,
1822 .compat_getsockopt = compat_ip_getsockopt,
1823#endif
1824 .mtu_reduced = tcp_v4_mtu_reduced,
1825};
1826EXPORT_SYMBOL(ipv4_specific);
1827
1828#ifdef CONFIG_TCP_MD5SIG
1829static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1830 .md5_lookup = tcp_v4_md5_lookup,
1831 .calc_md5_hash = tcp_v4_md5_hash_skb,
1832 .md5_parse = tcp_v4_parse_md5_keys,
1833};
1834#endif
1835
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
1839static int tcp_v4_init_sock(struct sock *sk)
1840{
1841 struct inet_connection_sock *icsk = inet_csk(sk);
1842
1843 tcp_init_sock(sk);
1844
1845 icsk->icsk_af_ops = &ipv4_specific;
1846
1847#ifdef CONFIG_TCP_MD5SIG
1848 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1849#endif
1850
1851 return 0;
1852}
1853
1854void tcp_v4_destroy_sock(struct sock *sk)
1855{
1856 struct tcp_sock *tp = tcp_sk(sk);
1857
1858 tcp_clear_xmit_timers(sk);
1859
1860 tcp_cleanup_congestion_control(sk);
1861
        /* Clean up the write buffer. */
1863 tcp_write_queue_purge(sk);
1864
        /* Cleans up our, hopefully empty, out_of_order_queue. */
1866 skb_rbtree_purge(&tp->out_of_order_queue);
1867
1868#ifdef CONFIG_TCP_MD5SIG
        /* Clean up the MD5 key list, if any */
1870 if (tp->md5sig_info) {
1871 tcp_clear_md5_list(sk);
1872 kfree_rcu(tp->md5sig_info, rcu);
1873 tp->md5sig_info = NULL;
1874 }
1875#endif
1876
        /* Clean prequeue, it must be empty really */
1878 __skb_queue_purge(&tp->ucopy.prequeue);
1879
        /* Clean up a referenced TCP bind bucket. */
1881 if (inet_csk(sk)->icsk_bind_hash)
1882 inet_put_port(sk);
1883
1884 BUG_ON(tp->fastopen_rsk);
1885
        /* If socket is aborted during connect operation */
1887 tcp_free_fastopen_req(tp);
1888 tcp_saved_syn_free(tp);
1889
1890 local_bh_disable();
1891 sk_sockets_allocated_dec(sk);
1892 local_bh_enable();
1893}
1894EXPORT_SYMBOL(tcp_v4_destroy_sock);
1895
1896#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get next listener socket following cur.  If cur is NULL, get the first
 * socket starting from the bucket given in st->bucket; when st->bucket is
 * zero the very first socket in the hash table is returned.
 */
1904static void *listening_get_next(struct seq_file *seq, void *cur)
1905{
1906 struct tcp_iter_state *st = seq->private;
1907 struct net *net = seq_file_net(seq);
1908 struct inet_listen_hashbucket *ilb;
1909 struct sock *sk = cur;
1910
1911 if (!sk) {
1912get_head:
1913 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1914 spin_lock(&ilb->lock);
1915 sk = sk_head(&ilb->head);
1916 st->offset = 0;
1917 goto get_sk;
1918 }
1919 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1920 ++st->num;
1921 ++st->offset;
1922
1923 sk = sk_next(sk);
1924get_sk:
1925 sk_for_each_from(sk) {
1926 if (!net_eq(sock_net(sk), net))
1927 continue;
1928 if (sk->sk_family == st->family)
1929 return sk;
1930 }
1931 spin_unlock(&ilb->lock);
1932 st->offset = 0;
1933 if (++st->bucket < INET_LHTABLE_SIZE)
1934 goto get_head;
1935 return NULL;
1936}
1937
1938static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1939{
1940 struct tcp_iter_state *st = seq->private;
1941 void *rc;
1942
1943 st->bucket = 0;
1944 st->offset = 0;
1945 rc = listening_get_next(seq, NULL);
1946
1947 while (rc && *pos) {
1948 rc = listening_get_next(seq, rc);
1949 --*pos;
1950 }
1951 return rc;
1952}
1953
1954static inline bool empty_bucket(const struct tcp_iter_state *st)
1955{
1956 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1957}
1958
/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
1963static void *established_get_first(struct seq_file *seq)
1964{
1965 struct tcp_iter_state *st = seq->private;
1966 struct net *net = seq_file_net(seq);
1967 void *rc = NULL;
1968
1969 st->offset = 0;
1970 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1971 struct sock *sk;
1972 struct hlist_nulls_node *node;
1973 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1974
                /* Lockless fast path for the common case of empty buckets */
1976 if (empty_bucket(st))
1977 continue;
1978
1979 spin_lock_bh(lock);
1980 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1981 if (sk->sk_family != st->family ||
1982 !net_eq(sock_net(sk), net)) {
1983 continue;
1984 }
1985 rc = sk;
1986 goto out;
1987 }
1988 spin_unlock_bh(lock);
1989 }
1990out:
1991 return rc;
1992}
1993
1994static void *established_get_next(struct seq_file *seq, void *cur)
1995{
1996 struct sock *sk = cur;
1997 struct hlist_nulls_node *node;
1998 struct tcp_iter_state *st = seq->private;
1999 struct net *net = seq_file_net(seq);
2000
2001 ++st->num;
2002 ++st->offset;
2003
2004 sk = sk_nulls_next(sk);
2005
2006 sk_nulls_for_each_from(sk, node) {
2007 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2008 return sk;
2009 }
2010
2011 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2012 ++st->bucket;
2013 return established_get_first(seq);
2014}
2015
2016static void *established_get_idx(struct seq_file *seq, loff_t pos)
2017{
2018 struct tcp_iter_state *st = seq->private;
2019 void *rc;
2020
2021 st->bucket = 0;
2022 rc = established_get_first(seq);
2023
2024 while (rc && pos) {
2025 rc = established_get_next(seq, rc);
2026 --pos;
2027 }
2028 return rc;
2029}
2030
2031static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2032{
2033 void *rc;
2034 struct tcp_iter_state *st = seq->private;
2035
2036 st->state = TCP_SEQ_STATE_LISTENING;
2037 rc = listening_get_idx(seq, &pos);
2038
2039 if (!rc) {
2040 st->state = TCP_SEQ_STATE_ESTABLISHED;
2041 rc = established_get_idx(seq, pos);
2042 }
2043
2044 return rc;
2045}
2046
2047static void *tcp_seek_last_pos(struct seq_file *seq)
2048{
2049 struct tcp_iter_state *st = seq->private;
2050 int offset = st->offset;
2051 int orig_num = st->num;
2052 void *rc = NULL;
2053
2054 switch (st->state) {
2055 case TCP_SEQ_STATE_LISTENING:
2056 if (st->bucket >= INET_LHTABLE_SIZE)
2057 break;
2058 st->state = TCP_SEQ_STATE_LISTENING;
2059 rc = listening_get_next(seq, NULL);
2060 while (offset-- && rc)
2061 rc = listening_get_next(seq, rc);
2062 if (rc)
2063 break;
2064 st->bucket = 0;
2065 st->state = TCP_SEQ_STATE_ESTABLISHED;
                /* Fallthrough */
2067 case TCP_SEQ_STATE_ESTABLISHED:
2068 if (st->bucket > tcp_hashinfo.ehash_mask)
2069 break;
2070 rc = established_get_first(seq);
2071 while (offset-- && rc)
2072 rc = established_get_next(seq, rc);
2073 }
2074
2075 st->num = orig_num;
2076
2077 return rc;
2078}
2079
2080static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2081{
2082 struct tcp_iter_state *st = seq->private;
2083 void *rc;
2084
2085 if (*pos && *pos == st->last_pos) {
2086 rc = tcp_seek_last_pos(seq);
2087 if (rc)
2088 goto out;
2089 }
2090
2091 st->state = TCP_SEQ_STATE_LISTENING;
2092 st->num = 0;
2093 st->bucket = 0;
2094 st->offset = 0;
2095 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2096
2097out:
2098 st->last_pos = *pos;
2099 return rc;
2100}
2101
2102static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2103{
2104 struct tcp_iter_state *st = seq->private;
2105 void *rc = NULL;
2106
2107 if (v == SEQ_START_TOKEN) {
2108 rc = tcp_get_idx(seq, 0);
2109 goto out;
2110 }
2111
2112 switch (st->state) {
2113 case TCP_SEQ_STATE_LISTENING:
2114 rc = listening_get_next(seq, v);
2115 if (!rc) {
2116 st->state = TCP_SEQ_STATE_ESTABLISHED;
2117 st->bucket = 0;
2118 st->offset = 0;
2119 rc = established_get_first(seq);
2120 }
2121 break;
2122 case TCP_SEQ_STATE_ESTABLISHED:
2123 rc = established_get_next(seq, v);
2124 break;
2125 }
2126out:
2127 ++*pos;
2128 st->last_pos = *pos;
2129 return rc;
2130}
2131
2132static void tcp_seq_stop(struct seq_file *seq, void *v)
2133{
2134 struct tcp_iter_state *st = seq->private;
2135
2136 switch (st->state) {
2137 case TCP_SEQ_STATE_LISTENING:
2138 if (v != SEQ_START_TOKEN)
2139 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
2140 break;
2141 case TCP_SEQ_STATE_ESTABLISHED:
2142 if (v)
2143 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2144 break;
2145 }
2146}
2147
2148int tcp_seq_open(struct inode *inode, struct file *file)
2149{
2150 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2151 struct tcp_iter_state *s;
2152 int err;
2153
2154 err = seq_open_net(inode, file, &afinfo->seq_ops,
2155 sizeof(struct tcp_iter_state));
2156 if (err < 0)
2157 return err;
2158
2159 s = ((struct seq_file *)file->private_data)->private;
2160 s->family = afinfo->family;
2161 s->last_pos = 0;
2162 return 0;
2163}
2164EXPORT_SYMBOL(tcp_seq_open);
2165
2166int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2167{
2168 int rc = 0;
2169 struct proc_dir_entry *p;
2170
2171 afinfo->seq_ops.start = tcp_seq_start;
2172 afinfo->seq_ops.next = tcp_seq_next;
2173 afinfo->seq_ops.stop = tcp_seq_stop;
2174
2175 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2176 afinfo->seq_fops, afinfo);
2177 if (!p)
2178 rc = -ENOMEM;
2179 return rc;
2180}
2181EXPORT_SYMBOL(tcp_proc_register);
2182
2183void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2184{
2185 remove_proc_entry(afinfo->name, net->proc_net);
2186}
2187EXPORT_SYMBOL(tcp_proc_unregister);
2188
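/* Format one request sock (SYN_RECV) as a line of /proc/net/tcp. */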
2189static void get_openreq4(const struct request_sock *req,
2190 struct seq_file *f, int i)
2191{
2192 const struct inet_request_sock *ireq = inet_rsk(req);
2193 long delta = req->rsk_timer.expires - jiffies;
2194
2195 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2196 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2197 i,
2198 ireq->ir_loc_addr,
2199 ireq->ir_num,
2200 ireq->ir_rmt_addr,
2201 ntohs(ireq->ir_rmt_port),
2202 TCP_SYN_RECV,
2203 0, 0,
2204 1,
2205 jiffies_delta_to_clock_t(delta),
2206 req->num_timeout,
2207 from_kuid_munged(seq_user_ns(f),
2208 sock_i_uid(req->rsk_listener)),
2209 0,
2210 0,
2211 0,
2212 req);
2213}
2214
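/* Format one full TCP socket as a line of /proc/net/tcp. */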
2215static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2216{
2217 int timer_active;
2218 unsigned long timer_expires;
2219 const struct tcp_sock *tp = tcp_sk(sk);
2220 const struct inet_connection_sock *icsk = inet_csk(sk);
2221 const struct inet_sock *inet = inet_sk(sk);
2222 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2223 __be32 dest = inet->inet_daddr;
2224 __be32 src = inet->inet_rcv_saddr;
2225 __u16 destp = ntohs(inet->inet_dport);
2226 __u16 srcp = ntohs(inet->inet_sport);
2227 int rx_queue;
2228 int state;
2229
2230 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2231 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2232 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2233 timer_active = 1;
2234 timer_expires = icsk->icsk_timeout;
2235 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2236 timer_active = 4;
2237 timer_expires = icsk->icsk_timeout;
2238 } else if (timer_pending(&sk->sk_timer)) {
2239 timer_active = 2;
2240 timer_expires = sk->sk_timer.expires;
2241 } else {
2242 timer_active = 0;
2243 timer_expires = jiffies;
2244 }
2245
2246 state = sk_state_load(sk);
2247 if (state == TCP_LISTEN)
2248 rx_queue = sk->sk_ack_backlog;
2249 else
                /* Because we don't lock the socket,
                 * we might find a transient negative value.
                 */
2253 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2254
2255 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2256 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2257 i, src, srcp, dest, destp, state,
2258 tp->write_seq - tp->snd_una,
2259 rx_queue,
2260 timer_active,
2261 jiffies_delta_to_clock_t(timer_expires - jiffies),
2262 icsk->icsk_retransmits,
2263 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2264 icsk->icsk_probes_out,
2265 sock_i_ino(sk),
2266 atomic_read(&sk->sk_refcnt), sk,
2267 jiffies_to_clock_t(icsk->icsk_rto),
2268 jiffies_to_clock_t(icsk->icsk_ack.ato),
2269 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2270 tp->snd_cwnd,
2271 state == TCP_LISTEN ?
2272 fastopenq->max_qlen :
2273 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2274}
2275
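/* Format one TIME-WAIT socket as a line of /proc/net/tcp. */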
2276static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2277 struct seq_file *f, int i)
2278{
2279 long delta = tw->tw_timer.expires - jiffies;
2280 __be32 dest, src;
2281 __u16 destp, srcp;
2282
2283 dest = tw->tw_daddr;
2284 src = tw->tw_rcv_saddr;
2285 destp = ntohs(tw->tw_dport);
2286 srcp = ntohs(tw->tw_sport);
2287
2288 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2289 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2290 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2291 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2292 atomic_read(&tw->tw_refcnt), tw);
2293}
2294
2295#define TMPSZ 150
2296
2297static int tcp4_seq_show(struct seq_file *seq, void *v)
2298{
2299 struct tcp_iter_state *st;
2300 struct sock *sk = v;
2301
2302 seq_setwidth(seq, TMPSZ - 1);
2303 if (v == SEQ_START_TOKEN) {
2304 seq_puts(seq, " sl local_address rem_address st tx_queue "
2305 "rx_queue tr tm->when retrnsmt uid timeout "
2306 "inode");
2307 goto out;
2308 }
2309 st = seq->private;
2310
2311 if (sk->sk_state == TCP_TIME_WAIT)
2312 get_timewait4_sock(v, seq, st->num);
2313 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2314 get_openreq4(v, seq, st->num);
2315 else
2316 get_tcp4_sock(v, seq, st->num);
2317out:
2318 seq_pad(seq, '\n');
2319 return 0;
2320}
2321
2322static const struct file_operations tcp_afinfo_seq_fops = {
2323 .owner = THIS_MODULE,
2324 .open = tcp_seq_open,
2325 .read = seq_read,
2326 .llseek = seq_lseek,
2327 .release = seq_release_net
2328};
2329
2330static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2331 .name = "tcp",
2332 .family = AF_INET,
2333 .seq_fops = &tcp_afinfo_seq_fops,
2334 .seq_ops = {
2335 .show = tcp4_seq_show,
2336 },
2337};
2338
2339static int __net_init tcp4_proc_init_net(struct net *net)
2340{
2341 return tcp_proc_register(net, &tcp4_seq_afinfo);
2342}
2343
2344static void __net_exit tcp4_proc_exit_net(struct net *net)
2345{
2346 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2347}
2348
2349static struct pernet_operations tcp4_net_ops = {
2350 .init = tcp4_proc_init_net,
2351 .exit = tcp4_proc_exit_net,
2352};
2353
2354int __init tcp4_proc_init(void)
2355{
2356 return register_pernet_subsys(&tcp4_net_ops);
2357}
2358
2359void tcp4_proc_exit(void)
2360{
2361 unregister_pernet_subsys(&tcp4_net_ops);
2362}
2363#endif
2364
2365struct proto tcp_prot = {
2366 .name = "TCP",
2367 .owner = THIS_MODULE,
2368 .close = tcp_close,
2369 .connect = tcp_v4_connect,
2370 .disconnect = tcp_disconnect,
2371 .accept = inet_csk_accept,
2372 .ioctl = tcp_ioctl,
2373 .init = tcp_v4_init_sock,
2374 .destroy = tcp_v4_destroy_sock,
2375 .shutdown = tcp_shutdown,
2376 .setsockopt = tcp_setsockopt,
2377 .getsockopt = tcp_getsockopt,
2378 .recvmsg = tcp_recvmsg,
2379 .sendmsg = tcp_sendmsg,
2380 .sendpage = tcp_sendpage,
2381 .backlog_rcv = tcp_v4_do_rcv,
2382 .release_cb = tcp_release_cb,
2383 .hash = inet_hash,
2384 .unhash = inet_unhash,
2385 .get_port = inet_csk_get_port,
2386 .enter_memory_pressure = tcp_enter_memory_pressure,
2387 .stream_memory_free = tcp_stream_memory_free,
2388 .sockets_allocated = &tcp_sockets_allocated,
2389 .orphan_count = &tcp_orphan_count,
2390 .memory_allocated = &tcp_memory_allocated,
2391 .memory_pressure = &tcp_memory_pressure,
2392 .sysctl_mem = sysctl_tcp_mem,
2393 .sysctl_wmem = sysctl_tcp_wmem,
2394 .sysctl_rmem = sysctl_tcp_rmem,
2395 .max_header = MAX_TCP_HEADER,
2396 .obj_size = sizeof(struct tcp_sock),
2397 .slab_flags = SLAB_DESTROY_BY_RCU,
2398 .twsk_prot = &tcp_timewait_sock_ops,
2399 .rsk_prot = &tcp_request_sock_ops,
2400 .h.hashinfo = &tcp_hashinfo,
2401 .no_autobind = true,
2402#ifdef CONFIG_COMPAT
2403 .compat_setsockopt = compat_tcp_setsockopt,
2404 .compat_getsockopt = compat_tcp_getsockopt,
2405#endif
2406 .diag_destroy = tcp_abort,
2407};
2408EXPORT_SYMBOL(tcp_prot);
2409
2410static void __net_exit tcp_sk_exit(struct net *net)
2411{
2412 int cpu;
2413
2414 for_each_possible_cpu(cpu)
2415 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2416 free_percpu(net->ipv4.tcp_sk);
2417}
2418
2419static int __net_init tcp_sk_init(struct net *net)
2420{
2421 int res, cpu;
2422
2423 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2424 if (!net->ipv4.tcp_sk)
2425 return -ENOMEM;
2426
2427 for_each_possible_cpu(cpu) {
2428 struct sock *sk;
2429
2430 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2431 IPPROTO_TCP, net);
2432 if (res)
2433 goto fail;
2434 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2435 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2436 }
2437
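        /* Per-netns defaults for the TCP sysctls. */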
2438 net->ipv4.sysctl_tcp_ecn = 2;
2439 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2440
2441 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2442 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2443 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2444
2445 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2446 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2447 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2448
2449 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2450 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2451 net->ipv4.sysctl_tcp_syncookies = 1;
2452 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2453 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2454 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2455 net->ipv4.sysctl_tcp_orphan_retries = 0;
2456 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2457 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2458 net->ipv4.sysctl_tcp_tw_reuse = 0;
2459
2460 return 0;
2461fail:
2462 tcp_sk_exit(net);
2463
2464 return res;
2465}
2466
2467static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2468{
2469 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2470}
2471
2472static struct pernet_operations __net_initdata tcp_sk_ops = {
2473 .init = tcp_sk_init,
2474 .exit = tcp_sk_exit,
2475 .exit_batch = tcp_sk_exit_batch,
2476};
2477
2478void __init tcp_v4_init(void)
2479{
2480 inet_hashinfo_init(&tcp_hashinfo);
2481 if (register_pernet_subsys(&tcp_sk_ops))
2482 panic("Failed to create the TCP control socket.\n");
2483}
2484