/*
 * IPv4-specific functions of the TCP protocol implementation:
 * active connection setup, ICMP error handling, RST and out-of-socket
 * ACK generation, RFC 2385 MD5 signature support, the receive path
 * entry points, and the /proc/net/tcp reporting code.
 */
53#define pr_fmt(fmt) "TCP: " fmt
54
55#include <linux/bottom_half.h>
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
64#include <linux/slab.h>
65
66#include <net/net_namespace.h>
67#include <net/icmp.h>
68#include <net/inet_hashtables.h>
69#include <net/tcp.h>
70#include <net/transp_v6.h>
71#include <net/ipv6.h>
72#include <net/inet_common.h>
73#include <net/timewait_sock.h>
74#include <net/xfrm.h>
75#include <net/secure_seq.h>
76#include <net/busy_poll.h>
77
78#include <linux/inet.h>
79#include <linux/ipv6.h>
80#include <linux/stddef.h>
81#include <linux/proc_fs.h>
82#include <linux/seq_file.h>
83
84#include <crypto/hash.h>
85#include <linux/scatterlist.h>
86
87int sysctl_tcp_tw_reuse __read_mostly;
88int sysctl_tcp_low_latency __read_mostly;
89EXPORT_SYMBOL(sysctl_tcp_low_latency);
90
91#ifdef CONFIG_TCP_MD5SIG
92static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
93 __be32 daddr, __be32 saddr, const struct tcphdr *th);
94#endif
95
96struct inet_hashinfo tcp_hashinfo;
97EXPORT_SYMBOL(tcp_hashinfo);
98
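/* Pick the initial sequence number for an outgoing SYN from the connection
 * 4-tuple, using a keyed, time-seeded hash so sequence numbers are hard to
 * predict.
 */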
99static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
100{
101 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
102 ip_hdr(skb)->saddr,
103 tcp_hdr(skb)->dest,
104 tcp_hdr(skb)->source);
105}
106
107int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
108{
109 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
110 struct tcp_sock *tp = tcp_sk(sk);

	/* Reuse a TIME-WAIT port only when timestamps were seen on the old
	 * connection and either the caller did not ask for a uniqueness
	 * check (!twp) or tcp_tw_reuse is enabled and at least one second
	 * has elapsed since the last timestamp, so PAWS protects the new
	 * incarnation from old duplicate segments.
	 */
123 if (tcptw->tw_ts_recent_stamp &&
124 (!twp || (sysctl_tcp_tw_reuse &&
125 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
126 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
127 if (tp->write_seq == 0)
128 tp->write_seq = 1;
129 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
130 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
131 sock_hold(sktw);
132 return 1;
133 }
134
135 return 0;
136}
137EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* Initiate an outgoing connection (connect() on a TCP socket). */
140int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
141{
142 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
143 struct inet_sock *inet = inet_sk(sk);
144 struct tcp_sock *tp = tcp_sk(sk);
145 __be16 orig_sport, orig_dport;
146 __be32 daddr, nexthop;
147 struct flowi4 *fl4;
148 struct rtable *rt;
149 int err;
150 struct ip_options_rcu *inet_opt;
151
152 if (addr_len < sizeof(struct sockaddr_in))
153 return -EINVAL;
154
155 if (usin->sin_family != AF_INET)
156 return -EAFNOSUPPORT;
157
158 nexthop = daddr = usin->sin_addr.s_addr;
159 inet_opt = rcu_dereference_protected(inet->inet_opt,
160 lockdep_sock_is_held(sk));
161 if (inet_opt && inet_opt->opt.srr) {
162 if (!daddr)
163 return -EINVAL;
164 nexthop = inet_opt->opt.faddr;
165 }
166
167 orig_sport = inet->inet_sport;
168 orig_dport = usin->sin_port;
169 fl4 = &inet->cork.fl.u.ip4;
170 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
171 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
172 IPPROTO_TCP,
173 orig_sport, orig_dport, sk);
174 if (IS_ERR(rt)) {
175 err = PTR_ERR(rt);
176 if (err == -ENETUNREACH)
177 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
178 return err;
179 }
180
181 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
182 ip_rt_put(rt);
183 return -ENETUNREACH;
184 }
185
186 if (!inet_opt || !inet_opt->opt.srr)
187 daddr = fl4->daddr;
188
189 if (!inet->inet_saddr)
190 inet->inet_saddr = fl4->saddr;
191 sk_rcv_saddr_set(sk, inet->inet_saddr);
192
193 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Destination changed: reset inherited timestamp state. */
195 tp->rx_opt.ts_recent = 0;
196 tp->rx_opt.ts_recent_stamp = 0;
197 if (likely(!tp->repair))
198 tp->write_seq = 0;
199 }
200
201 if (tcp_death_row.sysctl_tw_recycle &&
202 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
203 tcp_fetch_timewait_stamp(sk, &rt->dst);
204
205 inet->inet_dport = usin->sin_port;
206 sk_daddr_set(sk, daddr);
207
208 inet_csk(sk)->icsk_ext_hdr_len = 0;
209 if (inet_opt)
210 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
211
212 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* The socket identity is still incomplete (the source port may be
	 * zero).  Enter SYN-SENT first, then let inet_hash_connect() pick a
	 * source port and insert the socket into the hash tables before the
	 * SYN is actually sent.
	 */
219 tcp_set_state(sk, TCP_SYN_SENT);
220 err = inet_hash_connect(&tcp_death_row, sk);
221 if (err)
222 goto failure;
223
224 sk_set_txhash(sk);
225
226 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
227 inet->inet_sport, inet->inet_dport, sk);
228 if (IS_ERR(rt)) {
229 err = PTR_ERR(rt);
230 rt = NULL;
231 goto failure;
232 }
233
234 sk->sk_gso_type = SKB_GSO_TCPV4;
235 sk_setup_caps(sk, &rt->dst);
236
237 if (!tp->write_seq && likely(!tp->repair))
238 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
239 inet->inet_daddr,
240 inet->inet_sport,
241 usin->sin_port);
242
243 inet->inet_id = tp->write_seq ^ jiffies;
244
245 err = tcp_connect(sk);
246
247 rt = NULL;
248 if (err)
249 goto failure;
250
251 return 0;
252
253failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
258 tcp_set_state(sk, TCP_CLOSE);
259 ip_rt_put(rt);
260 sk->sk_route_caps = 0;
261 inet->inet_dport = 0;
262 return err;
263}
264EXPORT_SYMBOL(tcp_v4_connect);

/*
 * React to an ICMP_FRAG_NEEDED MTU indication (RFC 1191).  Called directly
 * from tcp_v4_err() or deferred via tcp_release_cb() when the socket was
 * owned by user context at the time the ICMP message arrived.
 */
271void tcp_v4_mtu_reduced(struct sock *sk)
272{
273 struct dst_entry *dst;
274 struct inet_sock *inet = inet_sk(sk);
275 u32 mtu = tcp_sk(sk)->mtu_info;
276
277 dst = inet_csk_update_pmtu(sk, mtu);
278 if (!dst)
279 return;

	/* Something is about to go wrong: remember a soft error in case
	 * this connection cannot recover.
	 */
284 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
285 sk->sk_err_soft = EMSGSIZE;
286
287 mtu = dst_mtu(dst);
288
289 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
290 ip_sk_accept_pmtu(sk) &&
291 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
292 tcp_sync_mss(sk, mtu);

		/* The path MTU shrank below the cached value: segments already
		 * in flight were probably dropped by the router that sent the
		 * ICMP, so retransmit them with the new, smaller MSS.
		 */
299 tcp_simple_retransmit(sk);
300 }
301}
302EXPORT_SYMBOL(tcp_v4_mtu_reduced);
303
304static void do_redirect(struct sk_buff *skb, struct sock *sk)
305{
306 struct dst_entry *dst = __sk_dst_check(sk, 0);
307
308 if (dst)
309 dst->ops->redirect(dst, sk, skb);
310}

/* Handle ICMP errors delivered against TCP_NEW_SYN_RECV request sockets. */
314void tcp_req_err(struct sock *sk, u32 seq, bool abort)
315{
316 struct request_sock *req = inet_reqsk(sk);
317 struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, so we cannot find an established
	 * socket here.
	 */
322 if (seq != tcp_rsk(req)->snt_isn) {
323 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
324 } else if (abort) {
		/*
		 * Still in SYN_RECV: just remove the request silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network errors
		 * returned from accept().
		 */
331 inet_csk_reqsk_queue_drop(req->rsk_listener, req);
332 tcp_listendrop(req->rsk_listener);
333 }
334 reqsk_put(req);
335}
336EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some sort of
 * error condition.  If err < 0 then the socket should be closed and the
 * error returned to the user.  If err > 0 it's just the icmp
 * type << 8 | icmp code.  After adjustment, header points to the first
 * 8 bytes of the TCP header.  We need to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic": when someone else
 * accesses the socket the ICMP is just dropped, and for some paths there
 * is no check at all.  A more general error queue to queue errors for
 * later handling is probably better.
 */
354void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
355{
356 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
357 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
358 struct inet_connection_sock *icsk;
359 struct tcp_sock *tp;
360 struct inet_sock *inet;
361 const int type = icmp_hdr(icmp_skb)->type;
362 const int code = icmp_hdr(icmp_skb)->code;
363 struct sock *sk;
364 struct sk_buff *skb;
365 struct request_sock *fastopen;
366 __u32 seq, snd_una;
367 __u32 remaining;
368 int err;
369 struct net *net = dev_net(icmp_skb->dev);
370
371 sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
372 th->dest, iph->saddr, ntohs(th->source),
373 inet_iif(icmp_skb));
374 if (!sk) {
375 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
376 return;
377 }
378 if (sk->sk_state == TCP_TIME_WAIT) {
379 inet_twsk_put(inet_twsk(sk));
380 return;
381 }
382 seq = ntohl(th->seq);
383 if (sk->sk_state == TCP_NEW_SYN_RECV)
384 return tcp_req_err(sk, seq,
385 type == ICMP_PARAMETERPROB ||
386 type == ICMP_TIME_EXCEEDED ||
387 (type == ICMP_DEST_UNREACH &&
388 (code == ICMP_NET_UNREACH ||
389 code == ICMP_HOST_UNREACH)));
390
391 bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy servers this needs to
	 * become more sophisticated.  We are not interested in TCP_LISTEN
	 * and open requests (SYN-ACKs sent out by Linux are always
	 * < 576 bytes, so they should go through unfragmented).
	 */
397 if (sock_owned_by_user(sk)) {
398 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
399 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
400 }
401 if (sk->sk_state == TCP_CLOSE)
402 goto out;
403
404 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
405 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
406 goto out;
407 }
408
409 icsk = inet_csk(sk);
410 tp = tcp_sk(sk);
411
412 fastopen = tp->fastopen_rsk;
413 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
414 if (sk->sk_state != TCP_LISTEN &&
415 !between(seq, snd_una, tp->snd_nxt)) {
416 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
417 goto out;
418 }
419
420 switch (type) {
421 case ICMP_REDIRECT:
422 do_redirect(icmp_skb, sk);
423 goto out;
424 case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
426 goto out;
427 case ICMP_PARAMETERPROB:
428 err = EPROTO;
429 break;
430 case ICMP_DEST_UNREACH:
431 if (code > NR_ICMP_UNREACH)
432 goto out;
433
434 if (code == ICMP_FRAG_NEEDED) {
			/* PMTU discovery (RFC 1191).  Listening sockets are
			 * not interesting here: SYN-ACKs sent by Linux are
			 * always < 576 bytes, so they go through
			 * unfragmented.
			 */
439 if (sk->sk_state == TCP_LISTEN)
440 goto out;
441
442 tp->mtu_info = info;
443 if (!sock_owned_by_user(sk)) {
444 tcp_v4_mtu_reduced(sk);
445 } else {
446 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
447 sock_hold(sk);
448 }
449 goto out;
450 }
451
452 err = icmp_err_convert[code].errno;
		/* Check whether this ICMP allows reverting the RTO backoff
		 * (see draft-zimmermann-tcp-lcd).
		 */
455 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
456 break;
457 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
458 !icsk->icsk_backoff || fastopen)
459 break;
460
461 if (sock_owned_by_user(sk))
462 break;
463
464 icsk->icsk_backoff--;
465 icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
466 TCP_TIMEOUT_INIT;
467 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
468
469 skb = tcp_write_queue_head(sk);
470 BUG_ON(!skb);
471
472 remaining = icsk->icsk_rto -
473 min(icsk->icsk_rto,
474 tcp_time_stamp - tcp_skb_timestamp(skb));
475
476 if (remaining) {
477 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
478 remaining, TCP_RTO_MAX);
479 } else {
			/* The RTO revert clocked out the retransmission;
			 * retransmit now.
			 */
482 tcp_retransmit_timer(sk);
483 }
484
485 break;
486 case ICMP_TIME_EXCEEDED:
487 err = EHOSTUNREACH;
488 break;
489 default:
490 goto out;
491 }
492
493 switch (sk->sk_state) {
494 case TCP_SYN_SENT:
495 case TCP_SYN_RECV:
		/* Only in fast or simultaneous open.  If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
499 if (fastopen && !fastopen->sk)
500 break;
501
502 if (!sock_owned_by_user(sk)) {
503 sk->sk_err = err;
504
505 sk->sk_error_report(sk);
506
507 tcp_done(sk);
508 } else {
509 sk->sk_err_soft = err;
510 }
511 goto out;
512 }

	/* If we've already connected we will keep trying until we time out,
	 * or the user gives up.
	 *
	 * RFC 1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH to be
	 * treated as hard errors (FRAG_FAILED too, but that is obsoleted by
	 * PMTU discovery); everything else only sets a soft error unless
	 * the application asked for ICMP errors via IP_RECVERR.
	 */
530 inet = inet_sk(sk);
531 if (!sock_owned_by_user(sk) && inet->recverr) {
532 sk->sk_err = err;
533 sk->sk_error_report(sk);
534 } else {
535 sk->sk_err_soft = err;
536 }
537
538out:
539 bh_unlock_sock(sk);
540 sock_put(sk);
541}
542
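/* Fill in the TCP checksum of an outgoing segment: leave a pseudo-header
 * sum for hardware offload when skb->ip_summed is CHECKSUM_PARTIAL, or
 * compute the full checksum in software otherwise.
 */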
543void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
544{
545 struct tcphdr *th = tcp_hdr(skb);
546
547 if (skb->ip_summed == CHECKSUM_PARTIAL) {
548 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
549 skb->csum_start = skb_transport_header(skb) - skb->head;
550 skb->csum_offset = offsetof(struct tcphdr, check);
551 } else {
552 th->check = tcp_v4_check(skb->len, saddr, daddr,
553 csum_partial(th,
554 th->doff << 2,
555 skb->csum));
556 }
557}

/* This routine computes an IPv4 TCP checksum. */
560void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
561{
562 const struct inet_sock *inet = inet_sk(sk);
563
564 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
565}
566EXPORT_SYMBOL(tcp_v4_send_check);

/*
 * Send an RST in reply to the given segment.
 *
 * Socket parameters (TOS, TTL etc.) are deliberately not used: a packet
 * that provokes an RST either does not belong to any socket in this
 * system, or is a duplicate/buggy segment, so the reply is built only
 * from the fields of the incoming packet itself.
 *
 * If the incoming packet carries an MD5 option it is verified first, and
 * no RST is generated when the hash does not match.
 */
581static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
582{
583 const struct tcphdr *th = tcp_hdr(skb);
584 struct {
585 struct tcphdr th;
586#ifdef CONFIG_TCP_MD5SIG
587 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
588#endif
589 } rep;
590 struct ip_reply_arg arg;
591#ifdef CONFIG_TCP_MD5SIG
592 struct tcp_md5sig_key *key = NULL;
593 const __u8 *hash_location = NULL;
594 unsigned char newhash[16];
595 int genhash;
596 struct sock *sk1 = NULL;
597#endif
598 struct net *net;

	/* Never send a reset in response to a reset. */
601 if (th->rst)
602 return;

	/* If sk is not NULL, we did a successful lookup and the incoming
	 * route had to be correct; prequeue might have dropped our dst.
	 */
607 if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
608 return;
609
610
611 memset(&rep, 0, sizeof(rep));
612 rep.th.dest = th->source;
613 rep.th.source = th->dest;
614 rep.th.doff = sizeof(struct tcphdr) / 4;
615 rep.th.rst = 1;
616
617 if (th->ack) {
618 rep.th.seq = th->ack_seq;
619 } else {
620 rep.th.ack = 1;
621 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
622 skb->len - (th->doff << 2));
623 }
624
625 memset(&arg, 0, sizeof(arg));
626 arg.iov[0].iov_base = (unsigned char *)&rep;
627 arg.iov[0].iov_len = sizeof(rep.th);
628
629 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
630#ifdef CONFIG_TCP_MD5SIG
631 rcu_read_lock();
632 hash_location = tcp_parse_md5sig_option(th);
633 if (sk && sk_fullsock(sk)) {
634 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
635 &ip_hdr(skb)->saddr, AF_INET);
636 } else if (hash_location) {
		/*
		 * The active side is gone.  Try to find a listening socket
		 * via the port and look the MD5 key up through it.  Security
		 * is not weakened: the incoming packet is verified against
		 * that key and no RST is generated if the hash does not
		 * match.
		 */
644 sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
645 ip_hdr(skb)->saddr,
646 th->source, ip_hdr(skb)->daddr,
647 ntohs(th->source), inet_iif(skb));
648
649 if (!sk1)
650 goto out;
651
652 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
653 &ip_hdr(skb)->saddr, AF_INET);
654 if (!key)
655 goto out;
656
657
658 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
659 if (genhash || memcmp(hash_location, newhash, 16) != 0)
660 goto out;
661
662 }
663
664 if (key) {
665 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
666 (TCPOPT_NOP << 16) |
667 (TCPOPT_MD5SIG << 8) |
668 TCPOLEN_MD5SIG);
669
670 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
671 rep.th.doff = arg.iov[0].iov_len / 4;
672
673 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
674 key, ip_hdr(skb)->saddr,
675 ip_hdr(skb)->daddr, &rep.th);
676 }
677#endif
678 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
679 ip_hdr(skb)->saddr,
680 arg.iov[0].iov_len, IPPROTO_TCP, 0);
681 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
682 arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When the socket is gone, all binding information is lost.
	 * Routing might fail in this case.  No choice here: if we chose to
	 * force the input interface, we would misroute asymmetric routes.
	 */
688 if (sk)
689 arg.bound_dev_if = sk->sk_bound_dev_if;
690
691 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
692 offsetof(struct inet_timewait_sock, tw_bound_dev_if));
693
694 arg.tos = ip_hdr(skb)->tos;
695 local_bh_disable();
696 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
697 skb, &TCP_SKB_CB(skb)->header.h4.opt,
698 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
699 &arg, arg.iov[0].iov_len);
700
701 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
702 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
703 local_bh_enable();
704
705#ifdef CONFIG_TCP_MD5SIG
706out:
707 rcu_read_unlock();
708#endif
709}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
 * outside socket context, is admittedly ugly.
 */
715static void tcp_v4_send_ack(struct net *net,
716 struct sk_buff *skb, u32 seq, u32 ack,
717 u32 win, u32 tsval, u32 tsecr, int oif,
718 struct tcp_md5sig_key *key,
719 int reply_flags, u8 tos)
720{
721 const struct tcphdr *th = tcp_hdr(skb);
722 struct {
723 struct tcphdr th;
724 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
725#ifdef CONFIG_TCP_MD5SIG
726 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
727#endif
728 ];
729 } rep;
730 struct ip_reply_arg arg;
731
732 memset(&rep.th, 0, sizeof(struct tcphdr));
733 memset(&arg, 0, sizeof(arg));
734
735 arg.iov[0].iov_base = (unsigned char *)&rep;
736 arg.iov[0].iov_len = sizeof(rep.th);
737 if (tsecr) {
738 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
739 (TCPOPT_TIMESTAMP << 8) |
740 TCPOLEN_TIMESTAMP);
741 rep.opt[1] = htonl(tsval);
742 rep.opt[2] = htonl(tsecr);
743 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
744 }
745
746
747 rep.th.dest = th->source;
748 rep.th.source = th->dest;
749 rep.th.doff = arg.iov[0].iov_len / 4;
750 rep.th.seq = htonl(seq);
751 rep.th.ack_seq = htonl(ack);
752 rep.th.ack = 1;
753 rep.th.window = htons(win);
754
755#ifdef CONFIG_TCP_MD5SIG
756 if (key) {
757 int offset = (tsecr) ? 3 : 0;
758
759 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
760 (TCPOPT_NOP << 16) |
761 (TCPOPT_MD5SIG << 8) |
762 TCPOLEN_MD5SIG);
763 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
764 rep.th.doff = arg.iov[0].iov_len/4;
765
766 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
767 key, ip_hdr(skb)->saddr,
768 ip_hdr(skb)->daddr, &rep.th);
769 }
770#endif
771 arg.flags = reply_flags;
772 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
773 ip_hdr(skb)->saddr,
774 arg.iov[0].iov_len, IPPROTO_TCP, 0);
775 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
776 if (oif)
777 arg.bound_dev_if = oif;
778 arg.tos = tos;
779 local_bh_disable();
780 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
781 skb, &TCP_SKB_CB(skb)->header.h4.opt,
782 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
783 &arg, arg.iov[0].iov_len);
784
785 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
786 local_bh_enable();
787}
788
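/* Answer a segment arriving for a TIME-WAIT socket with an ACK built from
 * the remembered sequence and timestamp state, then drop the TIME-WAIT
 * reference taken by the lookup.
 */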
789static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
790{
791 struct inet_timewait_sock *tw = inet_twsk(sk);
792 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
793
794 tcp_v4_send_ack(sock_net(sk), skb,
795 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
796 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
797 tcp_time_stamp + tcptw->tw_ts_offset,
798 tcptw->tw_ts_recent,
799 tw->tw_bound_dev_if,
800 tcp_twsk_md5_key(tcptw),
801 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
802 tw->tw_tos
803 );
804
805 inet_twsk_put(tw);
806}
807
808static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
809 struct request_sock *req)
810{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
814 u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
815 tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
822 tcp_v4_send_ack(sock_net(sk), skb, seq,
823 tcp_rsk(req)->rcv_nxt,
824 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
825 tcp_time_stamp,
826 req->ts_recent,
827 0,
828 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
829 AF_INET),
830 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
831 ip_hdr(skb)->tos);
832}

/*
 * Send a SYN-ACK after having received a SYN.
 * This still operates on a request_sock only, not on a big socket.
 */
839static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
840 struct flowi *fl,
841 struct request_sock *req,
842 struct tcp_fastopen_cookie *foc,
843 enum tcp_synack_type synack_type)
844{
845 const struct inet_request_sock *ireq = inet_rsk(req);
846 struct flowi4 fl4;
847 int err = -1;
848 struct sk_buff *skb;
849
850
851 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
852 return -1;
853
854 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
855
856 if (skb) {
857 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
858
859 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
860 ireq->ir_rmt_addr,
861 ireq->opt);
862 err = net_xmit_eval(err);
863 }
864
865 return err;
866}

/*
 * IPv4 request_sock destructor.
 */
871static void tcp_v4_reqsk_destructor(struct request_sock *req)
872{
873 kfree(inet_rsk(req)->opt);
874}
875
876#ifdef CONFIG_TCP_MD5SIG

/*
 * RFC 2385 MD5 checksumming requires a mapping of IP address -> MD5 key.
 * We need to maintain these in the sk structure.
 */
884struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
885 const union tcp_md5_addr *addr,
886 int family)
887{
888 const struct tcp_sock *tp = tcp_sk(sk);
889 struct tcp_md5sig_key *key;
890 unsigned int size = sizeof(struct in_addr);
891 const struct tcp_md5sig_info *md5sig;
892
893
894 md5sig = rcu_dereference_check(tp->md5sig_info,
895 lockdep_sock_is_held(sk));
896 if (!md5sig)
897 return NULL;
898#if IS_ENABLED(CONFIG_IPV6)
899 if (family == AF_INET6)
900 size = sizeof(struct in6_addr);
901#endif
902 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
903 if (key->family != family)
904 continue;
905 if (!memcmp(&key->addr, addr, size))
906 return key;
907 }
908 return NULL;
909}
910EXPORT_SYMBOL(tcp_md5_do_lookup);
911
912struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
913 const struct sock *addr_sk)
914{
915 const union tcp_md5_addr *addr;
916
917 addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
918 return tcp_md5_do_lookup(sk, addr, AF_INET);
919}
920EXPORT_SYMBOL(tcp_v4_md5_lookup);
921
922
923int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
924 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
925{
926
927 struct tcp_md5sig_key *key;
928 struct tcp_sock *tp = tcp_sk(sk);
929 struct tcp_md5sig_info *md5sig;
930
931 key = tcp_md5_do_lookup(sk, addr, family);
932 if (key) {
933
934 memcpy(key->key, newkey, newkeylen);
935 key->keylen = newkeylen;
936 return 0;
937 }
938
939 md5sig = rcu_dereference_protected(tp->md5sig_info,
940 lockdep_sock_is_held(sk));
941 if (!md5sig) {
942 md5sig = kmalloc(sizeof(*md5sig), gfp);
943 if (!md5sig)
944 return -ENOMEM;
945
946 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
947 INIT_HLIST_HEAD(&md5sig->head);
948 rcu_assign_pointer(tp->md5sig_info, md5sig);
949 }
950
951 key = sock_kmalloc(sk, sizeof(*key), gfp);
952 if (!key)
953 return -ENOMEM;
954 if (!tcp_alloc_md5sig_pool()) {
955 sock_kfree_s(sk, key, sizeof(*key));
956 return -ENOMEM;
957 }
958
959 memcpy(key->key, newkey, newkeylen);
960 key->keylen = newkeylen;
961 key->family = family;
962 memcpy(&key->addr, addr,
963 (family == AF_INET6) ? sizeof(struct in6_addr) :
964 sizeof(struct in_addr));
965 hlist_add_head_rcu(&key->node, &md5sig->head);
966 return 0;
967}
968EXPORT_SYMBOL(tcp_md5_do_add);
969
970int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
971{
972 struct tcp_md5sig_key *key;
973
974 key = tcp_md5_do_lookup(sk, addr, family);
975 if (!key)
976 return -ENOENT;
977 hlist_del_rcu(&key->node);
978 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
979 kfree_rcu(key, rcu);
980 return 0;
981}
982EXPORT_SYMBOL(tcp_md5_do_del);
983
984static void tcp_clear_md5_list(struct sock *sk)
985{
986 struct tcp_sock *tp = tcp_sk(sk);
987 struct tcp_md5sig_key *key;
988 struct hlist_node *n;
989 struct tcp_md5sig_info *md5sig;
990
991 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
992
993 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
994 hlist_del_rcu(&key->node);
995 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
996 kfree_rcu(key, rcu);
997 }
998}
999
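/* setsockopt(TCP_MD5SIG) handler: copy the key description from user space
 * and add, replace, or (for a zero key length) delete the peer's key.
 */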
1000static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1001 int optlen)
1002{
1003 struct tcp_md5sig cmd;
1004 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1005
1006 if (optlen < sizeof(cmd))
1007 return -EINVAL;
1008
1009 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1010 return -EFAULT;
1011
1012 if (sin->sin_family != AF_INET)
1013 return -EINVAL;
1014
1015 if (!cmd.tcpm_keylen)
1016 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1017 AF_INET);
1018
1019 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1020 return -EINVAL;
1021
1022 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1023 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1024 GFP_KERNEL);
1025}
1026
1027static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1028 __be32 daddr, __be32 saddr,
1029 const struct tcphdr *th, int nbytes)
1030{
1031 struct tcp4_pseudohdr *bp;
1032 struct scatterlist sg;
1033 struct tcphdr *_th;
1034
1035 bp = hp->scratch;
1036 bp->saddr = saddr;
1037 bp->daddr = daddr;
1038 bp->pad = 0;
1039 bp->protocol = IPPROTO_TCP;
1040 bp->len = cpu_to_be16(nbytes);
1041
1042 _th = (struct tcphdr *)(bp + 1);
1043 memcpy(_th, th, sizeof(*th));
1044 _th->check = 0;
1045
1046 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1047 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1048 sizeof(*bp) + sizeof(*th));
1049 return crypto_ahash_update(hp->md5_req);
1050}
1051
1052static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1053 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1054{
1055 struct tcp_md5sig_pool *hp;
1056 struct ahash_request *req;
1057
1058 hp = tcp_get_md5sig_pool();
1059 if (!hp)
1060 goto clear_hash_noput;
1061 req = hp->md5_req;
1062
1063 if (crypto_ahash_init(req))
1064 goto clear_hash;
1065 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
1066 goto clear_hash;
1067 if (tcp_md5_hash_key(hp, key))
1068 goto clear_hash;
1069 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1070 if (crypto_ahash_final(req))
1071 goto clear_hash;
1072
1073 tcp_put_md5sig_pool();
1074 return 0;
1075
1076clear_hash:
1077 tcp_put_md5sig_pool();
1078clear_hash_noput:
1079 memset(md5_hash, 0, 16);
1080 return 1;
1081}
1082
1083int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1084 const struct sock *sk,
1085 const struct sk_buff *skb)
1086{
1087 struct tcp_md5sig_pool *hp;
1088 struct ahash_request *req;
1089 const struct tcphdr *th = tcp_hdr(skb);
1090 __be32 saddr, daddr;
1091
1092 if (sk) {
1093 saddr = sk->sk_rcv_saddr;
1094 daddr = sk->sk_daddr;
1095 } else {
1096 const struct iphdr *iph = ip_hdr(skb);
1097 saddr = iph->saddr;
1098 daddr = iph->daddr;
1099 }
1100
1101 hp = tcp_get_md5sig_pool();
1102 if (!hp)
1103 goto clear_hash_noput;
1104 req = hp->md5_req;
1105
1106 if (crypto_ahash_init(req))
1107 goto clear_hash;
1108
1109 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
1110 goto clear_hash;
1111 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1112 goto clear_hash;
1113 if (tcp_md5_hash_key(hp, key))
1114 goto clear_hash;
1115 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1116 if (crypto_ahash_final(req))
1117 goto clear_hash;
1118
1119 tcp_put_md5sig_pool();
1120 return 0;
1121
1122clear_hash:
1123 tcp_put_md5sig_pool();
1124clear_hash_noput:
1125 memset(md5_hash, 0, 16);
1126 return 1;
1127}
1128EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1129
1130#endif

/* Called with rcu_read_lock() held. */
1133static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1134 const struct sk_buff *skb)
1135{
1136#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each arriving TCP segment, so we want to be
	 * efficient.  There are three drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
1145 const __u8 *hash_location = NULL;
1146 struct tcp_md5sig_key *hash_expected;
1147 const struct iphdr *iph = ip_hdr(skb);
1148 const struct tcphdr *th = tcp_hdr(skb);
1149 int genhash;
1150 unsigned char newhash[16];
1151
1152 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1153 AF_INET);
1154 hash_location = tcp_parse_md5sig_option(th);
1155
1156
1157 if (!hash_expected && !hash_location)
1158 return false;
1159
1160 if (hash_expected && !hash_location) {
1161 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1162 return true;
1163 }
1164
1165 if (!hash_expected && hash_location) {
1166 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1167 return true;
1168 }

	/* Okay, so we have both hash_expected and hash_location -
	 * we need to calculate the hash and compare.
	 */
1173 genhash = tcp_v4_md5_hash_skb(newhash,
1174 hash_expected,
1175 NULL, skb);
1176
1177 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1178 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1179 &iph->saddr, ntohs(th->source),
1180 &iph->daddr, ntohs(th->dest),
1181 genhash ? " tcp_v4_calc_md5_hash failed"
1182 : "");
1183 return true;
1184 }
1185 return false;
1186#endif
1187 return false;
1188}
1189
1190static void tcp_v4_init_req(struct request_sock *req,
1191 const struct sock *sk_listener,
1192 struct sk_buff *skb)
1193{
1194 struct inet_request_sock *ireq = inet_rsk(req);
1195
1196 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1197 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1198 ireq->no_srccheck = inet_sk(sk_listener)->transparent;
1199 ireq->opt = tcp_v4_save_options(skb);
1200}
1201
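/* Route the SYN-ACK for a request socket; when @strict is non-NULL, report
 * whether the chosen route still points at the peer address recorded in
 * the request.
 */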
1202static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1203 struct flowi *fl,
1204 const struct request_sock *req,
1205 bool *strict)
1206{
1207 struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1208
1209 if (strict) {
1210 if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1211 *strict = true;
1212 else
1213 *strict = false;
1214 }
1215
1216 return dst;
1217}
1218
1219struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1220 .family = PF_INET,
1221 .obj_size = sizeof(struct tcp_request_sock),
1222 .rtx_syn_ack = tcp_rtx_synack,
1223 .send_ack = tcp_v4_reqsk_send_ack,
1224 .destructor = tcp_v4_reqsk_destructor,
1225 .send_reset = tcp_v4_send_reset,
1226 .syn_ack_timeout = tcp_syn_ack_timeout,
1227};
1228
1229static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1230 .mss_clamp = TCP_MSS_DEFAULT,
1231#ifdef CONFIG_TCP_MD5SIG
1232 .req_md5_lookup = tcp_v4_md5_lookup,
1233 .calc_md5_hash = tcp_v4_md5_hash_skb,
1234#endif
1235 .init_req = tcp_v4_init_req,
1236#ifdef CONFIG_SYN_COOKIES
1237 .cookie_init_seq = cookie_v4_init_sequence,
1238#endif
1239 .route_req = tcp_v4_route_req,
1240 .init_seq = tcp_v4_init_sequence,
1241 .send_synack = tcp_v4_send_synack,
1242};
1243
1244int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1245{
1246
1247 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1248 goto drop;
1249
1250 return tcp_conn_request(&tcp_request_sock_ops,
1251 &tcp_request_sock_ipv4_ops, sk, skb);
1252
1253drop:
1254 tcp_listendrop(sk);
1255 return 0;
1256}
1257EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three-way handshake has completed - we got a valid SYN-ACK -
 * now create the new socket.
 */
1264struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1265 struct request_sock *req,
1266 struct dst_entry *dst,
1267 struct request_sock *req_unhash,
1268 bool *own_req)
1269{
1270 struct inet_request_sock *ireq;
1271 struct inet_sock *newinet;
1272 struct tcp_sock *newtp;
1273 struct sock *newsk;
1274#ifdef CONFIG_TCP_MD5SIG
1275 struct tcp_md5sig_key *key;
1276#endif
1277 struct ip_options_rcu *inet_opt;
1278
1279 if (sk_acceptq_is_full(sk))
1280 goto exit_overflow;
1281
1282 newsk = tcp_create_openreq_child(sk, req, skb);
1283 if (!newsk)
1284 goto exit_nonewsk;
1285
1286 newsk->sk_gso_type = SKB_GSO_TCPV4;
1287 inet_sk_rx_dst_set(newsk, skb);
1288
1289 newtp = tcp_sk(newsk);
1290 newinet = inet_sk(newsk);
1291 ireq = inet_rsk(req);
1292 sk_daddr_set(newsk, ireq->ir_rmt_addr);
1293 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1294 newsk->sk_bound_dev_if = ireq->ir_iif;
1295 newinet->inet_saddr = ireq->ir_loc_addr;
1296 inet_opt = ireq->opt;
1297 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1298 ireq->opt = NULL;
1299 newinet->mc_index = inet_iif(skb);
1300 newinet->mc_ttl = ip_hdr(skb)->ttl;
1301 newinet->rcv_tos = ip_hdr(skb)->tos;
1302 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1303 if (inet_opt)
1304 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1305 newinet->inet_id = newtp->write_seq ^ jiffies;
1306
1307 if (!dst) {
1308 dst = inet_csk_route_child_sock(sk, newsk, req);
1309 if (!dst)
1310 goto put_and_exit;
1311 } else {
1312
1313 }
1314 sk_setup_caps(newsk, dst);
1315
1316 tcp_ca_openreq_child(newsk, dst);
1317
1318 tcp_sync_mss(newsk, dst_mtu(dst));
1319 newtp->advmss = dst_metric_advmss(dst);
1320 if (tcp_sk(sk)->rx_opt.user_mss &&
1321 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1322 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1323
1324 tcp_initialize_rcv_mss(newsk);
1325
1326#ifdef CONFIG_TCP_MD5SIG
1327
1328 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1329 AF_INET);
1330 if (key) {
		/*
		 * We're using one, so create a matching key on the newsk
		 * structure.  If we fail to get memory, we simply end up not
		 * copying the key across.
		 */
1337 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1338 AF_INET, key->key, key->keylen, GFP_ATOMIC);
1339 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1340 }
1341#endif
1342
1343 if (__inet_inherit_port(sk, newsk) < 0)
1344 goto put_and_exit;
1345 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1346 if (*own_req)
1347 tcp_move_syn(newtp, req);
1348
1349 return newsk;
1350
1351exit_overflow:
1352 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1353exit_nonewsk:
1354 dst_release(dst);
1355exit:
1356 tcp_listendrop(sk);
1357 return NULL;
1358put_and_exit:
1359 inet_csk_prepare_forced_close(newsk);
1360 tcp_done(newsk);
1361 goto exit;
1362}
1363EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1364
1365static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1366{
1367#ifdef CONFIG_SYN_COOKIES
1368 const struct tcphdr *th = tcp_hdr(skb);
1369
1370 if (!th->syn)
1371 sk = cookie_v4_check(sk, skb);
1372#endif
1373 return sk;
1374}

/* The socket must have its spinlock held when we get here, unless it is
 * a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when doing backlog
 * processing we use the BH locking scheme.  This is because we cannot
 * sleep with the original spinlock held.
 */
1384int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1385{
1386 struct sock *rsk;
1387
1388 if (sk->sk_state == TCP_ESTABLISHED) {
1389 struct dst_entry *dst = sk->sk_rx_dst;
1390
1391 sock_rps_save_rxhash(sk, skb);
1392 sk_mark_napi_id(sk, skb);
1393 if (dst) {
1394 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1395 !dst->ops->check(dst, 0)) {
1396 dst_release(dst);
1397 sk->sk_rx_dst = NULL;
1398 }
1399 }
1400 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1401 return 0;
1402 }
1403
1404 if (tcp_checksum_complete(skb))
1405 goto csum_err;
1406
1407 if (sk->sk_state == TCP_LISTEN) {
1408 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1409
1410 if (!nsk)
1411 goto discard;
1412 if (nsk != sk) {
1413 sock_rps_save_rxhash(nsk, skb);
1414 sk_mark_napi_id(nsk, skb);
1415 if (tcp_child_process(sk, nsk, skb)) {
1416 rsk = nsk;
1417 goto reset;
1418 }
1419 return 0;
1420 }
1421 } else
1422 sock_rps_save_rxhash(sk, skb);
1423
1424 if (tcp_rcv_state_process(sk, skb)) {
1425 rsk = sk;
1426 goto reset;
1427 }
1428 return 0;
1429
1430reset:
1431 tcp_v4_send_reset(rsk, skb);
1432discard:
1433 kfree_skb(skb);
	/* Be careful here.  If this function grows more complicated and the
	 * compiler runs out of registers, sk might be clobbered by the time
	 * we get here; the current version compiles correctly, but you have
	 * been warned.
	 */
1439 return 0;
1440
1441csum_err:
1442 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1443 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1444 goto discard;
1445}
1446EXPORT_SYMBOL(tcp_v4_do_rcv);
1447
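/* Early demux: look up the established socket while the packet is still in
 * the IP layer so that its cached rx dst can short-circuit input routing.
 */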
1448void tcp_v4_early_demux(struct sk_buff *skb)
1449{
1450 const struct iphdr *iph;
1451 const struct tcphdr *th;
1452 struct sock *sk;
1453
1454 if (skb->pkt_type != PACKET_HOST)
1455 return;
1456
1457 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1458 return;
1459
1460 iph = ip_hdr(skb);
1461 th = tcp_hdr(skb);
1462
1463 if (th->doff < sizeof(struct tcphdr) / 4)
1464 return;
1465
1466 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1467 iph->saddr, th->source,
1468 iph->daddr, ntohs(th->dest),
1469 skb->skb_iif);
1470 if (sk) {
1471 skb->sk = sk;
1472 skb->destructor = sock_edemux;
1473 if (sk_fullsock(sk)) {
1474 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1475
1476 if (dst)
1477 dst = dst_check(dst, 0);
1478 if (dst &&
1479 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1480 skb_dst_set_noref(skb, dst);
1481 }
1482 }
1483}

/* The packet is added to a VJ-style prequeue for processing in process
 * context, if a reader task is waiting on the socket.  This trades a
 * little latency for doing the bulk of receive processing in the context
 * of the task that will consume the data.
 */
1492bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1493{
1494 struct tcp_sock *tp = tcp_sk(sk);
1495
1496 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1497 return false;
1498
1499 if (skb->len <= tcp_hdrlen(skb) &&
1500 skb_queue_len(&tp->ucopy.prequeue) == 0)
1501 return false;

	/* Before escaping the RCU-protected region we must take care of the
	 * skb dst.  Prequeue is only used for established sockets; the skb
	 * dst is only needed to populate sk->sk_rx_dst, so it can be dropped
	 * once that cache is already set.
	 */
1509 if (likely(sk->sk_rx_dst))
1510 skb_dst_drop(skb);
1511 else
1512 skb_dst_force_safe(skb);
1513
1514 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1515 tp->ucopy.memory += skb->truesize;
1516 if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
1517 tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
1518 struct sk_buff *skb1;
1519
1520 BUG_ON(sock_owned_by_user(sk));
1521 __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
1522 skb_queue_len(&tp->ucopy.prequeue));
1523
1524 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1525 sk_backlog_rcv(sk, skb1);
1526
1527 tp->ucopy.memory = 0;
1528 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1529 wake_up_interruptible_sync_poll(sk_sleep(sk),
1530 POLLIN | POLLRDNORM | POLLRDBAND);
1531 if (!inet_csk_ack_scheduled(sk))
1532 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1533 (3 * tcp_rto_min(sk)) / 4,
1534 TCP_RTO_MAX);
1535 }
1536 return true;
1537}
1538EXPORT_SYMBOL(tcp_prequeue);

/*
 * Main IPv4 TCP receive entry point, called from the IP layer.
 */
1544int tcp_v4_rcv(struct sk_buff *skb)
1545{
1546 struct net *net = dev_net(skb->dev);
1547 const struct iphdr *iph;
1548 const struct tcphdr *th;
1549 bool refcounted;
1550 struct sock *sk;
1551 int ret;
1552
1553 if (skb->pkt_type != PACKET_HOST)
1554 goto discard_it;
1555
1556
1557 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1558
1559 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1560 goto discard_it;
1561
1562 th = (const struct tcphdr *)skb->data;
1563
1564 if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1565 goto bad_packet;
1566 if (!pskb_may_pull(skb, th->doff * 4))
1567 goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer those checks and only verify the checksum here.
	 */
1574 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1575 goto csum_error;
1576
1577 th = (const struct tcphdr *)skb->data;
1578 iph = ip_hdr(skb);

	/* This is tricky: we move IPCB to its correct location inside
	 * TCP_SKB_CB(); barrier() makes sure the compiler won't play
	 * aliasing games with the overlapping storage.
	 */
1582 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1583 sizeof(struct inet_skb_parm));
1584 barrier();
1585
1586 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1587 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1588 skb->len - th->doff * 4);
1589 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1590 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1591 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1592 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1593 TCP_SKB_CB(skb)->sacked = 0;
1594
1595lookup:
1596 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1597 th->dest, &refcounted);
1598 if (!sk)
1599 goto no_tcp_socket;
1600
1601process:
1602 if (sk->sk_state == TCP_TIME_WAIT)
1603 goto do_time_wait;
1604
1605 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1606 struct request_sock *req = inet_reqsk(sk);
1607 struct sock *nsk;
1608
1609 sk = req->rsk_listener;
1610 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1611 reqsk_put(req);
1612 goto discard_it;
1613 }
1614 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1615 inet_csk_reqsk_queue_drop_and_put(sk, req);
1616 goto lookup;
1617 }
		/* We own a reference on the listener; increase it again
		 * as we might lose it too soon.
		 */
1621 sock_hold(sk);
1622 refcounted = true;
1623 nsk = tcp_check_req(sk, skb, req, false);
1624 if (!nsk) {
1625 reqsk_put(req);
1626 goto discard_and_relse;
1627 }
1628 if (nsk == sk) {
1629 reqsk_put(req);
1630 } else if (tcp_child_process(sk, nsk, skb)) {
1631 tcp_v4_send_reset(nsk, skb);
1632 goto discard_and_relse;
1633 } else {
1634 sock_put(sk);
1635 return 0;
1636 }
1637 }
1638 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1639 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1640 goto discard_and_relse;
1641 }
1642
1643 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1644 goto discard_and_relse;
1645
1646 if (tcp_v4_inbound_md5_hash(sk, skb))
1647 goto discard_and_relse;
1648
1649 nf_reset(skb);
1650
1651 if (sk_filter(sk, skb))
1652 goto discard_and_relse;
1653
1654 skb->dev = NULL;
1655
1656 if (sk->sk_state == TCP_LISTEN) {
1657 ret = tcp_v4_do_rcv(sk, skb);
1658 goto put_and_return;
1659 }
1660
1661 sk_incoming_cpu_update(sk);
1662
1663 bh_lock_sock_nested(sk);
1664 tcp_segs_in(tcp_sk(sk), skb);
1665 ret = 0;
1666 if (!sock_owned_by_user(sk)) {
1667 if (!tcp_prequeue(sk, skb))
1668 ret = tcp_v4_do_rcv(sk, skb);
1669 } else if (unlikely(sk_add_backlog(sk, skb,
1670 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1671 bh_unlock_sock(sk);
1672 __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
1673 goto discard_and_relse;
1674 }
1675 bh_unlock_sock(sk);
1676
1677put_and_return:
1678 if (refcounted)
1679 sock_put(sk);
1680
1681 return ret;
1682
1683no_tcp_socket:
1684 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1685 goto discard_it;
1686
1687 if (tcp_checksum_complete(skb)) {
1688csum_error:
1689 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1690bad_packet:
1691 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1692 } else {
1693 tcp_v4_send_reset(NULL, skb);
1694 }
1695
1696discard_it:
1697
1698 kfree_skb(skb);
1699 return 0;
1700
1701discard_and_relse:
1702 sk_drops_add(sk, skb);
1703 if (refcounted)
1704 sock_put(sk);
1705 goto discard_it;
1706
1707do_time_wait:
1708 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1709 inet_twsk_put(inet_twsk(sk));
1710 goto discard_it;
1711 }
1712
1713 if (tcp_checksum_complete(skb)) {
1714 inet_twsk_put(inet_twsk(sk));
1715 goto csum_error;
1716 }
1717 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1718 case TCP_TW_SYN: {
1719 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1720 &tcp_hashinfo, skb,
1721 __tcp_hdrlen(th),
1722 iph->saddr, th->source,
1723 iph->daddr, th->dest,
1724 inet_iif(skb));
1725 if (sk2) {
1726 inet_twsk_deschedule_put(inet_twsk(sk));
1727 sk = sk2;
1728 refcounted = false;
1729 goto process;
1730 }
		/* Fall through to ACK */
1732 }
1733 case TCP_TW_ACK:
1734 tcp_v4_timewait_ack(sk, skb);
1735 break;
1736 case TCP_TW_RST:
1737 tcp_v4_send_reset(sk, skb);
1738 inet_twsk_deschedule_put(inet_twsk(sk));
1739 goto discard_it;
1740 case TCP_TW_SUCCESS:;
1741 }
1742 goto discard_it;
1743}
1744
1745static struct timewait_sock_ops tcp_timewait_sock_ops = {
1746 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1747 .twsk_unique = tcp_twsk_unique,
1748 .twsk_destructor= tcp_twsk_destructor,
1749};
1750
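/* Cache the validated input route and incoming interface of this skb on
 * the socket so later packets can skip the routing lookup.
 */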
1751void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1752{
1753 struct dst_entry *dst = skb_dst(skb);
1754
1755 if (dst && dst_hold_safe(dst)) {
1756 sk->sk_rx_dst = dst;
1757 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1758 }
1759}
1760EXPORT_SYMBOL(inet_sk_rx_dst_set);
1761
1762const struct inet_connection_sock_af_ops ipv4_specific = {
1763 .queue_xmit = ip_queue_xmit,
1764 .send_check = tcp_v4_send_check,
1765 .rebuild_header = inet_sk_rebuild_header,
1766 .sk_rx_dst_set = inet_sk_rx_dst_set,
1767 .conn_request = tcp_v4_conn_request,
1768 .syn_recv_sock = tcp_v4_syn_recv_sock,
1769 .net_header_len = sizeof(struct iphdr),
1770 .setsockopt = ip_setsockopt,
1771 .getsockopt = ip_getsockopt,
1772 .addr2sockaddr = inet_csk_addr2sockaddr,
1773 .sockaddr_len = sizeof(struct sockaddr_in),
1774 .bind_conflict = inet_csk_bind_conflict,
1775#ifdef CONFIG_COMPAT
1776 .compat_setsockopt = compat_ip_setsockopt,
1777 .compat_getsockopt = compat_ip_getsockopt,
1778#endif
1779 .mtu_reduced = tcp_v4_mtu_reduced,
1780};
1781EXPORT_SYMBOL(ipv4_specific);
1782
1783#ifdef CONFIG_TCP_MD5SIG
1784static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1785 .md5_lookup = tcp_v4_md5_lookup,
1786 .calc_md5_hash = tcp_v4_md5_hash_skb,
1787 .md5_parse = tcp_v4_parse_md5_keys,
1788};
1789#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc(), so they need not be done here.
 */
1794static int tcp_v4_init_sock(struct sock *sk)
1795{
1796 struct inet_connection_sock *icsk = inet_csk(sk);
1797
1798 tcp_init_sock(sk);
1799
1800 icsk->icsk_af_ops = &ipv4_specific;
1801
1802#ifdef CONFIG_TCP_MD5SIG
1803 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1804#endif
1805
1806 return 0;
1807}
1808
1809void tcp_v4_destroy_sock(struct sock *sk)
1810{
1811 struct tcp_sock *tp = tcp_sk(sk);
1812
1813 tcp_clear_xmit_timers(sk);
1814
1815 tcp_cleanup_congestion_control(sk);
1816
1817
1818 tcp_write_queue_purge(sk);
1819
1820
1821 __skb_queue_purge(&tp->out_of_order_queue);
1822
1823#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 data, if any. */
1825 if (tp->md5sig_info) {
1826 tcp_clear_md5_list(sk);
1827 kfree_rcu(tp->md5sig_info, rcu);
1828 tp->md5sig_info = NULL;
1829 }
1830#endif
1831
1832
1833 __skb_queue_purge(&tp->ucopy.prequeue);
1834
1835
1836 if (inet_csk(sk)->icsk_bind_hash)
1837 inet_put_port(sk);
1838
1839 BUG_ON(tp->fastopen_rsk);
1840
1841
1842 tcp_free_fastopen_req(tp);
1843 tcp_saved_syn_free(tp);
1844
1845 local_bh_disable();
1846 sk_sockets_allocated_dec(sk);
1847 local_bh_enable();
1848
1849 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1850 sock_release_memcg(sk);
1851}
1852EXPORT_SYMBOL(tcp_v4_destroy_sock);
1853
1854#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get the next listener socket following cur.  If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero the very first socket in the hash table is returned.
 */
1862static void *listening_get_next(struct seq_file *seq, void *cur)
1863{
1864 struct tcp_iter_state *st = seq->private;
1865 struct net *net = seq_file_net(seq);
1866 struct inet_listen_hashbucket *ilb;
1867 struct inet_connection_sock *icsk;
1868 struct sock *sk = cur;
1869
1870 if (!sk) {
1871get_head:
1872 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1873 spin_lock_bh(&ilb->lock);
1874 sk = sk_head(&ilb->head);
1875 st->offset = 0;
1876 goto get_sk;
1877 }
1878 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1879 ++st->num;
1880 ++st->offset;
1881
1882 sk = sk_next(sk);
1883get_sk:
1884 sk_for_each_from(sk) {
1885 if (!net_eq(sock_net(sk), net))
1886 continue;
1887 if (sk->sk_family == st->family)
1888 return sk;
1889 icsk = inet_csk(sk);
1890 }
1891 spin_unlock_bh(&ilb->lock);
1892 st->offset = 0;
1893 if (++st->bucket < INET_LHTABLE_SIZE)
1894 goto get_head;
1895 return NULL;
1896}
1897
1898static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1899{
1900 struct tcp_iter_state *st = seq->private;
1901 void *rc;
1902
1903 st->bucket = 0;
1904 st->offset = 0;
1905 rc = listening_get_next(seq, NULL);
1906
1907 while (rc && *pos) {
1908 rc = listening_get_next(seq, rc);
1909 --*pos;
1910 }
1911 return rc;
1912}
1913
1914static inline bool empty_bucket(const struct tcp_iter_state *st)
1915{
1916 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1917}

/*
 * Get the first established socket starting from the bucket given in
 * st->bucket.  If st->bucket is zero, the very first socket in the hash
 * is returned.
 */
1923static void *established_get_first(struct seq_file *seq)
1924{
1925 struct tcp_iter_state *st = seq->private;
1926 struct net *net = seq_file_net(seq);
1927 void *rc = NULL;
1928
1929 st->offset = 0;
1930 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1931 struct sock *sk;
1932 struct hlist_nulls_node *node;
1933 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1934
1935
1936 if (empty_bucket(st))
1937 continue;
1938
1939 spin_lock_bh(lock);
1940 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1941 if (sk->sk_family != st->family ||
1942 !net_eq(sock_net(sk), net)) {
1943 continue;
1944 }
1945 rc = sk;
1946 goto out;
1947 }
1948 spin_unlock_bh(lock);
1949 }
1950out:
1951 return rc;
1952}
1953
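/* Advance the iterator to the next socket in the established hash,
 * moving on to the next bucket when the current chain is exhausted.
 */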
1954static void *established_get_next(struct seq_file *seq, void *cur)
1955{
1956 struct sock *sk = cur;
1957 struct hlist_nulls_node *node;
1958 struct tcp_iter_state *st = seq->private;
1959 struct net *net = seq_file_net(seq);
1960
1961 ++st->num;
1962 ++st->offset;
1963
1964 sk = sk_nulls_next(sk);
1965
1966 sk_nulls_for_each_from(sk, node) {
1967 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1968 return sk;
1969 }
1970
1971 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1972 ++st->bucket;
1973 return established_get_first(seq);
1974}
1975
1976static void *established_get_idx(struct seq_file *seq, loff_t pos)
1977{
1978 struct tcp_iter_state *st = seq->private;
1979 void *rc;
1980
1981 st->bucket = 0;
1982 rc = established_get_first(seq);
1983
1984 while (rc && pos) {
1985 rc = established_get_next(seq, rc);
1986 --pos;
1987 }
1988 return rc;
1989}
1990
1991static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
1992{
1993 void *rc;
1994 struct tcp_iter_state *st = seq->private;
1995
1996 st->state = TCP_SEQ_STATE_LISTENING;
1997 rc = listening_get_idx(seq, &pos);
1998
1999 if (!rc) {
2000 st->state = TCP_SEQ_STATE_ESTABLISHED;
2001 rc = established_get_idx(seq, pos);
2002 }
2003
2004 return rc;
2005}
2006
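/* Resume iteration at the position remembered from the previous read by
 * re-walking the current bucket up to the saved offset.
 */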
2007static void *tcp_seek_last_pos(struct seq_file *seq)
2008{
2009 struct tcp_iter_state *st = seq->private;
2010 int offset = st->offset;
2011 int orig_num = st->num;
2012 void *rc = NULL;
2013
2014 switch (st->state) {
2015 case TCP_SEQ_STATE_LISTENING:
2016 if (st->bucket >= INET_LHTABLE_SIZE)
2017 break;
2018 st->state = TCP_SEQ_STATE_LISTENING;
2019 rc = listening_get_next(seq, NULL);
2020 while (offset-- && rc)
2021 rc = listening_get_next(seq, rc);
2022 if (rc)
2023 break;
2024 st->bucket = 0;
2025 st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fall through */
2027 case TCP_SEQ_STATE_ESTABLISHED:
2028 if (st->bucket > tcp_hashinfo.ehash_mask)
2029 break;
2030 rc = established_get_first(seq);
2031 while (offset-- && rc)
2032 rc = established_get_next(seq, rc);
2033 }
2034
2035 st->num = orig_num;
2036
2037 return rc;
2038}
2039
2040static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2041{
2042 struct tcp_iter_state *st = seq->private;
2043 void *rc;
2044
2045 if (*pos && *pos == st->last_pos) {
2046 rc = tcp_seek_last_pos(seq);
2047 if (rc)
2048 goto out;
2049 }
2050
2051 st->state = TCP_SEQ_STATE_LISTENING;
2052 st->num = 0;
2053 st->bucket = 0;
2054 st->offset = 0;
2055 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2056
2057out:
2058 st->last_pos = *pos;
2059 return rc;
2060}
2061
2062static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2063{
2064 struct tcp_iter_state *st = seq->private;
2065 void *rc = NULL;
2066
2067 if (v == SEQ_START_TOKEN) {
2068 rc = tcp_get_idx(seq, 0);
2069 goto out;
2070 }
2071
2072 switch (st->state) {
2073 case TCP_SEQ_STATE_LISTENING:
2074 rc = listening_get_next(seq, v);
2075 if (!rc) {
2076 st->state = TCP_SEQ_STATE_ESTABLISHED;
2077 st->bucket = 0;
2078 st->offset = 0;
2079 rc = established_get_first(seq);
2080 }
2081 break;
2082 case TCP_SEQ_STATE_ESTABLISHED:
2083 rc = established_get_next(seq, v);
2084 break;
2085 }
2086out:
2087 ++*pos;
2088 st->last_pos = *pos;
2089 return rc;
2090}
2091
2092static void tcp_seq_stop(struct seq_file *seq, void *v)
2093{
2094 struct tcp_iter_state *st = seq->private;
2095
2096 switch (st->state) {
2097 case TCP_SEQ_STATE_LISTENING:
2098 if (v != SEQ_START_TOKEN)
2099 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2100 break;
2101 case TCP_SEQ_STATE_ESTABLISHED:
2102 if (v)
2103 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2104 break;
2105 }
2106}
2107
2108int tcp_seq_open(struct inode *inode, struct file *file)
2109{
2110 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2111 struct tcp_iter_state *s;
2112 int err;
2113
2114 err = seq_open_net(inode, file, &afinfo->seq_ops,
2115 sizeof(struct tcp_iter_state));
2116 if (err < 0)
2117 return err;
2118
2119 s = ((struct seq_file *)file->private_data)->private;
2120 s->family = afinfo->family;
2121 s->last_pos = 0;
2122 return 0;
2123}
2124EXPORT_SYMBOL(tcp_seq_open);
2125
2126int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2127{
2128 int rc = 0;
2129 struct proc_dir_entry *p;
2130
2131 afinfo->seq_ops.start = tcp_seq_start;
2132 afinfo->seq_ops.next = tcp_seq_next;
2133 afinfo->seq_ops.stop = tcp_seq_stop;
2134
2135 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2136 afinfo->seq_fops, afinfo);
2137 if (!p)
2138 rc = -ENOMEM;
2139 return rc;
2140}
2141EXPORT_SYMBOL(tcp_proc_register);
2142
2143void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2144{
2145 remove_proc_entry(afinfo->name, net->proc_net);
2146}
2147EXPORT_SYMBOL(tcp_proc_unregister);
2148
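/* Format one SYN_RECV request socket as a /proc/net/tcp line. */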
2149static void get_openreq4(const struct request_sock *req,
2150 struct seq_file *f, int i)
2151{
2152 const struct inet_request_sock *ireq = inet_rsk(req);
2153 long delta = req->rsk_timer.expires - jiffies;
2154
2155 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2156 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2157 i,
2158 ireq->ir_loc_addr,
2159 ireq->ir_num,
2160 ireq->ir_rmt_addr,
2161 ntohs(ireq->ir_rmt_port),
2162 TCP_SYN_RECV,
2163 0, 0,
2164 1,
2165 jiffies_delta_to_clock_t(delta),
2166 req->num_timeout,
2167 from_kuid_munged(seq_user_ns(f),
2168 sock_i_uid(req->rsk_listener)),
2169 0,
2170 0,
2171 0,
2172 req);
2173}
2174
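/* Format one full TCP socket as a /proc/net/tcp line. */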
2175static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2176{
2177 int timer_active;
2178 unsigned long timer_expires;
2179 const struct tcp_sock *tp = tcp_sk(sk);
2180 const struct inet_connection_sock *icsk = inet_csk(sk);
2181 const struct inet_sock *inet = inet_sk(sk);
2182 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2183 __be32 dest = inet->inet_daddr;
2184 __be32 src = inet->inet_rcv_saddr;
2185 __u16 destp = ntohs(inet->inet_dport);
2186 __u16 srcp = ntohs(inet->inet_sport);
2187 int rx_queue;
2188 int state;
2189
2190 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2191 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2192 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2193 timer_active = 1;
2194 timer_expires = icsk->icsk_timeout;
2195 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2196 timer_active = 4;
2197 timer_expires = icsk->icsk_timeout;
2198 } else if (timer_pending(&sk->sk_timer)) {
2199 timer_active = 2;
2200 timer_expires = sk->sk_timer.expires;
2201 } else {
2202 timer_active = 0;
2203 timer_expires = jiffies;
2204 }
2205
2206 state = sk_state_load(sk);
2207 if (state == TCP_LISTEN)
2208 rx_queue = sk->sk_ack_backlog;
2209 else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
2213 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2214
2215 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2216 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2217 i, src, srcp, dest, destp, state,
2218 tp->write_seq - tp->snd_una,
2219 rx_queue,
2220 timer_active,
2221 jiffies_delta_to_clock_t(timer_expires - jiffies),
2222 icsk->icsk_retransmits,
2223 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2224 icsk->icsk_probes_out,
2225 sock_i_ino(sk),
2226 atomic_read(&sk->sk_refcnt), sk,
2227 jiffies_to_clock_t(icsk->icsk_rto),
2228 jiffies_to_clock_t(icsk->icsk_ack.ato),
2229 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2230 tp->snd_cwnd,
2231 state == TCP_LISTEN ?
2232 fastopenq->max_qlen :
2233 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2234}
2235
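/* Format one TIME-WAIT socket as a /proc/net/tcp line. */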
2236static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2237 struct seq_file *f, int i)
2238{
2239 long delta = tw->tw_timer.expires - jiffies;
2240 __be32 dest, src;
2241 __u16 destp, srcp;
2242
2243 dest = tw->tw_daddr;
2244 src = tw->tw_rcv_saddr;
2245 destp = ntohs(tw->tw_dport);
2246 srcp = ntohs(tw->tw_sport);
2247
2248 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2249 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2250 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2251 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2252 atomic_read(&tw->tw_refcnt), tw);
2253}
2254
2255#define TMPSZ 150
2256
2257static int tcp4_seq_show(struct seq_file *seq, void *v)
2258{
2259 struct tcp_iter_state *st;
2260 struct sock *sk = v;
2261
2262 seq_setwidth(seq, TMPSZ - 1);
2263 if (v == SEQ_START_TOKEN) {
2264 seq_puts(seq, " sl local_address rem_address st tx_queue "
2265 "rx_queue tr tm->when retrnsmt uid timeout "
2266 "inode");
2267 goto out;
2268 }
2269 st = seq->private;
2270
2271 if (sk->sk_state == TCP_TIME_WAIT)
2272 get_timewait4_sock(v, seq, st->num);
2273 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2274 get_openreq4(v, seq, st->num);
2275 else
2276 get_tcp4_sock(v, seq, st->num);
2277out:
2278 seq_pad(seq, '\n');
2279 return 0;
2280}
2281
2282static const struct file_operations tcp_afinfo_seq_fops = {
2283 .owner = THIS_MODULE,
2284 .open = tcp_seq_open,
2285 .read = seq_read,
2286 .llseek = seq_lseek,
2287 .release = seq_release_net
2288};
2289
2290static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2291 .name = "tcp",
2292 .family = AF_INET,
2293 .seq_fops = &tcp_afinfo_seq_fops,
2294 .seq_ops = {
2295 .show = tcp4_seq_show,
2296 },
2297};
2298
2299static int __net_init tcp4_proc_init_net(struct net *net)
2300{
2301 return tcp_proc_register(net, &tcp4_seq_afinfo);
2302}
2303
2304static void __net_exit tcp4_proc_exit_net(struct net *net)
2305{
2306 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2307}
2308
2309static struct pernet_operations tcp4_net_ops = {
2310 .init = tcp4_proc_init_net,
2311 .exit = tcp4_proc_exit_net,
2312};
2313
2314int __init tcp4_proc_init(void)
2315{
2316 return register_pernet_subsys(&tcp4_net_ops);
2317}
2318
2319void tcp4_proc_exit(void)
2320{
2321 unregister_pernet_subsys(&tcp4_net_ops);
2322}
2323#endif
2324
2325struct proto tcp_prot = {
2326 .name = "TCP",
2327 .owner = THIS_MODULE,
2328 .close = tcp_close,
2329 .connect = tcp_v4_connect,
2330 .disconnect = tcp_disconnect,
2331 .accept = inet_csk_accept,
2332 .ioctl = tcp_ioctl,
2333 .init = tcp_v4_init_sock,
2334 .destroy = tcp_v4_destroy_sock,
2335 .shutdown = tcp_shutdown,
2336 .setsockopt = tcp_setsockopt,
2337 .getsockopt = tcp_getsockopt,
2338 .recvmsg = tcp_recvmsg,
2339 .sendmsg = tcp_sendmsg,
2340 .sendpage = tcp_sendpage,
2341 .backlog_rcv = tcp_v4_do_rcv,
2342 .release_cb = tcp_release_cb,
2343 .hash = inet_hash,
2344 .unhash = inet_unhash,
2345 .get_port = inet_csk_get_port,
2346 .enter_memory_pressure = tcp_enter_memory_pressure,
2347 .stream_memory_free = tcp_stream_memory_free,
2348 .sockets_allocated = &tcp_sockets_allocated,
2349 .orphan_count = &tcp_orphan_count,
2350 .memory_allocated = &tcp_memory_allocated,
2351 .memory_pressure = &tcp_memory_pressure,
2352 .sysctl_mem = sysctl_tcp_mem,
2353 .sysctl_wmem = sysctl_tcp_wmem,
2354 .sysctl_rmem = sysctl_tcp_rmem,
2355 .max_header = MAX_TCP_HEADER,
2356 .obj_size = sizeof(struct tcp_sock),
2357 .slab_flags = SLAB_DESTROY_BY_RCU,
2358 .twsk_prot = &tcp_timewait_sock_ops,
2359 .rsk_prot = &tcp_request_sock_ops,
2360 .h.hashinfo = &tcp_hashinfo,
2361 .no_autobind = true,
2362#ifdef CONFIG_COMPAT
2363 .compat_setsockopt = compat_tcp_setsockopt,
2364 .compat_getsockopt = compat_tcp_getsockopt,
2365#endif
2366 .diag_destroy = tcp_abort,
2367};
2368EXPORT_SYMBOL(tcp_prot);
2369
2370static void __net_exit tcp_sk_exit(struct net *net)
2371{
2372 int cpu;
2373
2374 for_each_possible_cpu(cpu)
2375 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2376 free_percpu(net->ipv4.tcp_sk);
2377}
2378
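/* Per-namespace setup: create one control socket per possible CPU for
 * sending RSTs and ACKs outside socket context, and initialise the
 * namespace's TCP sysctl defaults.
 */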
2379static int __net_init tcp_sk_init(struct net *net)
2380{
2381 int res, cpu;
2382
2383 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2384 if (!net->ipv4.tcp_sk)
2385 return -ENOMEM;
2386
2387 for_each_possible_cpu(cpu) {
2388 struct sock *sk;
2389
2390 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2391 IPPROTO_TCP, net);
2392 if (res)
2393 goto fail;
2394 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2395 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2396 }
2397
2398 net->ipv4.sysctl_tcp_ecn = 2;
2399 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2400
2401 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2402 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2403 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2404
2405 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2406 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2407 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2408
2409 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2410 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2411 net->ipv4.sysctl_tcp_syncookies = 1;
2412 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2413 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2414 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2415 net->ipv4.sysctl_tcp_orphan_retries = 0;
2416 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2417 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2418
2419 return 0;
2420fail:
2421 tcp_sk_exit(net);
2422
2423 return res;
2424}
2425
2426static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2427{
2428 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2429}
2430
2431static struct pernet_operations __net_initdata tcp_sk_ops = {
2432 .init = tcp_sk_init,
2433 .exit = tcp_sk_exit,
2434 .exit_batch = tcp_sk_exit_batch,
2435};
2436
2437void __init tcp_v4_init(void)
2438{
2439 inet_hashinfo_init(&tcp_hashinfo);
2440 if (register_pernet_subsys(&tcp_sk_ops))
2441 panic("Failed to create the TCP control socket.\n");
2442}
2443