/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 */
54#include <linux/bottom_half.h>
55#include <linux/types.h>
56#include <linux/fcntl.h>
57#include <linux/module.h>
58#include <linux/random.h>
59#include <linux/cache.h>
60#include <linux/jhash.h>
61#include <linux/init.h>
62#include <linux/times.h>
63#include <linux/slab.h>
64
65#include <net/net_namespace.h>
66#include <net/icmp.h>
67#include <net/inet_hashtables.h>
68#include <net/tcp.h>
69#include <net/transp_v6.h>
70#include <net/ipv6.h>
71#include <net/inet_common.h>
72#include <net/timewait_sock.h>
73#include <net/xfrm.h>
74#include <net/netdma.h>
75
76#include <linux/inet.h>
77#include <linux/ipv6.h>
78#include <linux/stddef.h>
79#include <linux/proc_fs.h>
80#include <linux/seq_file.h>
81
82#include <linux/crypto.h>
83#include <linux/scatterlist.h>
84
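/*
 * Reference note: these two knobs are exposed as
 * /proc/sys/net/ipv4/tcp_tw_reuse and /proc/sys/net/ipv4/tcp_low_latency.
 * tcp_tw_reuse lets tcp_twsk_unique() below recycle a TIME-WAIT port for a
 * new outgoing connection when timestamps make that safe; tcp_low_latency
 * is consulted on the receive path to bypass the prequeue.
 */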
85int sysctl_tcp_tw_reuse __read_mostly;
86int sysctl_tcp_low_latency __read_mostly;
87EXPORT_SYMBOL(sysctl_tcp_low_latency);
88
89
90#ifdef CONFIG_TCP_MD5SIG
91static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
92 __be32 addr);
93static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
94 __be32 daddr, __be32 saddr, struct tcphdr *th);
95#else
96static inline
97struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
98{
99 return NULL;
100}
101#endif
102
103struct inet_hashinfo tcp_hashinfo;
104EXPORT_SYMBOL(tcp_hashinfo);
105
106static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
107{
108 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
109 ip_hdr(skb)->saddr,
110 tcp_hdr(skb)->dest,
111 tcp_hdr(skb)->source);
112}
113
114int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
115{
116 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
117 struct tcp_sock *tp = tcp_sk(sk);
118
 /* With PAWS, reusing the TIME-WAIT port is safe from the viewpoint of
  * data integrity.  Even without PAWS it is safe provided the sequence
  * spaces do not overlap, i.e. at data rates <= 80 Mbit/sec.
  *
  * The idea is close to VJ's: the timestamp cache is held not per host
  * but per port pair, and the TW bucket is used as the state holder.
  * If the TW bucket has already been destroyed we fall back to the
  * timestamps kept in the peer table.
  */
130 if (tcptw->tw_ts_recent_stamp &&
131 (twp == NULL || (sysctl_tcp_tw_reuse &&
132 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
133 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
134 if (tp->write_seq == 0)
135 tp->write_seq = 1;
136 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
137 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
138 sock_hold(sktw);
139 return 1;
140 }
141
142 return 0;
143}
144EXPORT_SYMBOL_GPL(tcp_twsk_unique);
145
/* This will initiate an outgoing connection. */
147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148{
149 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
150 struct inet_sock *inet = inet_sk(sk);
151 struct tcp_sock *tp = tcp_sk(sk);
152 __be16 orig_sport, orig_dport;
153 __be32 daddr, nexthop;
154 struct flowi4 *fl4;
155 struct rtable *rt;
156 int err;
157 struct ip_options_rcu *inet_opt;
158
159 if (addr_len < sizeof(struct sockaddr_in))
160 return -EINVAL;
161
162 if (usin->sin_family != AF_INET)
163 return -EAFNOSUPPORT;
164
165 nexthop = daddr = usin->sin_addr.s_addr;
166 inet_opt = rcu_dereference_protected(inet->inet_opt,
167 sock_owned_by_user(sk));
168 if (inet_opt && inet_opt->opt.srr) {
169 if (!daddr)
170 return -EINVAL;
171 nexthop = inet_opt->opt.faddr;
172 }
173
174 orig_sport = inet->inet_sport;
175 orig_dport = usin->sin_port;
176 fl4 = &inet->cork.fl.u.ip4;
177 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
178 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
179 IPPROTO_TCP,
180 orig_sport, orig_dport, sk, true);
181 if (IS_ERR(rt)) {
182 err = PTR_ERR(rt);
183 if (err == -ENETUNREACH)
184 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
185 return err;
186 }
187
188 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
189 ip_rt_put(rt);
190 return -ENETUNREACH;
191 }
192
193 if (!inet_opt || !inet_opt->opt.srr)
194 daddr = fl4->daddr;
195
196 if (!inet->inet_saddr)
197 inet->inet_saddr = fl4->saddr;
198 inet->inet_rcv_saddr = inet->inet_saddr;
199
200 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 /* Reset inherited state */
202 tp->rx_opt.ts_recent = 0;
203 tp->rx_opt.ts_recent_stamp = 0;
204 tp->write_seq = 0;
205 }
206
207 if (tcp_death_row.sysctl_tw_recycle &&
208 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
209 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
 /* VJ's idea: save the last timestamp seen from this destination and
  * hold it for at least the normal TIME-WAIT interval, so that old
  * duplicate segments can be detected on subsequent connections before
  * they reach the synchronized state.
  */
216 if (peer) {
217 inet_peer_refcheck(peer);
218 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
219 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
220 tp->rx_opt.ts_recent = peer->tcp_ts;
221 }
222 }
223 }
224
225 inet->inet_dport = usin->sin_port;
226 inet->inet_daddr = daddr;
227
228 inet_csk(sk)->icsk_ext_hdr_len = 0;
229 if (inet_opt)
230 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
231
232 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 /* Socket identity is still unknown (sport may be zero).
  * However we set state to SYN-SENT and, without releasing the socket
  * lock, select a source port, enter ourselves into the hash tables and
  * complete initialization after this.
  */
239 tcp_set_state(sk, TCP_SYN_SENT);
240 err = inet_hash_connect(&tcp_death_row, sk);
241 if (err)
242 goto failure;
243
244 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
245 inet->inet_sport, inet->inet_dport, sk);
246 if (IS_ERR(rt)) {
247 err = PTR_ERR(rt);
248 rt = NULL;
249 goto failure;
250 }
251
252 sk->sk_gso_type = SKB_GSO_TCPV4;
253 sk_setup_caps(sk, &rt->dst);
254
255 if (!tp->write_seq)
256 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
257 inet->inet_daddr,
258 inet->inet_sport,
259 usin->sin_port);
260
261 inet->inet_id = tp->write_seq ^ jiffies;
262
263 err = tcp_connect(sk);
264 rt = NULL;
265 if (err)
266 goto failure;
267
268 return 0;
269
270failure:
 /*
  * This unhashes the socket and releases the local port, if necessary.
  */
275 tcp_set_state(sk, TCP_CLOSE);
276 ip_rt_put(rt);
277 sk->sk_route_caps = 0;
278 inet->inet_dport = 0;
279 return err;
280}
281EXPORT_SYMBOL(tcp_v4_connect);
282
/*
 * This routine does path MTU discovery as defined in RFC 1191.
 */
286static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
287{
288 struct dst_entry *dst;
289 struct inet_sock *inet = inet_sk(sk);
290
 /* We are not interested in TCP_LISTEN and open_requests
  * (SYN-ACKs sent out by Linux are always < 576 bytes, so they
  * should go through unfragmented).
  */
295 if (sk->sk_state == TCP_LISTEN)
296 return;
297
 /* We don't check in the destination entry whether PMTU discovery is
  * forbidden on this route; we just assume that no packet-too-big
  * replies come back while PMTU discovery is not active.  There is a
  * small race when the user changes this flag in the route, but that
  * is acceptable.
  */
304 if ((dst = __sk_dst_check(sk, 0)) == NULL)
305 return;
306
307 dst->ops->update_pmtu(dst, mtu);
308
 /* Something is about to go wrong.  Remember the soft error in case
  * this connection is unable to recover.
  */
312 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
313 sk->sk_err_soft = EMSGSIZE;
314
315 mtu = dst_mtu(dst);
316
317 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
318 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
319 tcp_sync_mss(sk, mtu);
 /* Resend the TCP packet, because it is clear that the old packet has
  * been dropped.  This is the "fast" path of MTU discovery.
  */
326 tcp_simple_retransmit(sk);
327 }
328}
329
/*
 * This routine is called by the ICMP module when it gets some sort of
 * error condition.  If err < 0 then the socket should be closed and the
 * error returned to the user.  If err > 0 it's just the icmp type << 8 |
 * icmp code.  After adjustment, the header points to the first 8 bytes
 * of the TCP header, from which we find the appropriate connection.
 *
 * The locking strategy used here is very "optimistic".  When someone else
 * holds the socket, the ICMP is simply dropped, and for some paths there
 * is no check at all.
 */
346void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
347{
348 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
349 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
350 struct inet_connection_sock *icsk;
351 struct tcp_sock *tp;
352 struct inet_sock *inet;
353 const int type = icmp_hdr(icmp_skb)->type;
354 const int code = icmp_hdr(icmp_skb)->code;
355 struct sock *sk;
356 struct sk_buff *skb;
357 __u32 seq;
358 __u32 remaining;
359 int err;
360 struct net *net = dev_net(icmp_skb->dev);
361
362 if (icmp_skb->len < (iph->ihl << 2) + 8) {
363 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
364 return;
365 }
366
367 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
368 iph->saddr, th->source, inet_iif(icmp_skb));
369 if (!sk) {
370 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
371 return;
372 }
373 if (sk->sk_state == TCP_TIME_WAIT) {
374 inet_twsk_put(inet_twsk(sk));
375 return;
376 }
377
378 bh_lock_sock(sk);
379
 /* If too many ICMPs get dropped on busy servers this needs to be
  * solved differently.
  */
382 if (sock_owned_by_user(sk))
383 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
384
385 if (sk->sk_state == TCP_CLOSE)
386 goto out;
387
388 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
389 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
390 goto out;
391 }
392
393 icsk = inet_csk(sk);
394 tp = tcp_sk(sk);
395 seq = ntohl(th->seq);
396 if (sk->sk_state != TCP_LISTEN &&
397 !between(seq, tp->snd_una, tp->snd_nxt)) {
398 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
399 goto out;
400 }
401
402 switch (type) {
403 case ICMP_SOURCE_QUENCH:
 /* Just silently ignore these. */
405 goto out;
406 case ICMP_PARAMETERPROB:
407 err = EPROTO;
408 break;
409 case ICMP_DEST_UNREACH:
410 if (code > NR_ICMP_UNREACH)
411 goto out;
412
413 if (code == ICMP_FRAG_NEEDED) {
414 if (!sock_owned_by_user(sk))
415 do_pmtu_discovery(sk, iph, info);
416 goto out;
417 }
418
419 err = icmp_err_convert[code].errno;
 /* Check whether this ICMP allows reverting the RTO backoff
  * (see draft-zimmermann-tcp-lcd).
  */
422 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
423 break;
424 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
425 !icsk->icsk_backoff)
426 break;
427
428 if (sock_owned_by_user(sk))
429 break;
430
431 icsk->icsk_backoff--;
432 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
433 icsk->icsk_backoff;
434 tcp_bound_rto(sk);
435
436 skb = tcp_write_queue_head(sk);
437 BUG_ON(!skb);
438
439 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
440 tcp_time_stamp - TCP_SKB_CB(skb)->when);
441
442 if (remaining) {
443 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
444 remaining, TCP_RTO_MAX);
445 } else {
 /* The RTO revert clocked out the retransmission timer;
  * retransmit immediately.
  */
448 tcp_retransmit_timer(sk);
449 }
450
451 break;
452 case ICMP_TIME_EXCEEDED:
453 err = EHOSTUNREACH;
454 break;
455 default:
456 goto out;
457 }
458
459 switch (sk->sk_state) {
460 struct request_sock *req, **prev;
461 case TCP_LISTEN:
462 if (sock_owned_by_user(sk))
463 goto out;
464
465 req = inet_csk_search_req(sk, &prev, th->dest,
466 iph->daddr, iph->saddr);
467 if (!req)
468 goto out;
469
 /* ICMPs are not backlogged, hence we cannot get an established
  * socket here.
  */
473 WARN_ON(req->sk);
474
475 if (seq != tcp_rsk(req)->snt_isn) {
476 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
477 goto out;
478 }
479
 /*
  * Still in SYN_RECV, just remove it silently.
  * There is no good way to pass the error to the newly created
  * socket, and POSIX does not want network errors returned from
  * accept().
  */
486 inet_csk_reqsk_queue_drop(sk, req, prev);
487 goto out;
488
489 case TCP_SYN_SENT:
490 case TCP_SYN_RECV:
491
492
493 if (!sock_owned_by_user(sk)) {
494 sk->sk_err = err;
495
496 sk->sk_error_report(sk);
497
498 tcp_done(sk);
499 } else {
500 sk->sk_err_soft = err;
501 }
502 goto out;
503 }
504
 /* If we've already connected we will keep trying until we time out,
  * or the user gives up.
  *
  * RFC 1122 4.2.3.9 allows treating as hard errors only PROTO_UNREACH
  * and PORT_UNREACH (FRAG_FAILED too, but that is obsoleted by PMTU
  * discovery), so everything else is reported softly unless the
  * application asked for errors via IP_RECVERR.
  */
521 inet = inet_sk(sk);
522 if (!sock_owned_by_user(sk) && inet->recverr) {
523 sk->sk_err = err;
524 sk->sk_error_report(sk);
525 } else {
526 sk->sk_err_soft = err;
527 }
528
529out:
530 bh_unlock_sock(sk);
531 sock_put(sk);
532}
533
534static void __tcp_v4_send_check(struct sk_buff *skb,
535 __be32 saddr, __be32 daddr)
536{
537 struct tcphdr *th = tcp_hdr(skb);
538
539 if (skb->ip_summed == CHECKSUM_PARTIAL) {
540 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
541 skb->csum_start = skb_transport_header(skb) - skb->head;
542 skb->csum_offset = offsetof(struct tcphdr, check);
543 } else {
544 th->check = tcp_v4_check(skb->len, saddr, daddr,
545 csum_partial(th,
546 th->doff << 2,
547 skb->csum));
548 }
549}
550
551
552void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
553{
554 struct inet_sock *inet = inet_sk(sk);
555
556 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
557}
558EXPORT_SYMBOL(tcp_v4_send_check);
559
560int tcp_v4_gso_send_check(struct sk_buff *skb)
561{
562 const struct iphdr *iph;
563 struct tcphdr *th;
564
565 if (!pskb_may_pull(skb, sizeof(*th)))
566 return -EINVAL;
567
568 iph = ip_hdr(skb);
569 th = tcp_hdr(skb);
570
571 th->check = 0;
572 skb->ip_summed = CHECKSUM_PARTIAL;
573 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
574 return 0;
575}
576
/*
 * Send a RST in reply to a segment that does not belong to any connection
 * on this host.  Socket parameters (TOS, TTL, etc.) are never used for the
 * reset: the offending packet was not destined for one of our sockets, so
 * the reply is built purely from the segment itself.  If an MD5 key is
 * configured for the sender, the RST carries a matching MD5 signature
 * option.
 */
590static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
591{
592 struct tcphdr *th = tcp_hdr(skb);
593 struct {
594 struct tcphdr th;
595#ifdef CONFIG_TCP_MD5SIG
596 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
597#endif
598 } rep;
599 struct ip_reply_arg arg;
600#ifdef CONFIG_TCP_MD5SIG
601 struct tcp_md5sig_key *key;
602#endif
603 struct net *net;
604
 /* Never send a reset in response to a reset. */
606 if (th->rst)
607 return;
608
609 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
610 return;
611
612
613 memset(&rep, 0, sizeof(rep));
614 rep.th.dest = th->source;
615 rep.th.source = th->dest;
616 rep.th.doff = sizeof(struct tcphdr) / 4;
617 rep.th.rst = 1;
618
619 if (th->ack) {
620 rep.th.seq = th->ack_seq;
621 } else {
622 rep.th.ack = 1;
623 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
624 skb->len - (th->doff << 2));
625 }
626
627 memset(&arg, 0, sizeof(arg));
628 arg.iov[0].iov_base = (unsigned char *)&rep;
629 arg.iov[0].iov_len = sizeof(rep.th);
630
631#ifdef CONFIG_TCP_MD5SIG
632 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
633 if (key) {
634 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
635 (TCPOPT_NOP << 16) |
636 (TCPOPT_MD5SIG << 8) |
637 TCPOLEN_MD5SIG);
638
639 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
640 rep.th.doff = arg.iov[0].iov_len / 4;
641
642 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
643 key, ip_hdr(skb)->saddr,
644 ip_hdr(skb)->daddr, &rep.th);
645 }
646#endif
647 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
648 ip_hdr(skb)->saddr,
649 arg.iov[0].iov_len, IPPROTO_TCP, 0);
650 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
651 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
652
653 net = dev_net(skb_dst(skb)->dev);
654 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
655 &arg, arg.iov[0].iov_len);
656
657 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
658 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
659}
660
/* The code below sends ACKs in SYN-RECV and TIME-WAIT states outside
 * socket context.  It is ugly, but in those states there is no full
 * socket to hang the reply on.
 */
665static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
666 u32 win, u32 ts, int oif,
667 struct tcp_md5sig_key *key,
668 int reply_flags)
669{
670 struct tcphdr *th = tcp_hdr(skb);
671 struct {
672 struct tcphdr th;
673 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
674#ifdef CONFIG_TCP_MD5SIG
675 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
676#endif
677 ];
678 } rep;
679 struct ip_reply_arg arg;
680 struct net *net = dev_net(skb_dst(skb)->dev);
681
682 memset(&rep.th, 0, sizeof(struct tcphdr));
683 memset(&arg, 0, sizeof(arg));
684
685 arg.iov[0].iov_base = (unsigned char *)&rep;
686 arg.iov[0].iov_len = sizeof(rep.th);
687 if (ts) {
688 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
689 (TCPOPT_TIMESTAMP << 8) |
690 TCPOLEN_TIMESTAMP);
691 rep.opt[1] = htonl(tcp_time_stamp);
692 rep.opt[2] = htonl(ts);
693 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
694 }
695
696
697 rep.th.dest = th->source;
698 rep.th.source = th->dest;
699 rep.th.doff = arg.iov[0].iov_len / 4;
700 rep.th.seq = htonl(seq);
701 rep.th.ack_seq = htonl(ack);
702 rep.th.ack = 1;
703 rep.th.window = htons(win);
704
705#ifdef CONFIG_TCP_MD5SIG
706 if (key) {
707 int offset = (ts) ? 3 : 0;
708
709 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
710 (TCPOPT_NOP << 16) |
711 (TCPOPT_MD5SIG << 8) |
712 TCPOLEN_MD5SIG);
713 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
714 rep.th.doff = arg.iov[0].iov_len/4;
715
716 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
717 key, ip_hdr(skb)->saddr,
718 ip_hdr(skb)->daddr, &rep.th);
719 }
720#endif
721 arg.flags = reply_flags;
722 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
723 ip_hdr(skb)->saddr,
724 arg.iov[0].iov_len, IPPROTO_TCP, 0);
725 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
726 if (oif)
727 arg.bound_dev_if = oif;
728
729 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
730 &arg, arg.iov[0].iov_len);
731
732 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
733}
734
735static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
736{
737 struct inet_timewait_sock *tw = inet_twsk(sk);
738 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
739
740 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
741 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
742 tcptw->tw_ts_recent,
743 tw->tw_bound_dev_if,
744 tcp_twsk_md5_key(tcptw),
745 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
746 );
747
748 inet_twsk_put(tw);
749}
750
751static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
752 struct request_sock *req)
753{
754 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
755 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
756 req->ts_recent,
757 0,
758 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
759 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
760}
761
/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a full socket.
 */
767static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
768 struct request_sock *req,
769 struct request_values *rvp)
770{
771 const struct inet_request_sock *ireq = inet_rsk(req);
772 struct flowi4 fl4;
773 int err = -1;
774 struct sk_buff * skb;
775
776
777 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
778 return -1;
779
780 skb = tcp_make_synack(sk, dst, req, rvp);
781
782 if (skb) {
783 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
784
785 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
786 ireq->rmt_addr,
787 ireq->opt);
788 err = net_xmit_eval(err);
789 }
790
791 dst_release(dst);
792 return err;
793}
794
795static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
796 struct request_values *rvp)
797{
798 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
799 return tcp_v4_send_synack(sk, NULL, req, rvp);
800}
801
/*
 *	IPv4 request_sock destructor.
 */
805static void tcp_v4_reqsk_destructor(struct request_sock *req)
806{
807 kfree(inet_rsk(req)->opt);
808}
809
810static void syn_flood_warning(const struct sk_buff *skb)
811{
812 const char *msg;
813
814#ifdef CONFIG_SYN_COOKIES
815 if (sysctl_tcp_syncookies)
816 msg = "Sending cookies";
817 else
818#endif
819 msg = "Dropping request";
820
821 pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
822 ntohs(tcp_hdr(skb)->dest), msg);
823}
824
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
828static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
829 struct sk_buff *skb)
830{
831 const struct ip_options *opt = &(IPCB(skb)->opt);
832 struct ip_options_rcu *dopt = NULL;
833
834 if (opt && opt->optlen) {
835 int opt_size = sizeof(*dopt) + opt->optlen;
836
837 dopt = kmalloc(opt_size, GFP_ATOMIC);
838 if (dopt) {
839 if (ip_options_echo(&dopt->opt, skb)) {
840 kfree(dopt);
841 dopt = NULL;
842 }
843 }
844 }
845 return dopt;
846}
847
848#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC 2385 MD5 checksumming requires a mapping of IP address -> MD5 key.
 * We maintain these in the sk structure.
 */

/* Find the key structure for an address. */
856static struct tcp_md5sig_key *
857 tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
858{
859 struct tcp_sock *tp = tcp_sk(sk);
860 int i;
861
862 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
863 return NULL;
864 for (i = 0; i < tp->md5sig_info->entries4; i++) {
865 if (tp->md5sig_info->keys4[i].addr == addr)
866 return &tp->md5sig_info->keys4[i].base;
867 }
868 return NULL;
869}
870
871struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
872 struct sock *addr_sk)
873{
874 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
875}
876EXPORT_SYMBOL(tcp_v4_md5_lookup);
877
878static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
879 struct request_sock *req)
880{
881 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
882}
883
/* This can be called on a newly created socket, from other files */
885int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
886 u8 *newkey, u8 newkeylen)
887{
888
889 struct tcp_md5sig_key *key;
890 struct tcp_sock *tp = tcp_sk(sk);
891 struct tcp4_md5sig_key *keys;
892
893 key = tcp_v4_md5_do_lookup(sk, addr);
894 if (key) {
895
896 kfree(key->key);
897 key->key = newkey;
898 key->keylen = newkeylen;
899 } else {
900 struct tcp_md5sig_info *md5sig;
901
902 if (!tp->md5sig_info) {
903 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
904 GFP_ATOMIC);
905 if (!tp->md5sig_info) {
906 kfree(newkey);
907 return -ENOMEM;
908 }
909 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
910 }
911 if (tcp_alloc_md5sig_pool(sk) == NULL) {
912 kfree(newkey);
913 return -ENOMEM;
914 }
915 md5sig = tp->md5sig_info;
916
917 if (md5sig->alloced4 == md5sig->entries4) {
918 keys = kmalloc((sizeof(*keys) *
919 (md5sig->entries4 + 1)), GFP_ATOMIC);
920 if (!keys) {
921 kfree(newkey);
922 tcp_free_md5sig_pool();
923 return -ENOMEM;
924 }
925
926 if (md5sig->entries4)
927 memcpy(keys, md5sig->keys4,
928 sizeof(*keys) * md5sig->entries4);
929
930
931 kfree(md5sig->keys4);
932 md5sig->keys4 = keys;
933 md5sig->alloced4++;
934 }
935 md5sig->entries4++;
936 md5sig->keys4[md5sig->entries4 - 1].addr = addr;
937 md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
938 md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
939 }
940 return 0;
941}
942EXPORT_SYMBOL(tcp_v4_md5_do_add);
943
944static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
945 u8 *newkey, u8 newkeylen)
946{
947 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
948 newkey, newkeylen);
949}
950
951int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
952{
953 struct tcp_sock *tp = tcp_sk(sk);
954 int i;
955
956 for (i = 0; i < tp->md5sig_info->entries4; i++) {
957 if (tp->md5sig_info->keys4[i].addr == addr) {
958
959 kfree(tp->md5sig_info->keys4[i].base.key);
960 tp->md5sig_info->entries4--;
961
962 if (tp->md5sig_info->entries4 == 0) {
963 kfree(tp->md5sig_info->keys4);
964 tp->md5sig_info->keys4 = NULL;
965 tp->md5sig_info->alloced4 = 0;
966 } else if (tp->md5sig_info->entries4 != i) {
 /* Close the hole left by the removed entry. */
968 memmove(&tp->md5sig_info->keys4[i],
969 &tp->md5sig_info->keys4[i+1],
970 (tp->md5sig_info->entries4 - i) *
971 sizeof(struct tcp4_md5sig_key));
972 }
973 tcp_free_md5sig_pool();
974 return 0;
975 }
976 }
977 return -ENOENT;
978}
979EXPORT_SYMBOL(tcp_v4_md5_do_del);
980
981static void tcp_v4_clear_md5_list(struct sock *sk)
982{
983 struct tcp_sock *tp = tcp_sk(sk);
984
 /* Free each key, then the array of keys itself, and drop our hold on
  * the MD5 crypto pool.
  */
989 if (tp->md5sig_info->entries4) {
990 int i;
991 for (i = 0; i < tp->md5sig_info->entries4; i++)
992 kfree(tp->md5sig_info->keys4[i].base.key);
993 tp->md5sig_info->entries4 = 0;
994 tcp_free_md5sig_pool();
995 }
996 if (tp->md5sig_info->keys4) {
997 kfree(tp->md5sig_info->keys4);
998 tp->md5sig_info->keys4 = NULL;
999 tp->md5sig_info->alloced4 = 0;
1000 }
1001}
1002
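/*
 * Reference note: the keys handled below are installed from user space with
 * the TCP_MD5SIG socket option.  A minimal, illustrative (untested) sketch of
 * the user-space side, assuming a connected IPv4 socket "fd" and the peer's
 * struct sockaddr_in in "peer" (needs <sys/socket.h>, <netinet/tcp.h>,
 * <string.h>):
 *
 *	struct tcp_md5sig md5sig;
 *
 *	memset(&md5sig, 0, sizeof(md5sig));
 *	memcpy(&md5sig.tcpm_addr, &peer, sizeof(peer));
 *	md5sig.tcpm_keylen = strlen("secret");
 *	memcpy(md5sig.tcpm_key, "secret", md5sig.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5sig, sizeof(md5sig));
 *
 * Passing a zero tcpm_keylen (or an empty key) deletes the key for that
 * address, which is exactly the !cmd.tcpm_key || !cmd.tcpm_keylen path in
 * tcp_v4_parse_md5_keys() below.
 */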
1003static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1004 int optlen)
1005{
1006 struct tcp_md5sig cmd;
1007 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1008 u8 *newkey;
1009
1010 if (optlen < sizeof(cmd))
1011 return -EINVAL;
1012
1013 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1014 return -EFAULT;
1015
1016 if (sin->sin_family != AF_INET)
1017 return -EINVAL;
1018
1019 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1020 if (!tcp_sk(sk)->md5sig_info)
1021 return -ENOENT;
1022 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1023 }
1024
1025 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1026 return -EINVAL;
1027
1028 if (!tcp_sk(sk)->md5sig_info) {
1029 struct tcp_sock *tp = tcp_sk(sk);
1030 struct tcp_md5sig_info *p;
1031
1032 p = kzalloc(sizeof(*p), sk->sk_allocation);
1033 if (!p)
1034 return -EINVAL;
1035
1036 tp->md5sig_info = p;
1037 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1038 }
1039
1040 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1041 if (!newkey)
1042 return -ENOMEM;
1043 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1044 newkey, cmd.tcpm_keylen);
1045}
1046
1047static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1048 __be32 daddr, __be32 saddr, int nbytes)
1049{
1050 struct tcp4_pseudohdr *bp;
1051 struct scatterlist sg;
1052
1053 bp = &hp->md5_blk.ip4;
1054
 /*
  * Feed the TCP pseudo-header to the hash in this order: source IP
  * address, destination IP address, zero-padded protocol number, and
  * segment length.
  */
1060 bp->saddr = saddr;
1061 bp->daddr = daddr;
1062 bp->pad = 0;
1063 bp->protocol = IPPROTO_TCP;
1064 bp->len = cpu_to_be16(nbytes);
1065
1066 sg_init_one(&sg, bp, sizeof(*bp));
1067 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1068}
1069
1070static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1071 __be32 daddr, __be32 saddr, struct tcphdr *th)
1072{
1073 struct tcp_md5sig_pool *hp;
1074 struct hash_desc *desc;
1075
1076 hp = tcp_get_md5sig_pool();
1077 if (!hp)
1078 goto clear_hash_noput;
1079 desc = &hp->md5_desc;
1080
1081 if (crypto_hash_init(desc))
1082 goto clear_hash;
1083 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1084 goto clear_hash;
1085 if (tcp_md5_hash_header(hp, th))
1086 goto clear_hash;
1087 if (tcp_md5_hash_key(hp, key))
1088 goto clear_hash;
1089 if (crypto_hash_final(desc, md5_hash))
1090 goto clear_hash;
1091
1092 tcp_put_md5sig_pool();
1093 return 0;
1094
1095clear_hash:
1096 tcp_put_md5sig_pool();
1097clear_hash_noput:
1098 memset(md5_hash, 0, 16);
1099 return 1;
1100}
1101
1102int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1103 struct sock *sk, struct request_sock *req,
1104 struct sk_buff *skb)
1105{
1106 struct tcp_md5sig_pool *hp;
1107 struct hash_desc *desc;
1108 struct tcphdr *th = tcp_hdr(skb);
1109 __be32 saddr, daddr;
1110
1111 if (sk) {
1112 saddr = inet_sk(sk)->inet_saddr;
1113 daddr = inet_sk(sk)->inet_daddr;
1114 } else if (req) {
1115 saddr = inet_rsk(req)->loc_addr;
1116 daddr = inet_rsk(req)->rmt_addr;
1117 } else {
1118 const struct iphdr *iph = ip_hdr(skb);
1119 saddr = iph->saddr;
1120 daddr = iph->daddr;
1121 }
1122
1123 hp = tcp_get_md5sig_pool();
1124 if (!hp)
1125 goto clear_hash_noput;
1126 desc = &hp->md5_desc;
1127
1128 if (crypto_hash_init(desc))
1129 goto clear_hash;
1130
1131 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1132 goto clear_hash;
1133 if (tcp_md5_hash_header(hp, th))
1134 goto clear_hash;
1135 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1136 goto clear_hash;
1137 if (tcp_md5_hash_key(hp, key))
1138 goto clear_hash;
1139 if (crypto_hash_final(desc, md5_hash))
1140 goto clear_hash;
1141
1142 tcp_put_md5sig_pool();
1143 return 0;
1144
1145clear_hash:
1146 tcp_put_md5sig_pool();
1147clear_hash_noput:
1148 memset(md5_hash, 0, 16);
1149 return 1;
1150}
1151EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1152
1153static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1154{
 /*
  * This gets called for every TCP segment that arrives, so we want to
  * be efficient.  There are three drop cases:
  *  o No MD5 hash and one expected.
  *  o MD5 hash and we're not expecting one.
  *  o MD5 hash and it's wrong.
  */
1163 __u8 *hash_location = NULL;
1164 struct tcp_md5sig_key *hash_expected;
1165 const struct iphdr *iph = ip_hdr(skb);
1166 struct tcphdr *th = tcp_hdr(skb);
1167 int genhash;
1168 unsigned char newhash[16];
1169
1170 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1171 hash_location = tcp_parse_md5sig_option(th);
1172
 /* We've parsed the options - do we have a hash? */
1174 if (!hash_expected && !hash_location)
1175 return 0;
1176
1177 if (hash_expected && !hash_location) {
1178 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1179 return 1;
1180 }
1181
1182 if (!hash_expected && hash_location) {
1183 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1184 return 1;
1185 }
1186
 /* Okay, so this is hash_expected and hash_location - we need to
  * calculate the hash and compare.
  */
1190 genhash = tcp_v4_md5_hash_skb(newhash,
1191 hash_expected,
1192 NULL, NULL, skb);
1193
1194 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1195 if (net_ratelimit()) {
1196 printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1197 &iph->saddr, ntohs(th->source),
1198 &iph->daddr, ntohs(th->dest),
1199 genhash ? " tcp_v4_calc_md5_hash failed" : "");
1200 }
1201 return 1;
1202 }
1203 return 0;
1204}
1205
1206#endif
1207
1208struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1209 .family = PF_INET,
1210 .obj_size = sizeof(struct tcp_request_sock),
1211 .rtx_syn_ack = tcp_v4_rtx_synack,
1212 .send_ack = tcp_v4_reqsk_send_ack,
1213 .destructor = tcp_v4_reqsk_destructor,
1214 .send_reset = tcp_v4_send_reset,
1215 .syn_ack_timeout = tcp_syn_ack_timeout,
1216};
1217
1218#ifdef CONFIG_TCP_MD5SIG
1219static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1220 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1221 .calc_md5_hash = tcp_v4_md5_hash_skb,
1222};
1223#endif
1224
1225int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1226{
1227 struct tcp_extend_values tmp_ext;
1228 struct tcp_options_received tmp_opt;
1229 u8 *hash_location;
1230 struct request_sock *req;
1231 struct inet_request_sock *ireq;
1232 struct tcp_sock *tp = tcp_sk(sk);
1233 struct dst_entry *dst = NULL;
1234 __be32 saddr = ip_hdr(skb)->saddr;
1235 __be32 daddr = ip_hdr(skb)->daddr;
1236 __u32 isn = TCP_SKB_CB(skb)->when;
1237#ifdef CONFIG_SYN_COOKIES
1238 int want_cookie = 0;
1239#else
1240#define want_cookie 0
1241#endif
1242
 /* Never answer to SYNs sent to broadcast or multicast */
1244 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1245 goto drop;
1246
 /* TW buckets are converted to open requests without limitations:
  * they conserve resources and the peer is evidently a real one.
  */
1251 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1252 if (net_ratelimit())
1253 syn_flood_warning(skb);
1254#ifdef CONFIG_SYN_COOKIES
1255 if (sysctl_tcp_syncookies) {
1256 want_cookie = 1;
1257 } else
1258#endif
1259 goto drop;
1260 }
1261
 /* Accept backlog is full.  If we have already queued enough warm
  * entries in the syn queue, drop the request.  That is better than
  * clogging the syn queue with openreqs with exponentially increasing
  * timeouts.
  */
1267 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1268 goto drop;
1269
1270 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1271 if (!req)
1272 goto drop;
1273
1274#ifdef CONFIG_TCP_MD5SIG
1275 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1276#endif
1277
1278 tcp_clear_options(&tmp_opt);
1279 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1280 tmp_opt.user_mss = tp->rx_opt.user_mss;
1281 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1282
1283 if (tmp_opt.cookie_plus > 0 &&
1284 tmp_opt.saw_tstamp &&
1285 !tp->rx_opt.cookie_out_never &&
1286 (sysctl_tcp_cookie_size > 0 ||
1287 (tp->cookie_values != NULL &&
1288 tp->cookie_values->cookie_desired > 0))) {
1289 u8 *c;
1290 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1291 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1292
1293 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1294 goto drop_and_release;
1295
1296
1297 *mess++ ^= (__force u32)daddr;
1298 *mess++ ^= (__force u32)saddr;
1299
1300
1301 c = (u8 *)mess;
1302 while (l-- > 0)
1303 *c++ ^= *hash_location++;
1304
1305#ifdef CONFIG_SYN_COOKIES
1306 want_cookie = 0;
1307#endif
1308 tmp_ext.cookie_out_never = 0;
1309 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1310 } else if (!tp->rx_opt.cookie_in_always) {
1311
1312 tmp_ext.cookie_out_never = 1;
1313 tmp_ext.cookie_plus = 0;
1314 } else {
1315 goto drop_and_release;
1316 }
1317 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1318
1319 if (want_cookie && !tmp_opt.saw_tstamp)
1320 tcp_clear_options(&tmp_opt);
1321
1322 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1323 tcp_openreq_init(req, &tmp_opt, skb);
1324
1325 ireq = inet_rsk(req);
1326 ireq->loc_addr = daddr;
1327 ireq->rmt_addr = saddr;
1328 ireq->no_srccheck = inet_sk(sk)->transparent;
1329 ireq->opt = tcp_v4_save_options(sk, skb);
1330
1331 if (security_inet_conn_request(sk, skb, req))
1332 goto drop_and_free;
1333
1334 if (!want_cookie || tmp_opt.tstamp_ok)
1335 TCP_ECN_create_request(req, tcp_hdr(skb));
1336
1337 if (want_cookie) {
1338 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1339 req->cookie_ts = tmp_opt.tstamp_ok;
1340 } else if (!isn) {
1341 struct inet_peer *peer = NULL;
1342 struct flowi4 fl4;
1343
 /* VJ's idea: we save the last timestamp seen from the destination in
  * the peer table when entering TIME-WAIT, and check against it before
  * accepting a new connection request.
  *
  * If "isn" is not zero, this request hit a live timewait bucket, so
  * all the necessary checks were made while processing the timewait
  * state.
  */
1353 if (tmp_opt.saw_tstamp &&
1354 tcp_death_row.sysctl_tw_recycle &&
1355 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1356 fl4.daddr == saddr &&
1357 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1358 inet_peer_refcheck(peer);
1359 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1360 (s32)(peer->tcp_ts - req->ts_recent) >
1361 TCP_PAWS_WINDOW) {
1362 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1363 goto drop_and_release;
1364 }
1365 }
1366
1367 else if (!sysctl_tcp_syncookies &&
1368 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1369 (sysctl_max_syn_backlog >> 2)) &&
1370 (!peer || !peer->tcp_ts_stamp) &&
1371 (!dst || !dst_metric(dst, RTAX_RTT))) {
 /* Without syncookies, the last quarter of the backlog is reserved for
  * destinations proven to be alive, so that under SYN flood we keep
  * talking to destinations we already remember.
  */
1379 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1380 &saddr, ntohs(tcp_hdr(skb)->source));
1381 goto drop_and_release;
1382 }
1383
1384 isn = tcp_v4_init_sequence(skb);
1385 }
1386 tcp_rsk(req)->snt_isn = isn;
1387
1388 if (tcp_v4_send_synack(sk, dst, req,
1389 (struct request_values *)&tmp_ext) ||
1390 want_cookie)
1391 goto drop_and_free;
1392
1393 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1394 return 0;
1395
1396drop_and_release:
1397 dst_release(dst);
1398drop_and_free:
1399 reqsk_free(req);
1400drop:
1401 return 0;
1402}
1403EXPORT_SYMBOL(tcp_v4_conn_request);
1404
/*
 * The three way handshake has completed - we got a valid ACK to our SYN-ACK -
 * now create the new socket.
 */
1410struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1411 struct request_sock *req,
1412 struct dst_entry *dst)
1413{
1414 struct inet_request_sock *ireq;
1415 struct inet_sock *newinet;
1416 struct tcp_sock *newtp;
1417 struct sock *newsk;
1418#ifdef CONFIG_TCP_MD5SIG
1419 struct tcp_md5sig_key *key;
1420#endif
1421 struct ip_options_rcu *inet_opt;
1422
1423 if (sk_acceptq_is_full(sk))
1424 goto exit_overflow;
1425
1426 newsk = tcp_create_openreq_child(sk, req, skb);
1427 if (!newsk)
1428 goto exit_nonewsk;
1429
1430 newsk->sk_gso_type = SKB_GSO_TCPV4;
1431
1432 newtp = tcp_sk(newsk);
1433 newinet = inet_sk(newsk);
1434 ireq = inet_rsk(req);
1435 newinet->inet_daddr = ireq->rmt_addr;
1436 newinet->inet_rcv_saddr = ireq->loc_addr;
1437 newinet->inet_saddr = ireq->loc_addr;
1438 inet_opt = ireq->opt;
1439 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1440 ireq->opt = NULL;
1441 newinet->mc_index = inet_iif(skb);
1442 newinet->mc_ttl = ip_hdr(skb)->ttl;
1443 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1444 if (inet_opt)
1445 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1446 newinet->inet_id = newtp->write_seq ^ jiffies;
1447
1448 if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1449 goto put_and_exit;
1450
1451 sk_setup_caps(newsk, dst);
1452
1453 tcp_mtup_init(newsk);
1454 tcp_sync_mss(newsk, dst_mtu(dst));
1455 newtp->advmss = dst_metric_advmss(dst);
1456 if (tcp_sk(sk)->rx_opt.user_mss &&
1457 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1458 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1459
1460 tcp_initialize_rcv_mss(newsk);
1461
1462#ifdef CONFIG_TCP_MD5SIG
 /* Copy over the MD5 key from the original socket */
1464 key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1465 if (key != NULL) {
 /*
  * We're using one, so create a matching key on the newsk structure.
  * If we fail to get memory, we simply end up not copying the key
  * across.
  */
1472 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1473 if (newkey != NULL)
1474 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1475 newkey, key->keylen);
1476 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1477 }
1478#endif
1479
1480 if (__inet_inherit_port(sk, newsk) < 0)
1481 goto put_and_exit;
1482 __inet_hash_nolisten(newsk, NULL);
1483
1484 return newsk;
1485
1486exit_overflow:
1487 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1488exit_nonewsk:
1489 dst_release(dst);
1490exit:
1491 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1492 return NULL;
1493put_and_exit:
1494 sock_put(newsk);
1495 goto exit;
1496}
1497EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1498
1499static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1500{
1501 struct tcphdr *th = tcp_hdr(skb);
1502 const struct iphdr *iph = ip_hdr(skb);
1503 struct sock *nsk;
1504 struct request_sock **prev;
1505
1506 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1507 iph->saddr, iph->daddr);
1508 if (req)
1509 return tcp_check_req(sk, skb, req, prev);
1510
1511 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1512 th->source, iph->daddr, th->dest, inet_iif(skb));
1513
1514 if (nsk) {
1515 if (nsk->sk_state != TCP_TIME_WAIT) {
1516 bh_lock_sock(nsk);
1517 return nsk;
1518 }
1519 inet_twsk_put(inet_twsk(nsk));
1520 return NULL;
1521 }
1522
1523#ifdef CONFIG_SYN_COOKIES
1524 if (!th->syn)
1525 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1526#endif
1527 return sk;
1528}
1529
1530static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1531{
1532 const struct iphdr *iph = ip_hdr(skb);
1533
1534 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1535 if (!tcp_v4_check(skb->len, iph->saddr,
1536 iph->daddr, skb->csum)) {
1537 skb->ip_summed = CHECKSUM_UNNECESSARY;
1538 return 0;
1539 }
1540 }
1541
1542 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1543 skb->len, IPPROTO_TCP, 0);
1544
1545 if (skb->len <= 76) {
1546 return __skb_checksum_complete(skb);
1547 }
1548 return 0;
1549}
1550
/* The socket must have its spinlock held when we get here.
 *
 * We have a potential double-lock case here, so even when doing backlog
 * processing we use the BH locking scheme.  This is because we cannot
 * sleep with the original spinlock held.
 */
1560int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1561{
1562 struct sock *rsk;
1563#ifdef CONFIG_TCP_MD5SIG
 /*
  * We really want to reject the packet as early as possible if:
  *  o we're expecting an MD5'd packet and there is no MD5 TCP option, or
  *  o there is an MD5 option and we're not expecting one.
  */
1570 if (tcp_v4_inbound_md5_hash(sk, skb))
1571 goto discard;
1572#endif
1573
1574 if (sk->sk_state == TCP_ESTABLISHED) {
1575 sock_rps_save_rxhash(sk, skb->rxhash);
1576 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1577 rsk = sk;
1578 goto reset;
1579 }
1580 return 0;
1581 }
1582
1583 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1584 goto csum_err;
1585
1586 if (sk->sk_state == TCP_LISTEN) {
1587 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1588 if (!nsk)
1589 goto discard;
1590
1591 if (nsk != sk) {
1592 sock_rps_save_rxhash(nsk, skb->rxhash);
1593 if (tcp_child_process(sk, nsk, skb)) {
1594 rsk = nsk;
1595 goto reset;
1596 }
1597 return 0;
1598 }
1599 } else
1600 sock_rps_save_rxhash(sk, skb->rxhash);
1601
1602 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1603 rsk = sk;
1604 goto reset;
1605 }
1606 return 0;
1607
1608reset:
1609 tcp_v4_send_reset(rsk, skb);
1610discard:
1611 kfree_skb(skb);
 /* Be careful here.  If this function gets more complicated and gcc
  * suffers from register pressure on x86, sk (in %ebx) might be
  * destroyed here.  The current version compiles correctly, but you
  * have been warned.
  */
1617 return 0;
1618
1619csum_err:
1620 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1621 goto discard;
1622}
1623EXPORT_SYMBOL(tcp_v4_do_rcv);
1624
1625
1626
1627
1628
1629int tcp_v4_rcv(struct sk_buff *skb)
1630{
1631 const struct iphdr *iph;
1632 struct tcphdr *th;
1633 struct sock *sk;
1634 int ret;
1635 struct net *net = dev_net(skb->dev);
1636
1637 if (skb->pkt_type != PACKET_HOST)
1638 goto discard_it;
1639
 /* Count it even if it's bad */
1641 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1642
1643 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1644 goto discard_it;
1645
1646 th = tcp_hdr(skb);
1647
1648 if (th->doff < sizeof(struct tcphdr) / 4)
1649 goto bad_packet;
1650 if (!pskb_may_pull(skb, th->doff * 4))
1651 goto discard_it;
1652
 /* Packet length and doff are validated by header prediction, provided
  * the case of th->doff == 0 is eliminated, so we defer the full checks
  * until later.
  */
1657 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1658 goto bad_packet;
1659
1660 th = tcp_hdr(skb);
1661 iph = ip_hdr(skb);
1662 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1663 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1664 skb->len - th->doff * 4);
1665 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1666 TCP_SKB_CB(skb)->when = 0;
1667 TCP_SKB_CB(skb)->flags = iph->tos;
1668 TCP_SKB_CB(skb)->sacked = 0;
1669
1670 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1671 if (!sk)
1672 goto no_tcp_socket;
1673
1674process:
1675 if (sk->sk_state == TCP_TIME_WAIT)
1676 goto do_time_wait;
1677
1678 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1679 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1680 goto discard_and_relse;
1681 }
1682
1683 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1684 goto discard_and_relse;
1685 nf_reset(skb);
1686
1687 if (sk_filter(sk, skb))
1688 goto discard_and_relse;
1689
1690 skb->dev = NULL;
1691
1692 bh_lock_sock_nested(sk);
1693 ret = 0;
1694 if (!sock_owned_by_user(sk)) {
1695#ifdef CONFIG_NET_DMA
1696 struct tcp_sock *tp = tcp_sk(sk);
1697 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1698 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1699 if (tp->ucopy.dma_chan)
1700 ret = tcp_v4_do_rcv(sk, skb);
1701 else
1702#endif
1703 {
1704 if (!tcp_prequeue(sk, skb))
1705 ret = tcp_v4_do_rcv(sk, skb);
1706 }
1707 } else if (unlikely(sk_add_backlog(sk, skb))) {
1708 bh_unlock_sock(sk);
1709 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1710 goto discard_and_relse;
1711 }
1712 bh_unlock_sock(sk);
1713
1714 sock_put(sk);
1715
1716 return ret;
1717
1718no_tcp_socket:
1719 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1720 goto discard_it;
1721
1722 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1723bad_packet:
1724 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1725 } else {
1726 tcp_v4_send_reset(NULL, skb);
1727 }
1728
1729discard_it:
1730
1731 kfree_skb(skb);
1732 return 0;
1733
1734discard_and_relse:
1735 sock_put(sk);
1736 goto discard_it;
1737
1738do_time_wait:
1739 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1740 inet_twsk_put(inet_twsk(sk));
1741 goto discard_it;
1742 }
1743
1744 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1745 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1746 inet_twsk_put(inet_twsk(sk));
1747 goto discard_it;
1748 }
1749 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1750 case TCP_TW_SYN: {
1751 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1752 &tcp_hashinfo,
1753 iph->daddr, th->dest,
1754 inet_iif(skb));
1755 if (sk2) {
1756 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1757 inet_twsk_put(inet_twsk(sk));
1758 sk = sk2;
1759 goto process;
1760 }
 /* Fall through to ACK */
1762 }
1763 case TCP_TW_ACK:
1764 tcp_v4_timewait_ack(sk, skb);
1765 break;
1766 case TCP_TW_RST:
1767 goto no_tcp_socket;
1768 case TCP_TW_SUCCESS:;
1769 }
1770 goto discard_it;
1771}
1772
1773struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1774{
1775 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1776 struct inet_sock *inet = inet_sk(sk);
1777 struct inet_peer *peer;
1778
1779 if (!rt ||
1780 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1781 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1782 *release_it = true;
1783 } else {
1784 if (!rt->peer)
1785 rt_bind_peer(rt, inet->inet_daddr, 1);
1786 peer = rt->peer;
1787 *release_it = false;
1788 }
1789
1790 return peer;
1791}
1792EXPORT_SYMBOL(tcp_v4_get_peer);
1793
1794void *tcp_v4_tw_get_peer(struct sock *sk)
1795{
1796 struct inet_timewait_sock *tw = inet_twsk(sk);
1797
1798 return inet_getpeer_v4(tw->tw_daddr, 1);
1799}
1800EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1801
1802static struct timewait_sock_ops tcp_timewait_sock_ops = {
1803 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1804 .twsk_unique = tcp_twsk_unique,
1805 .twsk_destructor= tcp_twsk_destructor,
1806 .twsk_getpeer = tcp_v4_tw_get_peer,
1807};
1808
1809const struct inet_connection_sock_af_ops ipv4_specific = {
1810 .queue_xmit = ip_queue_xmit,
1811 .send_check = tcp_v4_send_check,
1812 .rebuild_header = inet_sk_rebuild_header,
1813 .conn_request = tcp_v4_conn_request,
1814 .syn_recv_sock = tcp_v4_syn_recv_sock,
1815 .get_peer = tcp_v4_get_peer,
1816 .net_header_len = sizeof(struct iphdr),
1817 .setsockopt = ip_setsockopt,
1818 .getsockopt = ip_getsockopt,
1819 .addr2sockaddr = inet_csk_addr2sockaddr,
1820 .sockaddr_len = sizeof(struct sockaddr_in),
1821 .bind_conflict = inet_csk_bind_conflict,
1822#ifdef CONFIG_COMPAT
1823 .compat_setsockopt = compat_ip_setsockopt,
1824 .compat_getsockopt = compat_ip_getsockopt,
1825#endif
1826};
1827EXPORT_SYMBOL(ipv4_specific);
1828
1829#ifdef CONFIG_TCP_MD5SIG
1830static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1831 .md5_lookup = tcp_v4_md5_lookup,
1832 .calc_md5_hash = tcp_v4_md5_hash_skb,
1833 .md5_add = tcp_v4_md5_add_func,
1834 .md5_parse = tcp_v4_parse_md5_keys,
1835};
1836#endif
1837
/* NOTE: A lot of things are set to zero explicitly by sk_alloc(), so they
 *	 need not be done here.
 */
1841static int tcp_v4_init_sock(struct sock *sk)
1842{
1843 struct inet_connection_sock *icsk = inet_csk(sk);
1844 struct tcp_sock *tp = tcp_sk(sk);
1845
1846 skb_queue_head_init(&tp->out_of_order_queue);
1847 tcp_init_xmit_timers(sk);
1848 tcp_prequeue_init(tp);
1849
1850 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1851 tp->mdev = TCP_TIMEOUT_INIT;
1852
 /* So many TCP implementations out there (incorrectly) count the
  * initial SYN frame in their delayed-ACK and congestion control
  * algorithms that we must have the following bandaid to talk
  * efficiently to them.
  */
1858 tp->snd_cwnd = 2;
1859
 /* See draft-stevens-tcpca-spec-01 for discussion of the
  * initialization of these values.
  */
1863 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1864 tp->snd_cwnd_clamp = ~0;
1865 tp->mss_cache = TCP_MSS_DEFAULT;
1866
1867 tp->reordering = sysctl_tcp_reordering;
1868 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1869
1870 sk->sk_state = TCP_CLOSE;
1871
1872 sk->sk_write_space = sk_stream_write_space;
1873 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1874
1875 icsk->icsk_af_ops = &ipv4_specific;
1876 icsk->icsk_sync_mss = tcp_sync_mss;
1877#ifdef CONFIG_TCP_MD5SIG
1878 tp->af_specific = &tcp_sock_ipv4_specific;
1879#endif
1880
1881
1882 if (sysctl_tcp_cookie_size > 0) {
1883
1884 tp->cookie_values =
1885 kzalloc(sizeof(*tp->cookie_values),
1886 sk->sk_allocation);
1887 if (tp->cookie_values != NULL)
1888 kref_init(&tp->cookie_values->kref);
1889 }
1890
1891
1892
1893
1894 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1895 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1896
1897 local_bh_disable();
1898 percpu_counter_inc(&tcp_sockets_allocated);
1899 local_bh_enable();
1900
1901 return 0;
1902}
1903
1904void tcp_v4_destroy_sock(struct sock *sk)
1905{
1906 struct tcp_sock *tp = tcp_sk(sk);
1907
1908 tcp_clear_xmit_timers(sk);
1909
1910 tcp_cleanup_congestion_control(sk);
1911
1912
1913 tcp_write_queue_purge(sk);
1914
1915
1916 __skb_queue_purge(&tp->out_of_order_queue);
1917
1918#ifdef CONFIG_TCP_MD5SIG
 /* Clean up the MD5 key list, if any */
1920 if (tp->md5sig_info) {
1921 tcp_v4_clear_md5_list(sk);
1922 kfree(tp->md5sig_info);
1923 tp->md5sig_info = NULL;
1924 }
1925#endif
1926
1927#ifdef CONFIG_NET_DMA
1928
1929 __skb_queue_purge(&sk->sk_async_wait_queue);
1930#endif
1931
1932
1933 __skb_queue_purge(&tp->ucopy.prequeue);
1934
1935
1936 if (inet_csk(sk)->icsk_bind_hash)
1937 inet_put_port(sk);
1938
1939
1940
1941
1942 if (sk->sk_sndmsg_page) {
1943 __free_page(sk->sk_sndmsg_page);
1944 sk->sk_sndmsg_page = NULL;
1945 }
1946
1947
1948 if (tp->cookie_values != NULL) {
1949 kref_put(&tp->cookie_values->kref,
1950 tcp_cookie_values_release);
1951 tp->cookie_values = NULL;
1952 }
1953
1954 percpu_counter_dec(&tcp_sockets_allocated);
1955}
1956EXPORT_SYMBOL(tcp_v4_destroy_sock);
1957
1958#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
1961static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1962{
1963 return hlist_nulls_empty(head) ? NULL :
1964 list_entry(head->first, struct inet_timewait_sock, tw_node);
1965}
1966
1967static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1968{
1969 return !is_a_nulls(tw->tw_node.next) ?
1970 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1971}
1972
/*
 * Get the next listener socket following cur.  If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero, the very first socket in the hash is returned.
 */
1978static void *listening_get_next(struct seq_file *seq, void *cur)
1979{
1980 struct inet_connection_sock *icsk;
1981 struct hlist_nulls_node *node;
1982 struct sock *sk = cur;
1983 struct inet_listen_hashbucket *ilb;
1984 struct tcp_iter_state *st = seq->private;
1985 struct net *net = seq_file_net(seq);
1986
1987 if (!sk) {
1988 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1989 spin_lock_bh(&ilb->lock);
1990 sk = sk_nulls_head(&ilb->head);
1991 st->offset = 0;
1992 goto get_sk;
1993 }
1994 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1995 ++st->num;
1996 ++st->offset;
1997
1998 if (st->state == TCP_SEQ_STATE_OPENREQ) {
1999 struct request_sock *req = cur;
2000
2001 icsk = inet_csk(st->syn_wait_sk);
2002 req = req->dl_next;
2003 while (1) {
2004 while (req) {
2005 if (req->rsk_ops->family == st->family) {
2006 cur = req;
2007 goto out;
2008 }
2009 req = req->dl_next;
2010 }
2011 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2012 break;
2013get_req:
2014 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2015 }
2016 sk = sk_nulls_next(st->syn_wait_sk);
2017 st->state = TCP_SEQ_STATE_LISTENING;
2018 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2019 } else {
2020 icsk = inet_csk(sk);
2021 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2022 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2023 goto start_req;
2024 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2025 sk = sk_nulls_next(sk);
2026 }
2027get_sk:
2028 sk_nulls_for_each_from(sk, node) {
2029 if (!net_eq(sock_net(sk), net))
2030 continue;
2031 if (sk->sk_family == st->family) {
2032 cur = sk;
2033 goto out;
2034 }
2035 icsk = inet_csk(sk);
2036 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2037 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2038start_req:
2039 st->uid = sock_i_uid(sk);
2040 st->syn_wait_sk = sk;
2041 st->state = TCP_SEQ_STATE_OPENREQ;
2042 st->sbucket = 0;
2043 goto get_req;
2044 }
2045 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2046 }
2047 spin_unlock_bh(&ilb->lock);
2048 st->offset = 0;
2049 if (++st->bucket < INET_LHTABLE_SIZE) {
2050 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2051 spin_lock_bh(&ilb->lock);
2052 sk = sk_nulls_head(&ilb->head);
2053 goto get_sk;
2054 }
2055 cur = NULL;
2056out:
2057 return cur;
2058}
2059
2060static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2061{
2062 struct tcp_iter_state *st = seq->private;
2063 void *rc;
2064
2065 st->bucket = 0;
2066 st->offset = 0;
2067 rc = listening_get_next(seq, NULL);
2068
2069 while (rc && *pos) {
2070 rc = listening_get_next(seq, rc);
2071 --*pos;
2072 }
2073 return rc;
2074}
2075
2076static inline int empty_bucket(struct tcp_iter_state *st)
2077{
2078 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2079 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2080}
2081
/*
 * Get the first established socket starting from the bucket given in
 * st->bucket.  If st->bucket is zero, the very first socket in the hash
 * is returned.
 */
2086static void *established_get_first(struct seq_file *seq)
2087{
2088 struct tcp_iter_state *st = seq->private;
2089 struct net *net = seq_file_net(seq);
2090 void *rc = NULL;
2091
2092 st->offset = 0;
2093 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2094 struct sock *sk;
2095 struct hlist_nulls_node *node;
2096 struct inet_timewait_sock *tw;
2097 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2098
 /* Lockless fast path for the common case of empty buckets */
2100 if (empty_bucket(st))
2101 continue;
2102
2103 spin_lock_bh(lock);
2104 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2105 if (sk->sk_family != st->family ||
2106 !net_eq(sock_net(sk), net)) {
2107 continue;
2108 }
2109 rc = sk;
2110 goto out;
2111 }
2112 st->state = TCP_SEQ_STATE_TIME_WAIT;
2113 inet_twsk_for_each(tw, node,
2114 &tcp_hashinfo.ehash[st->bucket].twchain) {
2115 if (tw->tw_family != st->family ||
2116 !net_eq(twsk_net(tw), net)) {
2117 continue;
2118 }
2119 rc = tw;
2120 goto out;
2121 }
2122 spin_unlock_bh(lock);
2123 st->state = TCP_SEQ_STATE_ESTABLISHED;
2124 }
2125out:
2126 return rc;
2127}
2128
2129static void *established_get_next(struct seq_file *seq, void *cur)
2130{
2131 struct sock *sk = cur;
2132 struct inet_timewait_sock *tw;
2133 struct hlist_nulls_node *node;
2134 struct tcp_iter_state *st = seq->private;
2135 struct net *net = seq_file_net(seq);
2136
2137 ++st->num;
2138 ++st->offset;
2139
2140 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2141 tw = cur;
2142 tw = tw_next(tw);
2143get_tw:
2144 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2145 tw = tw_next(tw);
2146 }
2147 if (tw) {
2148 cur = tw;
2149 goto out;
2150 }
2151 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2152 st->state = TCP_SEQ_STATE_ESTABLISHED;
2153
 /* Look for the next non-empty bucket */
2155 st->offset = 0;
2156 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2157 empty_bucket(st))
2158 ;
2159 if (st->bucket > tcp_hashinfo.ehash_mask)
2160 return NULL;
2161
2162 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2163 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2164 } else
2165 sk = sk_nulls_next(sk);
2166
2167 sk_nulls_for_each_from(sk, node) {
2168 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2169 goto found;
2170 }
2171
2172 st->state = TCP_SEQ_STATE_TIME_WAIT;
2173 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2174 goto get_tw;
2175found:
2176 cur = sk;
2177out:
2178 return cur;
2179}
2180
2181static void *established_get_idx(struct seq_file *seq, loff_t pos)
2182{
2183 struct tcp_iter_state *st = seq->private;
2184 void *rc;
2185
2186 st->bucket = 0;
2187 rc = established_get_first(seq);
2188
2189 while (rc && pos) {
2190 rc = established_get_next(seq, rc);
2191 --pos;
2192 }
2193 return rc;
2194}
2195
2196static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2197{
2198 void *rc;
2199 struct tcp_iter_state *st = seq->private;
2200
2201 st->state = TCP_SEQ_STATE_LISTENING;
2202 rc = listening_get_idx(seq, &pos);
2203
2204 if (!rc) {
2205 st->state = TCP_SEQ_STATE_ESTABLISHED;
2206 rc = established_get_idx(seq, pos);
2207 }
2208
2209 return rc;
2210}
2211
2212static void *tcp_seek_last_pos(struct seq_file *seq)
2213{
2214 struct tcp_iter_state *st = seq->private;
2215 int offset = st->offset;
2216 int orig_num = st->num;
2217 void *rc = NULL;
2218
2219 switch (st->state) {
2220 case TCP_SEQ_STATE_OPENREQ:
2221 case TCP_SEQ_STATE_LISTENING:
2222 if (st->bucket >= INET_LHTABLE_SIZE)
2223 break;
2224 st->state = TCP_SEQ_STATE_LISTENING;
2225 rc = listening_get_next(seq, NULL);
2226 while (offset-- && rc)
2227 rc = listening_get_next(seq, rc);
2228 if (rc)
2229 break;
2230 st->bucket = 0;
 /* Fallthrough */
2232 case TCP_SEQ_STATE_ESTABLISHED:
2233 case TCP_SEQ_STATE_TIME_WAIT:
2234 st->state = TCP_SEQ_STATE_ESTABLISHED;
2235 if (st->bucket > tcp_hashinfo.ehash_mask)
2236 break;
2237 rc = established_get_first(seq);
2238 while (offset-- && rc)
2239 rc = established_get_next(seq, rc);
2240 }
2241
2242 st->num = orig_num;
2243
2244 return rc;
2245}
2246
2247static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2248{
2249 struct tcp_iter_state *st = seq->private;
2250 void *rc;
2251
2252 if (*pos && *pos == st->last_pos) {
2253 rc = tcp_seek_last_pos(seq);
2254 if (rc)
2255 goto out;
2256 }
2257
2258 st->state = TCP_SEQ_STATE_LISTENING;
2259 st->num = 0;
2260 st->bucket = 0;
2261 st->offset = 0;
2262 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2263
2264out:
2265 st->last_pos = *pos;
2266 return rc;
2267}
2268
2269static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2270{
2271 struct tcp_iter_state *st = seq->private;
2272 void *rc = NULL;
2273
2274 if (v == SEQ_START_TOKEN) {
2275 rc = tcp_get_idx(seq, 0);
2276 goto out;
2277 }
2278
2279 switch (st->state) {
2280 case TCP_SEQ_STATE_OPENREQ:
2281 case TCP_SEQ_STATE_LISTENING:
2282 rc = listening_get_next(seq, v);
2283 if (!rc) {
2284 st->state = TCP_SEQ_STATE_ESTABLISHED;
2285 st->bucket = 0;
2286 st->offset = 0;
2287 rc = established_get_first(seq);
2288 }
2289 break;
2290 case TCP_SEQ_STATE_ESTABLISHED:
2291 case TCP_SEQ_STATE_TIME_WAIT:
2292 rc = established_get_next(seq, v);
2293 break;
2294 }
2295out:
2296 ++*pos;
2297 st->last_pos = *pos;
2298 return rc;
2299}
2300
2301static void tcp_seq_stop(struct seq_file *seq, void *v)
2302{
2303 struct tcp_iter_state *st = seq->private;
2304
2305 switch (st->state) {
2306 case TCP_SEQ_STATE_OPENREQ:
2307 if (v) {
2308 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2309 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2310 }
2311 case TCP_SEQ_STATE_LISTENING:
2312 if (v != SEQ_START_TOKEN)
2313 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2314 break;
2315 case TCP_SEQ_STATE_TIME_WAIT:
2316 case TCP_SEQ_STATE_ESTABLISHED:
2317 if (v)
2318 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2319 break;
2320 }
2321}
2322
2323static int tcp_seq_open(struct inode *inode, struct file *file)
2324{
2325 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2326 struct tcp_iter_state *s;
2327 int err;
2328
2329 err = seq_open_net(inode, file, &afinfo->seq_ops,
2330 sizeof(struct tcp_iter_state));
2331 if (err < 0)
2332 return err;
2333
2334 s = ((struct seq_file *)file->private_data)->private;
2335 s->family = afinfo->family;
2336 s->last_pos = 0;
2337 return 0;
2338}
2339
2340int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2341{
2342 int rc = 0;
2343 struct proc_dir_entry *p;
2344
2345 afinfo->seq_fops.open = tcp_seq_open;
2346 afinfo->seq_fops.read = seq_read;
2347 afinfo->seq_fops.llseek = seq_lseek;
2348 afinfo->seq_fops.release = seq_release_net;
2349
2350 afinfo->seq_ops.start = tcp_seq_start;
2351 afinfo->seq_ops.next = tcp_seq_next;
2352 afinfo->seq_ops.stop = tcp_seq_stop;
2353
2354 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2355 &afinfo->seq_fops, afinfo);
2356 if (!p)
2357 rc = -ENOMEM;
2358 return rc;
2359}
2360EXPORT_SYMBOL(tcp_proc_register);
2361
2362void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2363{
2364 proc_net_remove(net, afinfo->name);
2365}
2366EXPORT_SYMBOL(tcp_proc_unregister);
2367
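/*
 * The three helpers below format one line of /proc/net/tcp each, for an
 * embryonic open request, a full (listening/established) socket and a
 * TIME-WAIT socket respectively.  All of them emit the same fixed-width
 * record, matching the header printed by tcp4_seq_show():
 *
 *   sl  local_address rem_address   st tx_queue rx_queue tr tm->when
 *   retrnsmt   uid  timeout inode
 *
 * Addresses and ports are printed in hexadecimal, "st" is the TCP state
 * (TCP_SYN_RECV for open requests, tw_substate for TIME-WAIT), and each
 * line is padded to TMPSZ columns by tcp4_seq_show().
 */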
2368static void get_openreq4(struct sock *sk, struct request_sock *req,
2369 struct seq_file *f, int i, int uid, int *len)
2370{
2371 const struct inet_request_sock *ireq = inet_rsk(req);
2372 int ttd = req->expires - jiffies;
2373
2374 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2375 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2376 i,
2377 ireq->loc_addr,
2378 ntohs(inet_sk(sk)->inet_sport),
2379 ireq->rmt_addr,
2380 ntohs(ireq->rmt_port),
2381 TCP_SYN_RECV,
2382 0, 0,
2383 1,
2384 jiffies_to_clock_t(ttd),
2385 req->retrans,
2386 uid,
2387 0,
2388 0,
2389 atomic_read(&sk->sk_refcnt),
2390 req,
2391 len);
2392}
2393
2394static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2395{
2396 int timer_active;
2397 unsigned long timer_expires;
2398 struct tcp_sock *tp = tcp_sk(sk);
2399 const struct inet_connection_sock *icsk = inet_csk(sk);
2400 struct inet_sock *inet = inet_sk(sk);
2401 __be32 dest = inet->inet_daddr;
2402 __be32 src = inet->inet_rcv_saddr;
2403 __u16 destp = ntohs(inet->inet_dport);
2404 __u16 srcp = ntohs(inet->inet_sport);
2405 int rx_queue;
2406
2407 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2408 timer_active = 1;
2409 timer_expires = icsk->icsk_timeout;
2410 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2411 timer_active = 4;
2412 timer_expires = icsk->icsk_timeout;
2413 } else if (timer_pending(&sk->sk_timer)) {
2414 timer_active = 2;
2415 timer_expires = sk->sk_timer.expires;
2416 } else {
2417 timer_active = 0;
2418 timer_expires = jiffies;
2419 }
2420
2421 if (sk->sk_state == TCP_LISTEN)
2422 rx_queue = sk->sk_ack_backlog;
2423 else
 /*
  * Because we don't lock the socket, we might observe a transient
  * negative value here.
  */
2427 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2428
2429 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2430 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2431 i, src, srcp, dest, destp, sk->sk_state,
2432 tp->write_seq - tp->snd_una,
2433 rx_queue,
2434 timer_active,
2435 jiffies_to_clock_t(timer_expires - jiffies),
2436 icsk->icsk_retransmits,
2437 sock_i_uid(sk),
2438 icsk->icsk_probes_out,
2439 sock_i_ino(sk),
2440 atomic_read(&sk->sk_refcnt), sk,
2441 jiffies_to_clock_t(icsk->icsk_rto),
2442 jiffies_to_clock_t(icsk->icsk_ack.ato),
2443 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2444 tp->snd_cwnd,
2445 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2446 len);
2447}
2448
2449static void get_timewait4_sock(struct inet_timewait_sock *tw,
2450 struct seq_file *f, int i, int *len)
2451{
2452 __be32 dest, src;
2453 __u16 destp, srcp;
2454 int ttd = tw->tw_ttd - jiffies;
2455
2456 if (ttd < 0)
2457 ttd = 0;
2458
2459 dest = tw->tw_daddr;
2460 src = tw->tw_rcv_saddr;
2461 destp = ntohs(tw->tw_dport);
2462 srcp = ntohs(tw->tw_sport);
2463
2464 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2465 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2466 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2467 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2468 atomic_read(&tw->tw_refcnt), tw, len);
2469}
2470
2471#define TMPSZ 150
2472
2473static int tcp4_seq_show(struct seq_file *seq, void *v)
2474{
2475 struct tcp_iter_state *st;
2476 int len;
2477
2478 if (v == SEQ_START_TOKEN) {
2479 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2480 " sl local_address rem_address st tx_queue "
2481 "rx_queue tr tm->when retrnsmt uid timeout "
2482 "inode");
2483 goto out;
2484 }
2485 st = seq->private;
2486
2487 switch (st->state) {
2488 case TCP_SEQ_STATE_LISTENING:
2489 case TCP_SEQ_STATE_ESTABLISHED:
2490 get_tcp4_sock(v, seq, st->num, &len);
2491 break;
2492 case TCP_SEQ_STATE_OPENREQ:
2493 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2494 break;
2495 case TCP_SEQ_STATE_TIME_WAIT:
2496 get_timewait4_sock(v, seq, st->num, &len);
2497 break;
2498 }
2499 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2500out:
2501 return 0;
2502}
2503
2504static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2505 .name = "tcp",
2506 .family = AF_INET,
2507 .seq_fops = {
2508 .owner = THIS_MODULE,
2509 },
2510 .seq_ops = {
2511 .show = tcp4_seq_show,
2512 },
2513};
2514
2515static int __net_init tcp4_proc_init_net(struct net *net)
2516{
2517 return tcp_proc_register(net, &tcp4_seq_afinfo);
2518}
2519
2520static void __net_exit tcp4_proc_exit_net(struct net *net)
2521{
2522 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2523}
2524
2525static struct pernet_operations tcp4_net_ops = {
2526 .init = tcp4_proc_init_net,
2527 .exit = tcp4_proc_exit_net,
2528};
2529
2530int __init tcp4_proc_init(void)
2531{
2532 return register_pernet_subsys(&tcp4_net_ops);
2533}
2534
2535void tcp4_proc_exit(void)
2536{
2537 unregister_pernet_subsys(&tcp4_net_ops);
2538}
2539#endif
2540
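/*
 * GRO (generic receive offload) hooks for TCP over IPv4.  tcp4_gro_receive()
 * validates the checksum when one is available before letting the generic
 * tcp_gro_receive() try to coalesce the segment; a segment whose checksum
 * cannot be verified here is flagged for immediate flush instead of being
 * aggregated.  tcp4_gro_complete() fixes up the pseudo-header checksum and
 * GSO type on the merged super-packet before it is passed up the stack.
 */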
2541struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2542{
2543 const struct iphdr *iph = skb_gro_network_header(skb);
2544
2545 switch (skb->ip_summed) {
2546 case CHECKSUM_COMPLETE:
2547 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2548 skb->csum)) {
2549 skb->ip_summed = CHECKSUM_UNNECESSARY;
2550 break;
2551 }
2552
 /* fall through */
2554 case CHECKSUM_NONE:
2555 NAPI_GRO_CB(skb)->flush = 1;
2556 return NULL;
2557 }
2558
2559 return tcp_gro_receive(head, skb);
2560}
2561
2562int tcp4_gro_complete(struct sk_buff *skb)
2563{
2564 const struct iphdr *iph = ip_hdr(skb);
2565 struct tcphdr *th = tcp_hdr(skb);
2566
2567 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2568 iph->saddr, iph->daddr, 0);
2569 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2570
2571 return tcp_gro_complete(skb);
2572}
2573
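/*
 * Protocol glue: this table plugs the IPv4 TCP implementation above into the
 * generic socket layer for SOCK_STREAM sockets in AF_INET.
 */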
2574struct proto tcp_prot = {
2575 .name = "TCP",
2576 .owner = THIS_MODULE,
2577 .close = tcp_close,
2578 .connect = tcp_v4_connect,
2579 .disconnect = tcp_disconnect,
2580 .accept = inet_csk_accept,
2581 .ioctl = tcp_ioctl,
2582 .init = tcp_v4_init_sock,
2583 .destroy = tcp_v4_destroy_sock,
2584 .shutdown = tcp_shutdown,
2585 .setsockopt = tcp_setsockopt,
2586 .getsockopt = tcp_getsockopt,
2587 .recvmsg = tcp_recvmsg,
2588 .sendmsg = tcp_sendmsg,
2589 .sendpage = tcp_sendpage,
2590 .backlog_rcv = tcp_v4_do_rcv,
2591 .hash = inet_hash,
2592 .unhash = inet_unhash,
2593 .get_port = inet_csk_get_port,
2594 .enter_memory_pressure = tcp_enter_memory_pressure,
2595 .sockets_allocated = &tcp_sockets_allocated,
2596 .orphan_count = &tcp_orphan_count,
2597 .memory_allocated = &tcp_memory_allocated,
2598 .memory_pressure = &tcp_memory_pressure,
2599 .sysctl_mem = sysctl_tcp_mem,
2600 .sysctl_wmem = sysctl_tcp_wmem,
2601 .sysctl_rmem = sysctl_tcp_rmem,
2602 .max_header = MAX_TCP_HEADER,
2603 .obj_size = sizeof(struct tcp_sock),
2604 .slab_flags = SLAB_DESTROY_BY_RCU,
2605 .twsk_prot = &tcp_timewait_sock_ops,
2606 .rsk_prot = &tcp_request_sock_ops,
2607 .h.hashinfo = &tcp_hashinfo,
2608 .no_autobind = true,
2609#ifdef CONFIG_COMPAT
2610 .compat_setsockopt = compat_tcp_setsockopt,
2611 .compat_getsockopt = compat_tcp_getsockopt,
2612#endif
2613};
2614EXPORT_SYMBOL(tcp_prot);
2615
2616
2617static int __net_init tcp_sk_init(struct net *net)
2618{
2619 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2620 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2621}
2622
2623static void __net_exit tcp_sk_exit(struct net *net)
2624{
2625 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2626}
2627
2628static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2629{
2630 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2631}
2632
2633static struct pernet_operations __net_initdata tcp_sk_ops = {
2634 .init = tcp_sk_init,
2635 .exit = tcp_sk_exit,
2636 .exit_batch = tcp_sk_exit_batch,
2637};
2638
2639void __init tcp_v4_init(void)
2640{
2641 inet_hashinfo_init(&tcp_hashinfo);
2642 if (register_pernet_subsys(&tcp_sk_ops))
2643 panic("Failed to create the TCP control socket.\n");
2644}
2645