1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/mm.h>
22#include <linux/module.h>
23#include <linux/sysctl.h>
24#include <linux/workqueue.h>
25#include <net/tcp.h>
26#include <net/inet_common.h>
27#include <net/xfrm.h>
28
/* SYN cookies default: off when CONFIG_SYSCTL is available (the admin can
 * enable them at run time), on otherwise so the protection is not
 * unreachable on sysctl-less builds. */
#ifdef CONFIG_SYSCTL
#define SYNC_INIT 0
#else
#define SYNC_INIT 1
#endif

int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
EXPORT_SYMBOL(sysctl_tcp_syncookies);

/* When non-zero, reset connections that overflow the listen queue instead
 * of silently dropping them (checked in tcp_check_req()). */
int sysctl_tcp_abort_on_overflow __read_mostly;
39
/* Global TIME-WAIT bucket bookkeeping: limits the number of timewait
 * sockets, and drives the two expiry timers (slow "hangman" wheel and the
 * faster recycle/twcal wheel) that reap them. */
struct inet_timewait_death_row tcp_death_row = {
	.sysctl_max_tw_buckets = NR_FILE * 2,
	.period		= TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
	.death_lock	= __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
	.hashinfo	= &tcp_hashinfo,
	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
					    (unsigned long)&tcp_death_row),
	.twkill_work	= __WORK_INITIALIZER(tcp_death_row.twkill_work,
					     inet_twdr_twkill_work),
	/* Short-interval (recycle) timer state; twcal_hand == -1 means the
	 * twcal wheel is currently idle. */
	.twcal_hand	= -1,
	.twcal_timer	= TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
					    (unsigned long)&tcp_death_row),
};

EXPORT_SYMBOL_GPL(tcp_death_row);
57
58static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
59{
60 if (seq == s_win)
61 return 1;
62 if (after(end_seq, s_win) && before(seq, e_win))
63 return 1;
64 return (seq == e_win && seq == end_seq);
65}
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*
 * Process an incoming segment for a socket in TIME-WAIT (or the FIN-WAIT-2
 * substate kept on a timewait bucket).  Returns one of:
 *   TCP_TW_SUCCESS - segment consumed (or ignorable); caller drops it,
 *   TCP_TW_ACK     - caller should send an ACK (window probe / keepalive),
 *   TCP_TW_RST     - caller should send a RST,
 *   TCP_TW_SYN     - acceptable new SYN; caller may reopen the connection.
 *
 * NOTE: on TCP_TW_SUCCESS and TCP_TW_RST paths the tw reference is
 * released here (inet_twsk_put); on TCP_TW_ACK/TCP_TW_SYN the caller
 * still holds it.
 */
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
			   const struct tcphdr *th)
{
	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
	struct tcp_options_received tmp_opt;
	int paws_reject = 0;

	/* Parse options only if the header can carry them and we have a
	 * stored timestamp to run the PAWS check against. */
	tmp_opt.saw_tstamp = 0;
	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
		tcp_parse_options(skb, &tmp_opt, 0);

		if (tmp_opt.saw_tstamp) {
			tmp_opt.ts_recent = tcptw->tw_ts_recent;
			tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
		}
	}

	if (tw->tw_substate == TCP_FIN_WAIT2) {
		/* Just repeat all the checks of tcp_rcv_state_process()
		 * for sockets kept in FIN-WAIT-2 on a timewait bucket.
		 * Out of window, check ACK, check SYN, check FIN. */
		if (paws_reject ||
		    !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
				   tcptw->tw_rcv_nxt,
				   tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
			return TCP_TW_ACK;

		if (th->rst)
			goto kill;

		/* SYN at or past rcv_nxt in FIN-WAIT-2 is an error: RST. */
		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
			goto kill_with_rst;

		/* Pure ACK / old or empty segment: nothing to do. */
		if (!th->ack ||
		    !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
		    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
			inet_twsk_put(tw);
			return TCP_TW_SUCCESS;
		}

		/* New data or out-of-sequence FIN while we are already
		 * closed: reset the peer. */
		if (!th->fin ||
		    TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst:
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			return TCP_TW_RST;
		}

		/* FIN arrived: move to real TIME-WAIT. */
		tw->tw_substate	  = TCP_TIME_WAIT;
		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		if (tmp_opt.saw_tstamp) {
			tcptw->tw_ts_recent_stamp = get_seconds();
			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
		}

		/* With tw_recycle enabled (IPv4 only here) and a remembered
		 * peer timestamp we may schedule a shorter timeout;
		 * otherwise use the full TIME-WAIT period. */
		if (tw->tw_family == AF_INET &&
		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
		    tcp_v4_tw_remember_stamp(tw))
			inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
					   TCP_TIMEWAIT_LEN);
		else
			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
					   TCP_TIMEWAIT_LEN);
		return TCP_TW_ACK;
	}

	/*
	 * Now real TIME-WAIT state.
	 *
	 * An in-sequence, PAWS-clean duplicate (empty segment or RST at
	 * rcv_nxt) restarts the TIME-WAIT timer; RFC 1337 controls whether
	 * a RST is allowed to assassinate the timewait bucket.
	 */
	if (!paws_reject &&
	    (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
	     (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
		/* In window segment, it may be the only segment in sequence. */
		if (th->rst) {
			/* When RFC 1337 protection is off, let the RST kill
			 * the bucket immediately (TIME-WAIT assassination). */
			if (sysctl_tcp_rfc1337 == 0) {
kill:
				inet_twsk_deschedule(tw, &tcp_death_row);
				inet_twsk_put(tw);
				return TCP_TW_SUCCESS;
			}
		}
		inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
				   TCP_TIMEWAIT_LEN);

		/* Remember the latest timestamp for future PAWS checks. */
		if (tmp_opt.saw_tstamp) {
			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
			tcptw->tw_ts_recent_stamp = get_seconds();
		}

		inet_twsk_put(tw);
		return TCP_TW_SUCCESS;
	}

	/*
	 * A new SYN is acceptable if its sequence number is beyond rcv_nxt,
	 * or if its timestamp is newer than the one we stored — the peer is
	 * clearly starting a fresh incarnation of the connection.  Pick an
	 * initial sequence number safely above our old snd_nxt and stash it
	 * in the skb (->when) for the listener to use.
	 */
	if (th->syn && !th->rst && !th->ack && !paws_reject &&
	    (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
	     (tmp_opt.saw_tstamp &&
	      (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
		u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
		if (isn == 0)
			isn++;
		TCP_SKB_CB(skb)->when = isn;
		return TCP_TW_SYN;
	}

	if (paws_reject)
		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);

	if (!th->rst) {
		/* In this case we must reset the TIMEWAIT timer:
		 * if it is a bad or PAWS-rejected ACK the peer may still be
		 * retransmitting, so keep the bucket alive for the full
		 * period and answer with an ACK. */
		if (paws_reject || th->ack)
			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
					   TCP_TIMEWAIT_LEN);

		return TCP_TW_ACK;
	}
	inet_twsk_put(tw);
	return TCP_TW_SUCCESS;
}
269
270
271
272
/*
 * Move a closing TCP socket into TIME-WAIT (or FIN-WAIT-2 substate):
 * allocate a lightweight timewait bucket carrying the fields still needed
 * (sequence numbers, window, timestamps, MD5 key, IPv6 addresses), hash it
 * in place of the full socket, schedule its expiry and destroy the
 * original socket.  On bucket-table overflow the socket is simply closed.
 */
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
	struct inet_timewait_sock *tw = NULL;
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_sock *tp = tcp_sk(sk);
	int recycle_ok = 0;

	/* With tw_recycle, remember the peer's timestamp so a shorter
	 * (RTO-based) timeout is safe. */
	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);

	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
		tw = inet_twsk_alloc(sk, state);

	if (tw != NULL) {
		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
		/* 3.5 * RTO, computed without multiplication. */
		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);

		/* Copy the state a timewait bucket still needs. */
		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
		tcptw->tw_rcv_nxt	= tp->rcv_nxt;
		tcptw->tw_snd_nxt	= tp->snd_nxt;
		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (tw->tw_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);
			struct inet6_timewait_sock *tw6;

			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
			tw6 = inet6_twsk((struct sock *)tw);
			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
			tw->tw_ipv6only = np->ipv6only;
		}
#endif

#ifdef CONFIG_TCP_MD5SIG
		/*
		 * The timewait bucket does not have the key DB from the
		 * socket anymore.  Copy the MD5 key (if any) into the
		 * bucket and take a reference on the signing pool; the
		 * matching release is in tcp_twsk_destructor().
		 */
		do {
			struct tcp_md5sig_key *key;
			memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key));
			tcptw->tw_md5_keylen = 0;
			key = tp->af_specific->md5_lookup(sk, sk);
			if (key != NULL) {
				memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
				tcptw->tw_md5_keylen = key->keylen;
				if (tcp_alloc_md5sig_pool(sk) == NULL)
					BUG();
			}
		} while (0);
#endif

		/* Linkage updates: replace sk by tw in the hash tables. */
		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);

		/* Never shorter than 3.5 * RTO — minimal RFC-safe timeout. */
		if (timeo < rto)
			timeo = rto;

		if (recycle_ok) {
			tw->tw_timeout = rto;
		} else {
			tw->tw_timeout = TCP_TIMEWAIT_LEN;
			if (state == TCP_TIME_WAIT)
				timeo = TCP_TIMEWAIT_LEN;
		}

		inet_twsk_schedule(tw, &tcp_death_row, timeo,
				   TCP_TIMEWAIT_LEN);
		inet_twsk_put(tw);
	} else {
		/* Sorry, if we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
		 * non-graceful socket closings. */
		LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n");
	}

	tcp_update_metrics(sk);
	tcp_done(sk);
}
360
/* Release per-bucket resources when a timewait socket dies; currently
 * only the MD5 signing-pool reference taken in tcp_time_wait(). */
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	if (tcp_twsk(sk)->tw_md5_keylen != 0)
		tcp_free_md5sig_pool();
#endif
}

EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
371
372static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
373 struct request_sock *req)
374{
375 tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
376}
377
378
379
380
381
382
383
/*
 * Create the child socket for a completed passive open: clone the
 * listener, then initialize all TCP-specific state (sequence numbers,
 * congestion control, negotiated options) from the request sock and the
 * final ACK's skb.  Returns the new socket, or NULL if cloning failed.
 */
struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
{
	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);

	if (newsk != NULL) {
		const struct inet_request_sock *ireq = inet_rsk(req);
		struct tcp_request_sock *treq = tcp_rsk(req);
		struct inet_connection_sock *newicsk = inet_csk(newsk);
		struct tcp_sock *newtp;

		/* Now setup tcp_sock: sequence state derived from the ISNs
		 * exchanged during the handshake. */
		newtp = tcp_sk(newsk);
		newtp->pred_flags = 0;
		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
		newtp->snd_up = treq->snt_isn + 1;

		tcp_prequeue_init(newtp);

		tcp_init_wl(newtp, treq->rcv_isn);

		/* No RTT sample yet; start with the conservative defaults. */
		newtp->srtt = 0;
		newtp->mdev = TCP_TIMEOUT_INIT;
		newicsk->icsk_rto = TCP_TIMEOUT_INIT;

		newtp->packets_out = 0;
		newtp->retrans_out = 0;
		newtp->sacked_out = 0;
		newtp->fackets_out = 0;
		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;

		/* So many TCP implementations out there (incorrectly) count
		 * the initial SYN frame in their delayed-ACK/congestion
		 * algorithms, so a conservative initial cwnd of 2 is used
		 * here (see RFC 3390). */
		newtp->snd_cwnd = 2;
		newtp->snd_cwnd_cnt = 0;
		newtp->bytes_acked = 0;

		newtp->frto_counter = 0;
		newtp->frto_highmark = 0;

		/* Congestion control starts with the bootstrap ops until the
		 * real algorithm is attached. */
		newicsk->icsk_ca_ops = &tcp_init_congestion_ops;

		tcp_set_ca_state(newsk, TCP_CA_Open);
		tcp_init_xmit_timers(newsk);
		skb_queue_head_init(&newtp->out_of_order_queue);
		newtp->write_seq = treq->snt_isn + 1;
		newtp->pushed_seq = newtp->write_seq;

		newtp->rx_opt.saw_tstamp = 0;

		newtp->rx_opt.dsack = 0;
		newtp->rx_opt.num_sacks = 0;

		newtp->urg_data = 0;

		if (sock_flag(newsk, SOCK_KEEPOPEN))
			inet_csk_reset_keepalive_timer(newsk,
						       keepalive_time_when(newtp));

		/* Option state negotiated in the SYN/SYN-ACK exchange. */
		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
		if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
			if (sysctl_tcp_fack)
				tcp_enable_fack(newtp);
		}
		newtp->window_clamp = req->window_clamp;
		newtp->rcv_ssthresh = req->rcv_wnd;
		newtp->rcv_wnd = req->rcv_wnd;
		newtp->rx_opt.wscale_ok = ireq->wscale_ok;
		if (newtp->rx_opt.wscale_ok) {
			newtp->rx_opt.snd_wscale = ireq->snd_wscale;
			newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
		} else {
			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
			/* Without window scaling the window cannot exceed
			 * the 16-bit header field. */
			newtp->window_clamp = min(newtp->window_clamp, 65535U);
		}
		newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
				  newtp->rx_opt.snd_wscale);
		newtp->max_window = newtp->snd_wnd;

		if (newtp->rx_opt.tstamp_ok) {
			newtp->rx_opt.ts_recent = req->ts_recent;
			newtp->rx_opt.ts_recent_stamp = get_seconds();
			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			newtp->rx_opt.ts_recent_stamp = 0;
			newtp->tcp_header_len = sizeof(struct tcphdr);
		}
#ifdef CONFIG_TCP_MD5SIG
		newtp->md5sig_info = NULL;
		/* If a key is configured for this peer, reserve header room
		 * for the MD5 option on every segment. */
		if (newtp->af_specific->md5_lookup(sk, newsk))
			newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
		if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
		newtp->rx_opt.mss_clamp = req->mss;
		TCP_ECN_openreq_child(newtp, req);

		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
	}
	return newsk;
}
488
489
490
491
492
493
/*
 * Process an incoming segment for a connection in SYN-RECV (request sock
 * still on the listener's queue).  Returns:
 *   - a child socket, if the handshake completed and a socket was created,
 *   - the listener sk, if the segment should be re-processed by the
 *     listener (bad ACK sequence),
 *   - NULL, if the segment was consumed (SYN retransmit answered, bad
 *     window ACKed, defer-accept, overflow, or embryonic reset).
 */
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct request_sock *req,
			   struct request_sock **prev)
{
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
	int paws_reject = 0;
	struct tcp_options_received tmp_opt;
	struct sock *child;

	tmp_opt.saw_tstamp = 0;
	if (th->doff > (sizeof(struct tcphdr)>>2)) {
		tcp_parse_options(skb, &tmp_opt, 0);

		if (tmp_opt.saw_tstamp) {
			tmp_opt.ts_recent = req->ts_recent;
			/* We do not store the true receive time of the
			 * request's timestamp, so reconstruct an estimate
			 * from the SYN-ACK retransmission count for the
			 * PAWS check. */
			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
		}
	}

	/* Pure retransmitted SYN (same ISN, only the SYN flag, PAWS ok):
	 * the peer lost our SYN-ACK — retransmit it and consume the
	 * segment.  Note that the request sock's own retransmit timer keeps
	 * running; the req->retrans counter is not touched here. */
	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
	    flg == TCP_FLAG_SYN &&
	    !paws_reject) {
		req->rsk_ops->rtx_syn_ack(sk, req);
		return NULL;
	}

	/*
	 * From here on we perform the SYN-RECV acceptability checks of
	 * RFC 793 in the order: check ACK value, check sequence/window,
	 * then RST/SYN, then ACK presence.
	 */

	/* An ACK that does not acknowledge exactly our SYN-ACK (snt_isn+1)
	 * is handed back to the listener, which will answer with a reset
	 * (and a SYN-flood warning via cookies, where applicable). */
	if ((flg & TCP_FLAG_ACK) &&
	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
		return sk;

	/* PAWS failure or segment outside the (still rcv_wnd-sized)
	 * receive window: ACK non-RST segments, count PAWS rejections,
	 * consume the segment. */
	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
		/* Out of window: send ACK and drop. */
		if (!(flg & TCP_FLAG_RST))
			req->rsk_ops->send_ack(sk, skb, req);
		if (paws_reject)
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
		return NULL;
	}

	/* In sequence: record the peer's timestamp if the segment starts
	 * at or before the expected sequence. */
	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
		req->ts_recent = tmp_opt.rcv_tsval;

	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
		/* Truncate SYN: it is out of window starting at rcv_isn,
		 * since the in-window part begins at rcv_isn + 1. */
		flg &= ~TCP_FLAG_SYN;
	}

	/* An in-window RST, or a SYN with a different sequence number,
	 * kills the embryonic connection. */
	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
		goto embryonic_reset;
	}

	/* ACK sequence was verified above; a segment without ACK cannot
	 * complete the handshake — silently drop it. */
	if (!(flg & TCP_FLAG_ACK))
		return NULL;

	/* TCP_DEFER_ACCEPT: while under the configured retransmission
	 * budget, acknowledge a bare (data-less) final ACK but do not
	 * create the child yet — wait for data. */
	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
		inet_rsk(req)->acked = 1;
		return NULL;
	}

	/* OK, ACK is valid, create big socket and feed this segment to it.
	 * It will repeat all the tests — this segment must pass to a
	 * socket in ESTABLISHED state. */
	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
	if (child == NULL)
		goto listen_overflow;

	inet_csk_reqsk_queue_unlink(sk, req, prev);
	inet_csk_reqsk_queue_removed(sk, req);

	inet_csk_reqsk_queue_add(sk, req, child);
	return child;

listen_overflow:
	/* Accept queue full: unless abort-on-overflow is set, just mark the
	 * request acked and let the peer retransmit. */
	if (!sysctl_tcp_abort_on_overflow) {
		inet_rsk(req)->acked = 1;
		return NULL;
	}

embryonic_reset:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
	if (!(flg & TCP_FLAG_RST))
		req->rsk_ops->send_reset(sk, skb);

	inet_csk_reqsk_queue_drop(sk, req, prev);
	return NULL;
}
681
682
683
684
685
686
687
/*
 * Queue the segment to the freshly created child socket and run the state
 * machine on it (or backlog the skb if the child is owned by user
 * context).  Called with the child locked; the lock and the reference
 * taken by the caller are both released here.
 */
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb)
{
	int ret = 0;
	/* Snapshot the state before processing so we can detect the
	 * SYN-RECV -> ESTABLISHED transition afterwards. */
	int state = child->sk_state;

	if (!sock_owned_by_user(child)) {
		ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
					    skb->len);
		/* Wakeup parent, send SIGIO: the new connection is ready
		 * to be accepted. */
		if (state == TCP_SYN_RECV && child->sk_state != state)
			parent->sk_data_ready(parent, 0);
	} else {
		/* Alas, it is possible again, because we do lookup in main
		 * socket hash table and the lock on the listening socket
		 * does not protect the child. */
		sk_add_backlog(child, skb);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return ret;
}
712
/* Exported for use by the IPv6 TCP implementation and other in-tree users. */
EXPORT_SYMBOL(tcp_check_req);
EXPORT_SYMBOL(tcp_child_process);
EXPORT_SYMBOL(tcp_create_openreq_child);
EXPORT_SYMBOL(tcp_timewait_state_process);
717