/*
 * TCP input engine: receive-side processing for established connections,
 * RTT estimation, receive-buffer autotuning, SACK/D-SACK scoreboard
 * tagging, and the congestion-state transitions driven by incoming ACKs.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <net/netdma.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_ecn __read_mostly = 2;
EXPORT_SYMBOL(sysctl_tcp_ecn);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 2;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
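
/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */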
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = len;
	} else {
		/* Otherwise, we make more careful check taking into account,
		 * that SACKs block is variable.
		 *
		 * "len" is invariant segment length, including TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, packet should be
		     * full sized, provided peer TCP is not badly broken.
		     * This observation (if it is correct 8)) allows
		     * to handle super-low mtu links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract also invariant (if peer is RFC compliant),
			 * tcp header plus fixed timestamp option length.
			 * Resulting "len" is MSS free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

static void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	tcp_incr_quickack(sk);
	icsk->icsk_ack.pingpong = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */
static inline int tcp_in_quickack_mode(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}

static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr)
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (tp->ecn_flags & TCP_ECN_OK) {
		if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		/* Funny extension: if ECT is not set on a segment,
		 * it is surely retransmit. It is not in ECN RFC,
		 * but Linux follows this rule. */
		else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
			tcp_enter_quickack_mode((struct sock *)tp);
	}
}

static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return 1;
	return 0;
}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */
static void tcp_fixup_sndbuf(struct sock *sk)
{
	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
		     sizeof(struct sk_buff);

	if (sk->sk_sndbuf < 3 * sndmem) {
		sk->sk_sndbuf = 3 * sndmem;
		if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
			sk->sk_sndbuf = sysctl_tcp_wmem[2];
	}
}
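
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * The receiver keeps rcv_ssthresh, a stricter limit than window_clamp,
 * and grows it toward window_clamp only while incoming segments show a
 * sane payload-to-truesize ratio, so that the advertised window never
 * promises memory the socket cannot actually commit.  __tcp_grow_window
 * below answers "by how much may rcv_ssthresh grow for this skb": it
 * halves the skb's memory share and the global rmem limit in lockstep
 * until rcv_ssthresh exceeds the scaled limit, granting twice the
 * received MSS if the payload dominates overhead at that scale, and
 * zero otherwise.
 */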
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Check #1 */
	if (tp->rcv_ssthresh < tp->window_clamp &&
	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
	    !tcp_memory_pressure) {
		int incr;

		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
					       tp->window_clamp);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

/* 3. Tuning rcvbuf, when connection enters established state. */

static void tcp_fixup_rcvbuf(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);

	/* Try to select rcvbuf so that 4 mss-sized segments
	 * will fit to window and corresponding skbs will fit to our rcvbuf.
	 * (was 3; 4 is minimum to allow fast retransmit to work.)
	 */
	while (tcp_win_from_space(rcvmem) < tp->advmss)
		rcvmem += 128;
	if (sk->sk_rcvbuf < 4 * rcvmem)
		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
}

/* 4. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */
static void tcp_init_buffer_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
		tcp_fixup_rcvbuf(sk);
	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_fixup_sndbuf(sk);

	tp->rcvq_space.space = tp->rcv_wnd;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> sysctl_tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (sysctl_tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* 5. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !tcp_memory_pressure &&
	    atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
				    sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}
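
/* Initialize RCV_MSS value.
 * RCV_MSS is an our guess about MSS used by the peer.
 * We haven't any direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate.
 * Overestimations make us ACKing less frequently than needed.
 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 */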
void tcp_initialize_rcv_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);
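
/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation w/o timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
 * <http://public.lanl.gov/radiant/pubs.html#DRS>
 */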
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt;
	long m = sample;

	if (m == 0)
		m = 1;

	if (new_sample != 0) {
		/* If we sample in larger samples in the non-timestamp
		 * case, we could grossly overestimate the RTT especially
		 * with chatty applications or bulk transfer apps which
		 * are stalled on filesystem I/O.
		 *
		 * Also, since we are only going for a minimum in the
		 * non-timestamp case, we do not smooth things out
		 * else with timestamps disabled convergence takes too
		 * long.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else if (m < new_sample)
			new_sample = m << 3;
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	if (tp->rcv_rtt_est.rtt != new_sample)
		tp->rcv_rtt_est.rtt = new_sample;
}

static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tcp_time_stamp;
}

static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
					  const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	if (tp->rx_opt.rcv_tsecr &&
	    (TCP_SKB_CB(skb)->end_seq -
	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
}
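
/*
 * This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */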
void tcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int time;
	int space;

	if (tp->rcvq_space.time == 0)
		goto new_measure;

	time = tcp_time_stamp - tp->rcvq_space.time;
	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
		return;

	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);

	space = max(tp->rcvq_space.space, space);

	if (tp->rcvq_space.space != space) {
		int rcvmem;

		tp->rcvq_space.space = space;

		if (sysctl_tcp_moderate_rcvbuf &&
		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
			int new_clamp = space;

			/* Receive space grows, normalize in order to
			 * take into account packet headers and sk_buff
			 * structure overhead.
			 */
			space /= tp->advmss;
			if (!space)
				space = 1;
			rcvmem = (tp->advmss + MAX_TCP_HEADER +
				  16 + sizeof(struct sk_buff));
			while (tcp_win_from_space(rcvmem) < tp->advmss)
				rcvmem += 128;
			space *= rcvmem;
			space = min(space, sysctl_tcp_rmem[2]);
			if (space > sk->sk_rcvbuf) {
				sk->sk_rcvbuf = space;

				/* Make the window clamp follow along. */
				tp->window_clamp = new_clamp;
			}
		}
	}

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tcp_time_stamp;
}
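
/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCP's do slow start at the beginning of data
 * transmission.  The means that until we send the first few ACK's the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */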
static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 now;

	inet_csk_schedule_ack(sk);

	tcp_measure_rcv_mss(sk, skb);

	tcp_rcv_rtt_measure(tp);

	now = tcp_time_stamp;

	if (!icsk->icsk_ack.ato) {
		/* The _first_ data packet received, initialize
		 * delayed ACK engine.
		 */
		tcp_incr_quickack(sk);
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		int m = now - icsk->icsk_ack.lrcvtime;

		if (m <= TCP_ATO_MIN / 2) {
			/* The fastest case is the first. */
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
		} else if (m < icsk->icsk_ack.ato) {
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
			if (icsk->icsk_ack.ato > icsk->icsk_rto)
				icsk->icsk_ack.ato = icsk->icsk_rto;
		} else if (m > icsk->icsk_rto) {
			/* Too long gap. Apparently sender failed to
			 * restart window, so that we send ACKs quickly.
			 */
			tcp_incr_quickack(sk);
			sk_mem_reclaim(sk);
		}
	}
	icsk->icsk_ack.lrcvtime = now;

	TCP_ECN_check_ce(tp, skb);

	if (skb->len >= 128)
		tcp_grow_window(sk, skb);
}
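
/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */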
static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt; /* RTT */

	/*	The following amusing code comes from Jacobson's
	 *	article in SIGCOMM '88.  Note that rtt and mdev
	 *	are scaled versions of rtt and mean deviation.
	 *	This is designed to be as fast as possible
	 *	m stands for "measurement".
	 *
	 *	On a 1990 paper the rto value is changed to:
	 *	RTO = rtt + 4 * mdev
	 */
	if (m == 0)
		m = 1;
	if (tp->srtt != 0) {
		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (tp->mdev >> 2);	/* similar update on mdev */
			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
			 * for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of rto,
			 * but also it limits too fast rto decreases,
			 * happening in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev >> 2);	/* similar update on mdev */
		}
		tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */
		if (tp->mdev > tp->mdev_max) {
			tp->mdev_max = tp->mdev;
			if (tp->mdev_max > tp->rttvar)
				tp->rttvar = tp->mdev_max;
		}
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max < tp->rttvar)
				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max = tcp_rto_min(sk);
		}
	} else {
		/* no previous measure. */
		tp->srtt = m << 3;	/* take the measured time to be rtt */
		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
		tp->rtt_seq = tp->snd_nxt;
	}
}

/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static inline void tcp_set_rto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	/* 1. If rtt variance happened to be less 50msec, it is hallucination.
	 *    It cannot be less due to utterly erratic ACK generation made
	 *    at least by solaris and freebsd. "Erratic ACKs" has _undoubtedly_
	 *    to do with delayed acks, because at cwnd>2 true delack timeout
	 *    is invisible. Actually, Linux-2.4 also generates erratic
	 *    ACKs in some circumstances.
	 */
	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);

	/* 2. Fixups made earlier cannot be right.
	 *    If we do not estimate RTO correctly without them,
	 *    all the algo is pure shit and should be replaced
	 *    with correct one. It is exactly, which we pretend to do.
	 */

	/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
	 * guarantees that rto is higher.
	 */
	tcp_bound_rto(sk);
}
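
/* Save metrics learned by this TCP session.
 * This function is called only, when TCP finishes successfully
 * i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
 */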
void tcp_update_metrics(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);

	if (sysctl_tcp_nometrics_save)
		return;

	dst_confirm(dst);

	if (dst && (dst->flags & DST_HOST)) {
		const struct inet_connection_sock *icsk = inet_csk(sk);
		int m;
		unsigned long rtt;

		if (icsk->icsk_backoff || !tp->srtt) {
			/* This session failed to estimate rtt. Why?
			 * Probably, no packets returned in time.
			 * Reset our results.
			 */
			if (!(dst_metric_locked(dst, RTAX_RTT)))
				dst_metric_set(dst, RTAX_RTT, 0);
			return;
		}

		rtt = dst_metric_rtt(dst, RTAX_RTT);
		m = rtt - tp->srtt;

		/* If newly calculated rtt larger than stored one,
		 * store new one. Otherwise, use EWMA. Remember,
		 * rtt overestimation is always better than underestimation.
		 */
		if (!(dst_metric_locked(dst, RTAX_RTT))) {
			if (m <= 0)
				set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
			else
				set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
		}

		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
			unsigned long var;
			if (m < 0)
				m = -m;

			/* Scale deviation to rttvar fixed point */
			m >>= 1;
			if (m < tp->mdev)
				m = tp->mdev;

			var = dst_metric_rtt(dst, RTAX_RTTVAR);
			if (m >= var)
				var = m;
			else
				var -= (var - m) >> 2;

			set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
		}

		if (tcp_in_initial_slowstart(tp)) {
			/* Slow start still did not finish. */
			if (dst_metric(dst, RTAX_SSTHRESH) &&
			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
			    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
			if (!dst_metric_locked(dst, RTAX_CWND) &&
			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
				dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
			   icsk->icsk_ca_state == TCP_CA_Open) {
			/* Cong. avoidance phase, cwnd is reliable. */
			if (!dst_metric_locked(dst, RTAX_SSTHRESH))
				dst_metric_set(dst, RTAX_SSTHRESH,
					       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
			if (!dst_metric_locked(dst, RTAX_CWND))
				dst_metric_set(dst, RTAX_CWND,
					       (dst_metric(dst, RTAX_CWND) +
						tp->snd_cwnd) >> 1);
		} else {
			/* Else slow start did not finish, cwnd is non-sensical,
			 * ssthresh may be also invalid.
			 */
			if (!dst_metric_locked(dst, RTAX_CWND))
				dst_metric_set(dst, RTAX_CWND,
					       (dst_metric(dst, RTAX_CWND) +
						tp->snd_ssthresh) >> 1);
			if (dst_metric(dst, RTAX_SSTHRESH) &&
			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
			    tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
		}

		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
			if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
			    tp->reordering != sysctl_tcp_reordering)
				dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
		}
	}
}

__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
{
	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

	if (!cwnd)
		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}

/* Set slow start threshold and cwnd not falling to slow start */
void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);

	tp->prior_ssthresh = 0;
	tp->bytes_acked = 0;
	if (icsk->icsk_ca_state < TCP_CA_CWR) {
		tp->undo_marker = 0;
		if (set_ssthresh)
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tp->snd_cwnd = min(tp->snd_cwnd,
				   tcp_packets_in_flight(tp) + 1U);
		tp->snd_cwnd_cnt = 0;
		tp->high_seq = tp->snd_nxt;
		tp->snd_cwnd_stamp = tcp_time_stamp;
		TCP_ECN_queue_cwr(tp);

		tcp_set_ca_state(sk, TCP_CA_CWR);
	}
}

/*
 * Packet counting of FACK is based on in-order assumptions, therefore TCP
 * disables it when reordering is detected
 */
static void tcp_disable_fack(struct tcp_sock *tp)
{
	/* RFC3517 uses different metric in lost marker => reset on change */
	if (tcp_is_fack(tp))
		tp->lost_skb_hint = NULL;
	tp->rx_opt.sack_ok &= ~2;
}

/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
	tp->rx_opt.sack_ok |= 4;
}
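
/* Initialize metrics on socket. */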
static void tcp_init_metrics(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst == NULL)
		goto reset;

	dst_confirm(dst);

	if (dst_metric_locked(dst, RTAX_CWND))
		tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
	if (dst_metric(dst, RTAX_SSTHRESH)) {
		tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
			tp->snd_ssthresh = tp->snd_cwnd_clamp;
	}
	if (dst_metric(dst, RTAX_REORDERING) &&
	    tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
		tcp_disable_fack(tp);
		tp->reordering = dst_metric(dst, RTAX_REORDERING);
	}

	if (dst_metric(dst, RTAX_RTT) == 0)
		goto reset;

	if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
		goto reset;

	/* Initial rtt is determined from SYN,SYN-ACK.
	 * The segment is small and rtt may appear much
	 * less than real one. Use per-dst memory
	 * to make it more realistic.
	 *
	 * A bit of theory. RTT is time passed after "normal" sized packet
	 * is sent until it is ACKed. In normal circumstances sending small
	 * packets force peer to delay ACKs and calculation is correct too.
	 * The algorithm is adaptive and, provided we follow specs, it
	 * NEVER underestimate RTT. BUT! If peer tries to make some clever
	 * tricks sort of "quick acks" for retransmitted packets, packets
	 * with bogus headers sent back to back, the estimate can break.
	 */
	if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
		tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
		tp->rtt_seq = tp->snd_nxt;
	}
	if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
		tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
	}
	tcp_set_rto(sk);
	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
reset:
		/* Play conservative. If timestamps are not
		 * supported, TCP will fail to recalculate correct
		 * rtt, if initial rto is too small. FORGET ALL AND RESET!
		 */
		if (!tp->rx_opt.saw_tstamp && tp->srtt) {
			tp->srtt = 0;
			tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
			inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
		}
	}
	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

static void tcp_update_reordering(struct sock *sk, const int metric,
				  const int ts)
{
	struct tcp_sock *tp = tcp_sk(sk);
	if (metric > tp->reordering) {
		int mib_idx;

		tp->reordering = min(TCP_MAX_REORDERING, metric);

		/* This exciting event is worth to be remembered. 8) */
		if (ts)
			mib_idx = LINUX_MIB_TCPTSREORDER;
		else if (tcp_is_reno(tp))
			mib_idx = LINUX_MIB_TCPRENOREORDER;
		else if (tcp_is_fack(tp))
			mib_idx = LINUX_MIB_TCPFACKREORDER;
		else
			mib_idx = LINUX_MIB_TCPSACKREORDER;

		NET_INC_STATS_BH(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
		       tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
		       tp->reordering,
		       tp->fackets_out,
		       tp->sacked_out,
		       tp->undo_marker ? tp->undo_retrans : 0);
#endif
		tcp_disable_fack(tp);
	}
}

/* This must be called before lost_out is incremented */
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
	if ((tp->retransmit_skb_hint == NULL) ||
	    before(TCP_SKB_CB(skb)->seq,
		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
		tp->retransmit_skb_hint = skb;

	if (!tp->lost_out ||
	    after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
		tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
}

static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
		tcp_verify_retransmit_hint(tp, skb);

		tp->lost_out += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
	}
}

static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
					    struct sk_buff *skb)
{
	tcp_verify_retransmit_hint(tp, skb);

	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
		tp->lost_out += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
	}
}
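
/* SACK block range validation.
 *
 * A received SACK block must fit within the sequence space the sender
 * actually has outstanding, i.e. [snd_una, snd_nxt); anything outside
 * that range is either corrupt or a stale duplicate and is discarded.
 * D-SACK blocks are the one exception: they report data below snd_una
 * that the receiver saw twice, so they may reach back as far as
 * undo_marker (the left edge of the current undo window).  Because the
 * undo_marker boundary itself may be crossed by a bogus block, the
 * final check also bounds the block length by tp->max_window.
 */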
static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
				  u32 start_seq, u32 end_seq)
{
	/* Too far in future, or reversed (interpretation is ambiguous) */
	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
		return 0;

	/* Nasty start_seq wrap-around check (see comments above) */
	if (!before(start_seq, tp->snd_nxt))
		return 0;

	/* In outstanding window? ...This is valid exit for D-SACKs too.
	 * start_seq == snd_una is non-sensical (see comments above)
	 */
	if (after(start_seq, tp->snd_una))
		return 1;

	if (!is_dsack || !tp->undo_marker)
		return 0;

	/* ...Then it's D-SACK, and must reside below snd_una completely */
	if (!after(end_seq, tp->snd_una))
		return 0;

	if (!before(start_seq, tp->undo_marker))
		return 1;

	/* Too old */
	if (!after(end_seq, tp->undo_marker))
		return 0;

	/* Undo_marker boundary crossing (overestimates a lot). Known already:
	 *   start_seq < undo_marker and end_seq >= undo_marker.
	 */
	return !before(start_seq, end_seq - tp->max_window);
}
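
/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
 * Event "C". Later note: FACK people cheated me again 8), we have to account
 * for reordering! Ugly, but should help.
 *
 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
 * less than what is now known to be received by the other end (derived from
 * highest SACK block). Also calculate the lowest snd_nxt among the remaining
 * retransmitted skbs to avoid some costly processing per ACKs.
 */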
static void tcp_mark_lost_retrans(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt = 0;
	u32 new_low_seq = tp->snd_nxt;
	u32 received_upto = tcp_highest_sack_seq(tp);

	if (!tcp_is_fack(tp) || !tp->retrans_out ||
	    !after(received_upto, tp->lost_retrans_low) ||
	    icsk->icsk_ca_state != TCP_CA_Recovery)
		return;

	tcp_for_write_queue(skb, sk) {
		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;

		if (skb == tcp_send_head(sk))
			break;
		if (cnt == tp->retrans_out)
			break;
		if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
			continue;

		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
			continue;

		/* TODO: We would like to get rid of tcp_is_fack(tp) only
		 * constraint here (see above) but figuring out that at
		 * least tp->reordering SACK blocks reside between ack_seq
		 * and received_upto is not easy task to do cheaply with
		 * the available datastructures.
		 *
		 * Whether FACK should check here for tp->reordering segs
		 * in-between one could argue for either way (it would be
		 * rather simple to implement as we could count fack_count
		 * during the walk and do tp->fackets_out - fack_count).
		 */
		if (after(received_upto, ack_seq)) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
			tp->retrans_out -= tcp_skb_pcount(skb);

			tcp_skb_mark_lost_uncond_verify(tp, skb);
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
		} else {
			if (before(ack_seq, new_low_seq))
				new_low_seq = ack_seq;
			cnt += tcp_skb_pcount(skb);
		}
	}

	if (tp->retrans_out)
		tp->lost_retrans_low = new_low_seq;
}

static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
			   struct tcp_sack_block_wire *sp, int num_sacks,
			   u32 prior_snd_una)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
	int dup_sack = 0;

	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
		dup_sack = 1;
		tcp_dsack_seen(tp);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
	} else if (num_sacks > 1) {
		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);

		if (!after(end_seq_0, end_seq_1) &&
		    !before(start_seq_0, start_seq_1)) {
			dup_sack = 1;
			tcp_dsack_seen(tp);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPDSACKOFORECV);
		}
	}

	/* D-SACK for already forgotten data... Do dumb counting. */
	if (dup_sack && tp->undo_marker && tp->undo_retrans &&
	    !after(end_seq_0, prior_snd_una) &&
	    after(end_seq_0, tp->undo_marker))
		tp->undo_retrans--;

	return dup_sack;
}

struct tcp_sacktag_state {
	int reord;
	int fack_count;
	int flag;
};

/* Check if skb is fully within the SACK block. In presence of GSO skbs,
 * the incoming SACK may not exactly match but we can find smaller MSS
 * aligned portion of it that matches. Therefore we might need to fragment
 * which may fail and creates some hassle (caller must handle error case
 * returns).
 *
 * FIXME: this could be merged to shift decision code
 */
static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
				 u32 start_seq, u32 end_seq)
{
	int in_sack, err;
	unsigned int pkt_len;
	unsigned int mss;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (tcp_skb_pcount(skb) > 1 && !in_sack &&
	    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
		mss = tcp_skb_mss(skb);
		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);

		if (!in_sack) {
			pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				pkt_len = mss;
		} else {
			pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				return -EINVAL;
		}

		/* Round if necessary so that SACKs cover only full MSSes
		 * and/or the remaining small portion (if present)
		 */
		if (pkt_len > mss) {
			unsigned int new_len = (pkt_len / mss) * mss;
			if (!in_sack && new_len < pkt_len) {
				new_len += mss;
				if (new_len > skb->len)
					return 0;
			}
			pkt_len = new_len;
		}
		err = tcp_fragment(sk, skb, pkt_len, mss);
		if (err < 0)
			return err;
	}

	return in_sack;
}

static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
			  struct tcp_sacktag_state *state,
			  int dup_sack, int pcount)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u8 sacked = TCP_SKB_CB(skb)->sacked;
	int fack_count = state->fack_count;

	/* Account D-SACK for retransmitted packet. */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (tp->undo_marker && tp->undo_retrans &&
		    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if (sacked & TCPCB_SACKED_ACKED)
			state->reord = min(fack_count, state->reord);
	}

	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		return sacked;

	if (!(sacked & TCPCB_SACKED_ACKED)) {
		if (sacked & TCPCB_SACKED_RETRANS) {
			/* If the segment is not tagged as lost,
			 * we do not clear RETRANS, believing
			 * that retransmission is still in flight.
			 */
			if (sacked & TCPCB_LOST) {
				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
				tp->lost_out -= pcount;
				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
				/* New sack for not retransmitted frame,
				 * which was in hole. It is reordering.
				 */
				if (before(TCP_SKB_CB(skb)->seq,
					   tcp_highest_sack_seq(tp)))
					state->reord = min(fack_count,
							   state->reord);

				/* SACK enhanced F-RTO (RFC4138; Appendix B) */
				if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
					state->flag |= FLAG_ONLY_ORIG_SACKED;
			}

			if (sacked & TCPCB_LOST) {
				sacked &= ~TCPCB_LOST;
				tp->lost_out -= pcount;
			}
		}

		sacked |= TCPCB_SACKED_ACKED;
		state->flag |= FLAG_DATA_SACKED;
		tp->sacked_out += pcount;

		fack_count += pcount;

		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
		if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
		    before(TCP_SKB_CB(skb)->seq,
			   TCP_SKB_CB(tp->lost_skb_hint)->seq))
			tp->lost_cnt_hint += pcount;

		if (fack_count > tp->fackets_out)
			tp->fackets_out = fack_count;
	}

	/* D-SACK. We can detect redundant retransmission in S|R and plain R
	 * frames and clear it. undo_retrans is decreased above, L|R frames
	 * are accounted above as well.
	 */
	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
		sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= pcount;
	}

	return sacked;
}

static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
			   struct tcp_sacktag_state *state,
			   unsigned int pcount, int shifted, int mss,
			   int dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);

	BUG_ON(!pcount);

	/* Tweak before seqno plays */
	if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
	    !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
		tp->lost_cnt_hint += pcount;

	TCP_SKB_CB(prev)->end_seq += shifted;
	TCP_SKB_CB(skb)->seq += shifted;

	skb_shinfo(prev)->gso_segs += pcount;
	BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
	skb_shinfo(skb)->gso_segs -= pcount;

	/* When we're adding to gso_segs == 1, gso_size will be just set,
	 * in theory this shouldn't be necessary but as long as DSACK
	 * code can come after this skb later on it's better to keep
	 * setting gso_size to something.
	 */
	if (!skb_shinfo(prev)->gso_size) {
		skb_shinfo(prev)->gso_size = mss;
		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
	}

	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
	if (skb_shinfo(skb)->gso_segs <= 1) {
		skb_shinfo(skb)->gso_size = 0;
		skb_shinfo(skb)->gso_type = 0;
	}

	/* We discard results */
	tcp_sacktag_one(skb, sk, state, dup_sack, pcount);

	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);

	if (skb->len > 0) {
		BUG_ON(!tcp_skb_pcount(skb));
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
		return 0;
	}

	/* Whole SKB was eaten :-) */

	if (skb == tp->retransmit_skb_hint)
		tp->retransmit_skb_hint = prev;
	if (skb == tp->scoreboard_skb_hint)
		tp->scoreboard_skb_hint = prev;
	if (skb == tp->lost_skb_hint) {
		tp->lost_skb_hint = prev;
		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
	}

	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
	if (skb == tcp_highest_sack(sk))
		tcp_advance_highest_sack(sk, skb);

	tcp_unlink_write_queue(skb, sk);
	sk_wmem_free_skb(sk, skb);

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);

	return 1;
}

/* I wish gso_size would have a bit more sane initialization than
 * something-or-zero which complicates things
 */
static int tcp_skb_seglen(struct sk_buff *skb)
{
	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
}

/* Shifting pages past head area doesn't work */
static int skb_can_shift(struct sk_buff *skb)
{
	return !skb_headlen(skb) && skb_is_nonlinear(skb);
}

/* Try collapsing SACK blocks spanning across multiple skbs to a single
 * skb.
 */
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
					  struct tcp_sacktag_state *state,
					  u32 start_seq, u32 end_seq,
					  int dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev;
	int mss;
	int pcount = 0;
	int len;
	int in_sack;

	if (!sk_can_gso(sk))
		goto fallback;

	/* Normally R but no L won't result in plain S */
	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
		goto fallback;
	if (!skb_can_shift(skb))
		goto fallback;
	/* This frame is about to be dropped (was ACKed). */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		goto fallback;

	/* Can only happen with delayed DSACK + discard craziness */
	if (unlikely(skb == tcp_write_queue_head(sk)))
		goto fallback;
	prev = tcp_write_queue_prev(sk, skb);

	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
		goto fallback;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (in_sack) {
		len = skb->len;
		pcount = tcp_skb_pcount(skb);
		mss = tcp_skb_seglen(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed and we can
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;
	} else {
		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
			goto noop;
		/* CHECKME: This is non-MSS split case only?, this will
		 * cause skipped skbs due to advancing loop btw, maybe we
		 * can rework this if needed?
		 */
		if (tcp_skb_pcount(skb) <= 1)
			goto noop;

		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
		if (!in_sack) {
			/* TODO: head merge to next could be attempted here
			 * if (!after(TCP_SKB_CB(skb)->end_seq, TCP_SKB_CB(next)->seq)),
			 * though it might not be worth of the additional hassle
			 *
			 * ...we can probably just fallback to what was done
			 * previously. We could try merging non-SACKed ones
			 * as well but it probably isn't going to buy off
			 * because later SACKs might again split them, and
			 * it would make skb timestamp tracking considerably
			 * harder problem.
			 */
			goto fallback;
		}

		len = end_seq - TCP_SKB_CB(skb)->seq;
		BUG_ON(len < 0);
		BUG_ON(len > skb->len);

		/* MSS boundaries should be honoured or else pcount will
		 * severely break even though it makes things bit trickier.
		 * Optimize common case to avoid most of the divides
		 */
		mss = tcp_skb_mss(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed and we can
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;

		if (len == mss) {
			pcount = 1;
		} else if (len < mss) {
			goto noop;
		} else {
			pcount = len / mss;
			len = pcount * mss;
		}
	}

	if (!skb_shift(prev, skb, len))
		goto fallback;
	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
		goto out;

	/* Hole filled allows collapsing with the next as well, this is very
	 * useful when hole on every nth skb pattern happens
	 */
	if (prev == tcp_write_queue_tail(sk))
		goto out;
	skb = tcp_write_queue_next(sk, prev);

	if (!skb_can_shift(skb) ||
	    (skb == tcp_send_head(sk)) ||
	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
	    (mss != tcp_skb_seglen(skb)))
		goto out;

	len = skb->len;
	if (skb_shift(prev, skb, len)) {
		pcount += tcp_skb_pcount(skb);
		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
	}

out:
	state->fack_count += pcount;
	return prev;

noop:
	return skb;

fallback:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
	return NULL;
}

static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					int dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	tcp_for_write_queue_from(skb, sk) {
		int in_sack = 0;
		int dup_sack = dup_sack_in;

		if (skb == tcp_send_head(sk))
			break;

		/* queue is in-order => we can short-circuit the walk early */
		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		if ((next_dup != NULL) &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = 1;
		}

		/* skb reference here is a bit tricky to get right, since
		 * shifting can eat and free both this skb and the next,
		 * so not even _safe variant of the loop is enough.
		 */
		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp != NULL) {
				if (tmp != skb) {
					skb = tmp;
					continue;
				}

				in_sack = 0;
			} else {
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
								  state,
								  dup_sack,
								  tcp_skb_pcount(skb));

			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}

		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
}

/* Avoid all extra work that is being done by sacktag while walking in
 * a normal way
 */
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
					struct tcp_sacktag_state *state,
					u32 skip_to_seq)
{
	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
			break;

		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
}

static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
						struct sock *sk,
						struct tcp_sack_block *next_dup,
						struct tcp_sacktag_state *state,
						u32 skip_to_seq)
{
	if (next_dup == NULL)
		return skb;

	if (before(next_dup->start_seq, skip_to_seq)) {
		skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
		skb = tcp_sacktag_walk(skb, sk, NULL, state,
				       next_dup->start_seq, next_dup->end_seq,
				       1);
	}

	return skb;
}

static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
{
	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
}

static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
			u32 prior_snd_una)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned char *ptr = (skb_transport_header(ack_skb) +
			      TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct tcp_sacktag_state state;
	struct sk_buff *skb;
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
	int found_dup_sack = 0;
	int i, j;
	int first_sack_index;

	state.flag = 0;
	state.reord = tp->packets_out;

	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
			tp->fackets_out = 0;
		tcp_highest_sack_reset(sk);
	}

	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una);
	if (found_dup_sack)
		state.flag |= FLAG_DSACKING_ACK;

	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones, they can
	 * contain valid SACK info.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	used_sacks = 0;
	first_sack_index = 0;
	for (i = 0; i < num_sacks; i++) {
		int dup_sack = !i && found_dup_sack;

		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

		if (!tcp_is_sackblock_valid(tp, dup_sack,
					    sp[used_sacks].start_seq,
					    sp[used_sacks].end_seq)) {
			int mib_idx;

			if (dup_sack) {
				if (!tp->undo_marker)
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
				else
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
			} else {
				/* Don't count olds caused by ACK reordering */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(sp[used_sacks].end_seq, tp->snd_una))
					continue;
				mib_idx = LINUX_MIB_TCPSACKDISCARD;
			}

			NET_INC_STATS_BH(sock_net(sk), mib_idx);
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		/* Ignore very old stuff early */
		if (!after(sp[used_sacks].end_seq, prior_snd_una))
			continue;

		used_sacks++;
	}

	/* order SACK blocks to allow in order walk of the retrans queue */
	for (i = used_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
				swap(sp[j], sp[j + 1]);

				/* Track where the first SACK block goes to */
				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}

	skb = tcp_write_queue_head(sk);
	state.fack_count = 0;
	i = 0;

	if (!tp->sacked_out) {
		/* It's already past, so skip checking against it */
		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
	} else {
		cache = tp->recv_sack_cache;
		/* Skip empty blocks in at head of the cache */
		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
		       !cache->end_seq)
			cache++;
	}

	while (i < used_sacks) {
		u32 start_seq = sp[i].start_seq;
		u32 end_seq = sp[i].end_seq;
		int dup_sack = (found_dup_sack && (i == first_sack_index));
		struct tcp_sack_block *next_dup = NULL;

		if (found_dup_sack && ((i + 1) == first_sack_index))
			next_dup = &sp[i + 1];

		/* SACK above high_seq: data in the current recovery
		 * window was lost.
		 */
		if (after(end_seq, tp->high_seq))
			state.flag |= FLAG_DATA_LOST;

		/* Skip too early cached blocks */
		while (tcp_sack_cache_ok(tp, cache) &&
		       !before(start_seq, cache->end_seq))
			cache++;

		/* Can skip some work by looking recv_sack_cache? */
		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
		    after(end_seq, cache->start_seq)) {

			/* Head todo? */
			if (before(start_seq, cache->start_seq)) {
				skb = tcp_sacktag_skip(skb, sk, &state,
						       start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
						       &state,
						       start_seq,
						       cache->start_seq,
						       dup_sack);
			}

			/* Rest of the block already fully processed? */
			if (!after(end_seq, cache->end_seq))
				goto advance_sp;

			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
						       &state,
						       cache->end_seq);

			/* ...tail remains todo... */
			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
				/* ...but better entrypoint exists! */
				skb = tcp_highest_sack(sk);
				if (skb == NULL)
					break;
				state.fack_count = tp->fackets_out;
				cache++;
				goto walk;
			}

			skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
			/* Check overlap against next cached too (past this one already) */
			cache++;
			continue;
		}

		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
			skb = tcp_highest_sack(sk);
			if (skb == NULL)
				break;
			state.fack_count = tp->fackets_out;
		}
		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);

walk:
		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
				       start_seq, end_seq, dup_sack);

advance_sp:
		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
		 * due to in-order walk
		 */
		if (after(end_seq, tp->frto_highmark))
			state.flag &= ~FLAG_ONLY_ORIG_SACKED;

		i++;
	}

	/* Clear the head of the cache sack blocks so we can skip it next time */
	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}
	for (j = 0; j < used_sacks; j++)
		tp->recv_sack_cache[i++] = sp[j];

	tcp_mark_lost_retrans(sk);

	tcp_verify_left_out(tp);

	if ((state.reord < tp->fackets_out) &&
	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);

out:

#if FASTRETRANS_DEBUG > 0
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
	return state.flag;
}
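
/* Limits sacked_out so that sum with lost_out isn't ever larger than
 * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
 */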
static int tcp_limit_reno_sacked(struct tcp_sock *tp)
{
	u32 holes;

	holes = max(tp->lost_out, 1U);
	holes = min(holes, tp->packets_out);

	if ((tp->sacked_out + holes) > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		return 1;
	}
	return 0;
}

/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only another reason could be bug in receiver TCP.
 */
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
	struct tcp_sock *tp = tcp_sk(sk);
	if (tcp_limit_reno_sacked(tp))
		tcp_update_reordering(sk, tp->packets_out + addend, 0);
}

/* Emulate SACKs for SACKless connection: account for a new dupack. */

static void tcp_add_reno_sack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	tp->sacked_out++;
	tcp_check_reno_reordering(sk, 0);
	tcp_verify_left_out(tp);
}

/* Account for ACK, ACKing some data in Reno Recovery phase. */

static void tcp_remove_reno_sacks(struct sock *sk, int acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (acked > 0) {
		/* One ACK acked hole. The rest eat duplicate ACKs. */
		if (acked - 1 >= tp->sacked_out)
			tp->sacked_out = 0;
		else
			tp->sacked_out -= acked - 1;
	}
	tcp_check_reno_reordering(sk, acked);
	tcp_verify_left_out(tp);
}

static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}

static int tcp_is_sackfrto(const struct tcp_sock *tp)
{
	return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
}

/* F-RTO can only be used if TCP has never retransmitted anything other than
 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
 */
int tcp_use_frto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb;

	if (!sysctl_tcp_frto)
		return 0;

	/* MTU probe and F-RTO won't really play nicely along currently */
	if (icsk->icsk_mtup.probe_size)
		return 0;

	if (tcp_is_sackfrto(tp))
		return 1;

	/* Avoid expensive walking of rexmit queue if possible */
	if (tp->retrans_out > 1)
		return 0;

	skb = tcp_write_queue_head(sk);
	if (tcp_skb_is_last(sk, skb))
		return 1;
	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
			return 0;
		/* Short-circuit when first non-SACKed skb has been checked */
		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
			break;
	}
	return 1;
}
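
/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
 * recovery a bit and use heuristics in tcp_process_frto() to detect if
 * the RTO was caused by a delay spike rather than by loss (RFC4138,
 * "F-RTO").  tcp_enter_frto() adjusts ssthresh much like
 * tcp_enter_loss() would, but leaves the congestion window and the
 * scoreboard mostly alone until the verdict is in; frto_counter tracks
 * which step of the algorithm we are in, and frto_highmark records the
 * sequence up to which SACKs may refer to pre-RTO transmissions.
 */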
void tcp_enter_frto(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
	    tp->snd_una == tp->high_seq ||
	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
	     !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		/* Our state is valid, so reduce ssthresh here (as
		 * tcp_enter_loss would).  If a previous F-RTO round is
		 * still in progress, temporarily drop cwnd to 2 so the
		 * ssthresh callback bases its result on a post-RTO
		 * window rather than on the inflated in-between value.
		 */
		if (tp->frto_counter) {
			u32 stored_cwnd;
			stored_cwnd = tp->snd_cwnd;
			tp->snd_cwnd = 2;
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
			tp->snd_cwnd = stored_cwnd;
		} else {
			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		}
		/* Notify congestion control that F-RTO begins; the
		 * module may want to snapshot state before the RTO
		 * response is decided.
		 */
		tcp_ca_event(sk, CA_EVENT_FRTO);
	}

	tp->undo_marker = tp->snd_una;
	tp->undo_retrans = 0;

	skb = tcp_write_queue_head(sk);
	if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
		tp->undo_marker = 0;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= tcp_skb_pcount(skb);
	}
	tcp_verify_left_out(tp);

	/* Too bad if TCP was application limited */
	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);

	/* Earlier loss recovery underway (see RFC4138; Appendix B).
	 * The last condition is necessary at least in tp->frto_counter case.
	 */
	if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
	    after(tp->high_seq, tp->snd_una)) {
		tp->frto_highmark = tp->high_seq;
	} else {
		tp->frto_highmark = tp->snd_nxt;
	}
	tcp_set_ca_state(sk, TCP_CA_Disorder);
	tp->high_seq = tp->snd_nxt;
	tp->frto_counter = 1;
}

/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
 * which indicates that we should follow the traditional RTO recovery,
 * i.e. mark everything lost and do go-back-N retransmission.
 */
static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	tp->lost_out = 0;
	tp->retrans_out = 0;
	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		/*
		 * Count the retransmission made on RTO correctly (only when
		 * waiting for the first ACK and did not get it)...
		 */
		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
			/* For some reason this R-bit might get cleared? */
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
				tp->retrans_out += tcp_skb_pcount(skb);
			/* ...enter this if branch just for the first segment */
			flag |= FLAG_DATA_ACKED;
		} else {
			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
				tp->undo_marker = 0;
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
		}

		/* Marking forward transmissions that were made after RTO lost
		 * can cause unnecessary retransmissions in some scenarios,
		 * SACK blocks will mitigate that in some but not in all cases.
		 * We used to not mark them but it was causing break-ups with
		 * receivers that do only in-order receival.
		 *
		 * TODO: we could detect presence of such receiver and select
		 * different behavior per flow.
		 */
		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
		}
	}
	tcp_verify_left_out(tp);

	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->frto_counter = 0;
	tp->bytes_acked = 0;

	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);

	tcp_clear_all_retrans_hints(tp);
}

static void tcp_clear_retrans_partial(struct tcp_sock *tp)
{
	tp->retrans_out = 0;
	tp->lost_out = 0;

	tp->undo_marker = 0;
	tp->undo_retrans = 0;
}

void tcp_clear_retrans(struct tcp_sock *tp)
{
	tcp_clear_retrans_partial(tp);

	tp->fackets_out = 0;
	tp->sacked_out = 0;
}
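
/* Enter Loss state. If "how" is not zero, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */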
void tcp_enter_loss(struct sock *sk, int how)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* Reduce ssthresh if it has not yet been made inside this window. */
	if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tcp_ca_event(sk, CA_EVENT_LOSS);
	}
	tp->snd_cwnd = 1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;

	tp->bytes_acked = 0;
	tcp_clear_retrans_partial(tp);

	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	if (!how) {
		/* Push undo marker, if it was plain RTO and nothing
		 * was retransmitted. */
		tp->undo_marker = tp->snd_una;
	} else {
		tp->sacked_out = 0;
		tp->fackets_out = 0;
	}
	tcp_clear_all_retrans_hints(tp);

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
			tp->undo_marker = 0;
		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
		}
	}
	tcp_verify_left_out(tp);

	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);
	/* Abort F-RTO algorithm if one is in progress */
	tp->frto_counter = 0;
}

/* If ACK arrived pointing to a remembered SACK, it means that our
 * remembered SACKs do not reflect real state of receiver i.e.
 * receiver _host_ is heavily congested (or buggy).
 *
 * Do processing similar to RTO timeout.
 */
static int tcp_check_sack_reneging(struct sock *sk, int flag)
{
	if (flag & FLAG_SACK_RENEGING) {
		struct inet_connection_sock *icsk = inet_csk(sk);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);

		tcp_enter_loss(sk, 1);
		icsk->icsk_retransmits++;
		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  icsk->icsk_rto, TCP_RTO_MAX);
		return 1;
	}
	return 0;
}

static inline int tcp_fackets_out(struct tcp_sock *tp)
{
	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
}

/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
 * counter when SACK is enabled (without SACK, sacked_out is used for
 * that purpose).
 *
 * Instead, with FACK TCP uses fackets_out that includes both SACKed
 * segments up to the highest received SACK block so far and holes in
 * between them.
 *
 * With reordering, holes may still be in flight, so RFC3517 recovery
 * uses pure sacked_out (total number of SACKed segments) even though
 * it violates the RFC that uses duplicate ACKs, often these are equal
 * but when e.g. out-of-window ACKs or packet duplication occurs,
 * they differ. Since neither occurs due to loss, TCP should really
 * ignore them.
 */
static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
{
	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
}

static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
{
	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
}

static inline int tcp_head_timedout(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	return tp->packets_out &&
	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
}
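
/* Linux NewReno/SACK/FACK/ECN state machine.
 * --------------------------------------
 *
 * "Open"	Normal state, no dubious events, fast path.
 * "Disorder"	In all the respects it is "Open",
 *		but requires a bit more attention. It is entered when
 *		we see some SACKs or dupacks. It is split of "Open".
 * "CWR"	CWND was reduced due to some Congestion Notification event.
 *		It can be ECN, ICMP source quench, local device congestion.
 * "Recovery"	CWND was reduced, we are fast-retransmitting.
 * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
 *
 * Counting packets in flight is pretty simple.
 *
 *	in_flight = packets_out - left_out + retrans_out
 *
 *	packets_out is SND.NXT-SND.UNA counted in packets.
 *	retrans_out is number of retransmitted segments.
 *	left_out is number of segments left network, but not ACKed yet.
 *
 *		left_out = sacked_out + lost_out
 *
 * sacked_out: Packets, which arrived to receiver out of order
 *		and hence not ACKed. With SACKs this number is simply
 *		amount of SACKed data. Even without SACKs it is easy
 *		to give pretty reliable estimate of this number,
 *		counting duplicate ACKs.
 *
 * lost_out: Packets lost by network. TCP has no explicit "loss
 *		notification" feedback from network (for now).
 *		It means that this number can be only _guessed_.
 *		Actually, it is the heuristics to predict lossage that
 *		distinguishes different algorithms.
 *
 * tcp_time_to_recover() below bundles the heuristics that decide when
 * it is time to move from Disorder to the Recovery state.
 */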
static int tcp_time_to_recover(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 packets_out;

	/* Do not perform any recovery during F-RTO algorithm */
	if (tp->frto_counter)
		return 0;

	/* Trick#1: The loss is proven. */
	if (tp->lost_out)
		return 1;

	/* Not-A-Trick#2 : Classic rule... */
	if (tcp_dupack_heuristics(tp) > tp->reordering)
		return 1;

	/* Trick#3 : when we use RFC2988 timer restart, fast
	 * retransmit can be triggered by timeout of queue head.
	 */
	if (tcp_is_fack(tp) && tcp_head_timedout(sk))
		return 1;

	/* Trick#4 : the whole flight fits below the reordering degree,
	 * but enough of it is SACKed and nothing new can be sent, so
	 * fast retransmit is the only way to keep the flow going.
	 */
	packets_out = tp->packets_out;
	if (packets_out <= tp->reordering &&
	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
	    !tcp_may_send_now(sk)) {
		/* We have nothing to send. This connection is limited
		 * either by receiver window or by application.
		 */
		return 1;
	}

	/* If a thin stream is detected, retransmit after first
	 * received dupack. Employ only if SACK is supported in order
	 * to avoid possible corner-case series of spurious retransmissions
	 * Use only if there are no unsent data.
	 */
	if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
	    tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
	    tcp_is_sack(tp) && !tcp_send_head(sk))
		return 1;

	return 0;
}

/* New heuristics: it is possible only after we switched to restart timer
 * each time when something is ACKed. Hence, we can detect timed out packets
 * during fast retransmit without falling to slow start.
 *
 * Usefulness of this as is very questionable, since we should know which of
 * the segments is the next to timeout which is relatively expensive to find
 * in general case unless we add some data structure just for that. The
 * current approach certainly won't find the right one too often and when it
 * finally does find _something_ it usually marks large part of the window
 * right away (because a retransmission with a larger timestamp blocks the
 * loop from advancing). -ij
 */
static void tcp_timeout_skbs(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
		return;

	skb = tp->scoreboard_skb_hint;
	if (tp->scoreboard_skb_hint == NULL)
		skb = tcp_write_queue_head(sk);

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (!tcp_skb_timedout(sk, skb))
			break;

		tcp_skb_mark_lost(tp, skb);
	}

	tp->scoreboard_skb_hint = skb;

	tcp_verify_left_out(tp);
}

/* Mark head of queue up as lost. With RFC3517 SACK, the packets are
 * counted against sacked "cnt", otherwise against facked "cnt".
 */
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt, oldcnt;
	int err;
	unsigned int mss;

	WARN_ON(packets > tp->packets_out);
	if (tp->lost_skb_hint) {
		skb = tp->lost_skb_hint;
		cnt = tp->lost_cnt_hint;
		/* Head already handled? */
		if (mark_head && skb != tcp_write_queue_head(sk))
			return;
	} else {
		skb = tcp_write_queue_head(sk);
		cnt = 0;
	}

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		/* TODO: do this better */
		/* this is not the most efficient way to do this... */
		tp->lost_skb_hint = skb;
		tp->lost_cnt_hint = cnt;

		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
			break;

		oldcnt = cnt;
		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
			cnt += tcp_skb_pcount(skb);

		if (cnt > packets) {
			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
			    (oldcnt >= packets))
				break;

			mss = skb_shinfo(skb)->gso_size;
			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
			if (err < 0)
				break;
			cnt = packets;
		}

		tcp_skb_mark_lost(tp, skb);

		if (mark_head)
			break;
	}
	tcp_verify_left_out(tp);
}

/* Account newly detected lost packet(s) */
static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_is_reno(tp)) {
		tcp_mark_head_lost(sk, 1, 1);
	} else if (tcp_is_fack(tp)) {
		int lost = tp->fackets_out - tp->reordering;
		if (lost <= 0)
			lost = 1;
		tcp_mark_head_lost(sk, lost, 0);
	} else {
		int sacked_upto = tp->sacked_out - tp->reordering;
		if (sacked_upto >= 0)
			tcp_mark_head_lost(sk, sacked_upto, 0);
		else if (fast_rexmit)
			tcp_mark_head_lost(sk, 1, 1);
	}

	tcp_timeout_skbs(sk);
}

/* CWND moderation, preventing bursts due to too big ACKs
 * in dubious situations.
 */
static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
{
	tp->snd_cwnd = min(tp->snd_cwnd,
			   tcp_packets_in_flight(tp) + tcp_max_burst(tp));
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* Lower bound on congestion window is slow start threshold
 * unless congestion avoidance choice decides to override it.
 */
static inline u32 tcp_cwnd_min(const struct sock *sk)
{
	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;

	return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
}

/* Decrease cwnd each second ack. */
static void tcp_cwnd_down(struct sock *sk, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int decr = tp->snd_cwnd_cnt + 1;

	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
		tp->snd_cwnd_cnt = decr & 1;
		decr >>= 1;

		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
			tp->snd_cwnd -= decr;

		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
		tp->snd_cwnd_stamp = tcp_time_stamp;
	}
}

/* Nothing was retransmitted or returned timestamp is less
 * than timestamp of the first retransmission.
 */
static inline int tcp_packet_delayed(struct tcp_sock *tp)
{
	return !tp->retrans_stamp ||
	       (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
}

/* Undo procedures. */

#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_family == AF_INET) {
		printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
		       msg,
		       &inet->inet_daddr, ntohs(inet->inet_dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);
		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
		       msg,
		       &np->daddr, ntohs(inet->inet_dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
	}
#endif
}
#else
#define DBGUNDO(x...) do { } while (0)
#endif
2661
2662static void tcp_undo_cwr(struct sock *sk, const int undo)
2663{
2664 struct tcp_sock *tp = tcp_sk(sk);
2665
2666 if (tp->prior_ssthresh) {
2667 const struct inet_connection_sock *icsk = inet_csk(sk);
2668
2669 if (icsk->icsk_ca_ops->undo_cwnd)
2670 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2671 else
2672 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2673
2674 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
2675 tp->snd_ssthresh = tp->prior_ssthresh;
2676 TCP_ECN_withdraw_cwr(tp);
2677 }
2678 } else {
2679 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2680 }
2681 tcp_moderate_cwnd(tp);
2682 tp->snd_cwnd_stamp = tcp_time_stamp;
2683}
2684
2685static inline int tcp_may_undo(struct tcp_sock *tp)
2686{
2687 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2688}
2689
2690
2691static int tcp_try_undo_recovery(struct sock *sk)
2692{
2693 struct tcp_sock *tp = tcp_sk(sk);
2694
2695 if (tcp_may_undo(tp)) {
2696 int mib_idx;
2697
2698
2699
2700
2701 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2702 tcp_undo_cwr(sk, 1);
2703 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2704 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2705 else
2706 mib_idx = LINUX_MIB_TCPFULLUNDO;
2707
2708 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2709 tp->undo_marker = 0;
2710 }
2711 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
 /* Hold the old state until something *above* high_seq
 * is ACKed. For Reno this is a MUST to prevent false
 * fast retransmits (RFC2582). SACK TCP is safe.
 */
2715 tcp_moderate_cwnd(tp);
2716 return 1;
2717 }
2718 tcp_set_ca_state(sk, TCP_CA_Open);
2719 return 0;
2720}
2721
/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data. */
2723static void tcp_try_undo_dsack(struct sock *sk)
2724{
2725 struct tcp_sock *tp = tcp_sk(sk);
2726
2727 if (tp->undo_marker && !tp->undo_retrans) {
2728 DBGUNDO(sk, "D-SACK");
2729 tcp_undo_cwr(sk, 1);
2730 tp->undo_marker = 0;
2731 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2732 }
2733}
2734
/* We can clear retrans_stamp only when there are no retransmissions
 * anywhere in the window. tp->retrans_out alone is not sufficient for
 * this, because a retransmission may itself be lost while retrans_out
 * already accounts for it, so the head of the write queue is also
 * checked for the TCPCB_EVER_RETRANS mark.
 */
2749static int tcp_any_retrans_done(struct sock *sk)
2750{
2751 struct tcp_sock *tp = tcp_sk(sk);
2752 struct sk_buff *skb;
2753
2754 if (tp->retrans_out)
2755 return 1;
2756
2757 skb = tcp_write_queue_head(sk);
2758 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2759 return 1;
2760
2761 return 0;
2762}
2763
/* Undo during fast recovery after partial ACK. */
2766static int tcp_try_undo_partial(struct sock *sk, int acked)
2767{
2768 struct tcp_sock *tp = tcp_sk(sk);
2769
2770 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2771
2772 if (tcp_may_undo(tp)) {
 /* Plain luck! The hole was filled with a delayed
 * packet, rather than with a retransmit.
 */
2776 if (!tcp_any_retrans_done(sk))
2777 tp->retrans_stamp = 0;
2778
2779 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2780
2781 DBGUNDO(sk, "Hoe");
2782 tcp_undo_cwr(sk, 0);
2783 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
 /* So... do not make Hoe's retransmit yet.
 * If the first packet was delayed, the rest
 * most probably were delayed as well.
 */
2789 failed = 0;
2790 }
2791 return failed;
2792}
2793
/* Undo during loss recovery after partial ACK. */
2795static int tcp_try_undo_loss(struct sock *sk)
2796{
2797 struct tcp_sock *tp = tcp_sk(sk);
2798
2799 if (tcp_may_undo(tp)) {
2800 struct sk_buff *skb;
2801 tcp_for_write_queue(skb, sk) {
2802 if (skb == tcp_send_head(sk))
2803 break;
2804 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2805 }
2806
2807 tcp_clear_all_retrans_hints(tp);
2808
2809 DBGUNDO(sk, "partial loss");
2810 tp->lost_out = 0;
2811 tcp_undo_cwr(sk, 1);
2812 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2813 inet_csk(sk)->icsk_retransmits = 0;
2814 tp->undo_marker = 0;
2815 if (tcp_is_sack(tp))
2816 tcp_set_ca_state(sk, TCP_CA_Open);
2817 return 1;
2818 }
2819 return 0;
2820}
2821
2822static inline void tcp_complete_cwr(struct sock *sk)
2823{
2824 struct tcp_sock *tp = tcp_sk(sk);
2825 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2826 tp->snd_cwnd_stamp = tcp_time_stamp;
2827 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2828}
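/* Annotation: leaving CWR this way finalizes the multiplicative
 * decrease by clamping cwnd to ssthresh, and CA_EVENT_COMPLETE_CWR
 * lets the pluggable congestion control module run its own epilogue.
 */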
2829
2830static void tcp_try_keep_open(struct sock *sk)
2831{
2832 struct tcp_sock *tp = tcp_sk(sk);
2833 int state = TCP_CA_Open;
2834
2835 if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
2836 state = TCP_CA_Disorder;
2837
2838 if (inet_csk(sk)->icsk_ca_state != state) {
2839 tcp_set_ca_state(sk, state);
2840 tp->high_seq = tp->snd_nxt;
2841 }
2842}
2843
2844static void tcp_try_to_open(struct sock *sk, int flag)
2845{
2846 struct tcp_sock *tp = tcp_sk(sk);
2847
2848 tcp_verify_left_out(tp);
2849
2850 if (!tp->frto_counter && !tcp_any_retrans_done(sk))
2851 tp->retrans_stamp = 0;
2852
2853 if (flag & FLAG_ECE)
2854 tcp_enter_cwr(sk, 1);
2855
2856 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2857 tcp_try_keep_open(sk);
2858 tcp_moderate_cwnd(tp);
2859 } else {
2860 tcp_cwnd_down(sk, flag);
2861 }
2862}
2863
2864static void tcp_mtup_probe_failed(struct sock *sk)
2865{
2866 struct inet_connection_sock *icsk = inet_csk(sk);
2867
2868 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2869 icsk->icsk_mtup.probe_size = 0;
2870}
2871
2872static void tcp_mtup_probe_success(struct sock *sk)
2873{
2874 struct tcp_sock *tp = tcp_sk(sk);
2875 struct inet_connection_sock *icsk = inet_csk(sk);
2876
 /* Scale cwnd to the new, larger MSS so the data in flight is preserved. */
2878 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2879 tp->snd_cwnd = tp->snd_cwnd *
2880 tcp_mss_to_mtu(sk, tp->mss_cache) /
2881 icsk->icsk_mtup.probe_size;
2882 tp->snd_cwnd_cnt = 0;
2883 tp->snd_cwnd_stamp = tcp_time_stamp;
2884 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2885
2886 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2887 icsk->icsk_mtup.probe_size = 0;
2888 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2889}
2890
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */
2895void tcp_simple_retransmit(struct sock *sk)
2896{
2897 const struct inet_connection_sock *icsk = inet_csk(sk);
2898 struct tcp_sock *tp = tcp_sk(sk);
2899 struct sk_buff *skb;
2900 unsigned int mss = tcp_current_mss(sk);
2901 u32 prior_lost = tp->lost_out;
2902
2903 tcp_for_write_queue(skb, sk) {
2904 if (skb == tcp_send_head(sk))
2905 break;
2906 if (tcp_skb_seglen(skb) > mss &&
2907 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2908 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2909 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2910 tp->retrans_out -= tcp_skb_pcount(skb);
2911 }
2912 tcp_skb_mark_lost_uncond_verify(tp, skb);
2913 }
2914 }
2915
2916 tcp_clear_retrans_hints_partial(tp);
2917
2918 if (prior_lost == tp->lost_out)
2919 return;
2920
2921 if (tcp_is_reno(tp))
2922 tcp_limit_reno_sacked(tp);
2923
2924 tcp_verify_left_out(tp);
2925
 /* Don't muck with the congestion window here.
 * Reason is that we do not increase the amount of _data_
 * in the network, but units changed and the effective
 * cwnd/ssthresh really reduced now.
 */
2931 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2932 tp->high_seq = tp->snd_nxt;
2933 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2934 tp->prior_ssthresh = 0;
2935 tp->undo_marker = 0;
2936 tcp_set_ca_state(sk, TCP_CA_Loss);
2937 }
2938 tcp_xmit_retransmit_queue(sk);
2939}
2940EXPORT_SYMBOL(tcp_simple_retransmit);
2941
/* Process an event which can update packets-in-flight not trivially.
 * Main goal of this function is to calculate a new estimate for
 * left_out, taking into account both packets sitting in the receiver's
 * buffer and packets which might have been lost on the way.
 *
 * Besides that it does CWND reduction when packet loss is detected
 * and changes the state of the machine.
 *
 * It does _not_ decide what to send; that is done in
 * tcp_xmit_retransmit_queue().
 */
2953static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2954{
2955 struct inet_connection_sock *icsk = inet_csk(sk);
2956 struct tcp_sock *tp = tcp_sk(sk);
2957 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
2958 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2959 (tcp_fackets_out(tp) > tp->reordering));
2960 int fast_rexmit = 0, mib_idx;
2961
2962 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2963 tp->sacked_out = 0;
2964 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2965 tp->fackets_out = 0;
2966
 /* Now the state machine starts.
 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
2969 if (flag & FLAG_ECE)
2970 tp->prior_ssthresh = 0;
2971
 /* B. In all the states check for reneging SACKs. */
2973 if (tcp_check_sack_reneging(sk, flag))
2974 return;
2975
 /* C. Process data loss notification, provided it is valid. */
2977 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
2978 before(tp->snd_una, tp->high_seq) &&
2979 icsk->icsk_ca_state != TCP_CA_Open &&
2980 tp->fackets_out > tp->reordering) {
2981 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
2982 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
2983 }
2984
 /* D. Check consistency of the current state. */
2986 tcp_verify_left_out(tp);
2987
 /* E. Check state exit conditions. State can be terminated
 * when high_seq is ACKed.
 */
2990 if (icsk->icsk_ca_state == TCP_CA_Open) {
2991 WARN_ON(tp->retrans_out != 0);
2992 tp->retrans_stamp = 0;
2993 } else if (!before(tp->snd_una, tp->high_seq)) {
2994 switch (icsk->icsk_ca_state) {
2995 case TCP_CA_Loss:
2996 icsk->icsk_retransmits = 0;
2997 if (tcp_try_undo_recovery(sk))
2998 return;
2999 break;
3000
3001 case TCP_CA_CWR:
 /* CWR is to be held until something *above* high_seq
 * is ACKed, so that the CWR bit reaches the receiver.
 */
3004 if (tp->snd_una != tp->high_seq) {
3005 tcp_complete_cwr(sk);
3006 tcp_set_ca_state(sk, TCP_CA_Open);
3007 }
3008 break;
3009
3010 case TCP_CA_Disorder:
3011 tcp_try_undo_dsack(sk);
 if (!tp->undo_marker ||
 /* For the SACK case do not move to Open yet,
 * so that undo can still catch all further damage.
 */
 tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
3016 tp->undo_marker = 0;
3017 tcp_set_ca_state(sk, TCP_CA_Open);
3018 }
3019 break;
3020
3021 case TCP_CA_Recovery:
3022 if (tcp_is_reno(tp))
3023 tcp_reset_reno_sack(tp);
3024 if (tcp_try_undo_recovery(sk))
3025 return;
3026 tcp_complete_cwr(sk);
3027 break;
3028 }
3029 }

 /* F. Process state. */
3032 switch (icsk->icsk_ca_state) {
3033 case TCP_CA_Recovery:
3034 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
3035 if (tcp_is_reno(tp) && is_dupack)
3036 tcp_add_reno_sack(sk);
3037 } else
3038 do_lost = tcp_try_undo_partial(sk, pkts_acked);
3039 break;
3040 case TCP_CA_Loss:
3041 if (flag & FLAG_DATA_ACKED)
3042 icsk->icsk_retransmits = 0;
3043 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
3044 tcp_reset_reno_sack(tp);
3045 if (!tcp_try_undo_loss(sk)) {
3046 tcp_moderate_cwnd(tp);
3047 tcp_xmit_retransmit_queue(sk);
3048 return;
3049 }
3050 if (icsk->icsk_ca_state != TCP_CA_Open)
3051 return;
 /* Loss is undone; fall through to processing in Open state. */
3053 default:
3054 if (tcp_is_reno(tp)) {
3055 if (flag & FLAG_SND_UNA_ADVANCED)
3056 tcp_reset_reno_sack(tp);
3057 if (is_dupack)
3058 tcp_add_reno_sack(sk);
3059 }
3060
3061 if (icsk->icsk_ca_state == TCP_CA_Disorder)
3062 tcp_try_undo_dsack(sk);
3063
3064 if (!tcp_time_to_recover(sk)) {
3065 tcp_try_to_open(sk, flag);
3066 return;
3067 }

 /* MTU probe failure: don't reduce cwnd */
3070 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3071 icsk->icsk_mtup.probe_size &&
3072 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3073 tcp_mtup_probe_failed(sk);
3074
3075 tp->snd_cwnd++;
3076 tcp_simple_retransmit(sk);
3077 return;
3078 }
3079

 /* Otherwise enter Recovery state */
3082 if (tcp_is_reno(tp))
3083 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3084 else
3085 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3086
3087 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3088
3089 tp->high_seq = tp->snd_nxt;
3090 tp->prior_ssthresh = 0;
3091 tp->undo_marker = tp->snd_una;
3092 tp->undo_retrans = tp->retrans_out;
3093
3094 if (icsk->icsk_ca_state < TCP_CA_CWR) {
3095 if (!(flag & FLAG_ECE))
3096 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3097 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
3098 TCP_ECN_queue_cwr(tp);
3099 }
3100
3101 tp->bytes_acked = 0;
3102 tp->snd_cwnd_cnt = 0;
3103 tcp_set_ca_state(sk, TCP_CA_Recovery);
3104 fast_rexmit = 1;
3105 }
3106
3107 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3108 tcp_update_scoreboard(sk, fast_rexmit);
3109 tcp_cwnd_down(sk, flag);
3110 tcp_xmit_retransmit_queue(sk);
3111}
3112
3113static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3114{
3115 tcp_rtt_estimator(sk, seq_rtt);
3116 tcp_set_rto(sk);
3117 inet_csk(sk)->icsk_backoff = 0;
3118}
3119
/* Read draft-ietf-tcplw-high-performance before mucking
 * with this code. (Superseded by RFC 1323.)
 */
3123static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3124{
 /* RTTM Rule: A TSecr value received in a segment is used to
 * update the averaged RTT measurement only if the segment
 * acknowledges some new data, i.e., only if it advances the
 * left edge of the send window.
 *
 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 *
 * Changed: reset backoff as soon as we see the first valid sample.
 * If we do not, we get a strongly overestimated rto. With timestamps,
 * samples are accepted even from very old segments: f.e., when rtt=1
 * increases to 8, we retransmit 5 times and after 8 seconds a delayed
 * answer arrives, rto becomes 120 seconds! If at least one segment
 * in the window is lost... Well, that is not good, but the
 * alternative is worse.
 */
3140 struct tcp_sock *tp = tcp_sk(sk);
3141
3142 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3143}
3144
3145static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3146{
 /* We don't have a timestamp. Can only use
 * packets that are not retransmitted to update
 * the RTT estimate.
 *
 * Karn's rule: RTT samples must not be taken from
 * retransmitted packets, because it is impossible to
 * tell which transmission the ACK is for.
 */
3156 if (flag & FLAG_RETRANS_DATA_ACKED)
3157 return;
3158
3159 tcp_valid_rtt_meas(sk, seq_rtt);
3160}
3161
3162static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3163 const s32 seq_rtt)
3164{
3165 const struct tcp_sock *tp = tcp_sk(sk);
3166
3167 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3168 tcp_ack_saw_tstamp(sk, flag);
3169 else if (seq_rtt >= 0)
3170 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3171}
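/* Annotation: sample-source preference. A valid timestamp echo is
 * usable even on retransmitted segments, since TSecr identifies the
 * transmission being acked; without timestamps, only seq_rtt from
 * never-retransmitted data is safe, per Karn's rule above.
 */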
3172
3173static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3174{
3175 const struct inet_connection_sock *icsk = inet_csk(sk);
3176 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3177 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3178}
3179
/* Restart the retransmission timer after forward progress on the
 * connection, or clear it once nothing remains outstanding.
 */
3183static void tcp_rearm_rto(struct sock *sk)
3184{
3185 struct tcp_sock *tp = tcp_sk(sk);
3186
3187 if (!tp->packets_out) {
3188 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3189 } else {
3190 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3191 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3192 }
3193}
3194
/* If we get here, the whole TSO packet has not been acked. */
3196static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3197{
3198 struct tcp_sock *tp = tcp_sk(sk);
3199 u32 packets_acked;
3200
3201 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3202
3203 packets_acked = tcp_skb_pcount(skb);
3204 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3205 return 0;
3206 packets_acked -= tcp_skb_pcount(skb);
3207
3208 if (packets_acked) {
3209 BUG_ON(tcp_skb_pcount(skb) == 0);
3210 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3211 }
3212
3213 return packets_acked;
3214}
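/* Annotation: tcp_trim_head() removes the acked prefix of a partially
 * acked TSO skb, so the acked packet count is simply the pcount before
 * minus the pcount after the trim. A nonzero trim failure makes us
 * report zero packets and retry on a later ACK.
 */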
3215
/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 */
3220static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3221 u32 prior_snd_una)
3222{
3223 struct tcp_sock *tp = tcp_sk(sk);
3224 const struct inet_connection_sock *icsk = inet_csk(sk);
3225 struct sk_buff *skb;
3226 u32 now = tcp_time_stamp;
3227 int fully_acked = 1;
3228 int flag = 0;
3229 u32 pkts_acked = 0;
3230 u32 reord = tp->packets_out;
3231 u32 prior_sacked = tp->sacked_out;
3232 s32 seq_rtt = -1;
3233 s32 ca_seq_rtt = -1;
3234 ktime_t last_ackt = net_invalid_timestamp();
3235
3236 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3237 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3238 u32 acked_pcount;
3239 u8 sacked = scb->sacked;
3240
 /* Determine how many packets and what bytes were acked, tso and else */
3242 if (after(scb->end_seq, tp->snd_una)) {
3243 if (tcp_skb_pcount(skb) == 1 ||
3244 !after(tp->snd_una, scb->seq))
3245 break;
3246
3247 acked_pcount = tcp_tso_acked(sk, skb);
3248 if (!acked_pcount)
3249 break;
3250
3251 fully_acked = 0;
3252 } else {
3253 acked_pcount = tcp_skb_pcount(skb);
3254 }
3255
3256 if (sacked & TCPCB_RETRANS) {
3257 if (sacked & TCPCB_SACKED_RETRANS)
3258 tp->retrans_out -= acked_pcount;
3259 flag |= FLAG_RETRANS_DATA_ACKED;
3260 ca_seq_rtt = -1;
3261 seq_rtt = -1;
3262 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3263 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3264 } else {
3265 ca_seq_rtt = now - scb->when;
3266 last_ackt = skb->tstamp;
3267 if (seq_rtt < 0) {
3268 seq_rtt = ca_seq_rtt;
3269 }
3270 if (!(sacked & TCPCB_SACKED_ACKED))
3271 reord = min(pkts_acked, reord);
3272 }
3273
3274 if (sacked & TCPCB_SACKED_ACKED)
3275 tp->sacked_out -= acked_pcount;
3276 if (sacked & TCPCB_LOST)
3277 tp->lost_out -= acked_pcount;
3278
3279 tp->packets_out -= acked_pcount;
3280 pkts_acked += acked_pcount;
3281
 /* Initial outgoing SYNs get put onto the write_queue
 * just like anything else we transmit. It is not
 * true data, and if we misinform our callers that
 * this ACK acks real data, we will erroneously exit
 * connection startup slow start one packet too
 * quickly. This is severely frowned upon behavior.
 */
3289 if (!(scb->flags & TCPHDR_SYN)) {
3290 flag |= FLAG_DATA_ACKED;
3291 } else {
3292 flag |= FLAG_SYN_ACKED;
3293 tp->retrans_stamp = 0;
3294 }
3295
3296 if (!fully_acked)
3297 break;
3298
3299 tcp_unlink_write_queue(skb, sk);
3300 sk_wmem_free_skb(sk, skb);
3301 tp->scoreboard_skb_hint = NULL;
3302 if (skb == tp->retransmit_skb_hint)
3303 tp->retransmit_skb_hint = NULL;
3304 if (skb == tp->lost_skb_hint)
3305 tp->lost_skb_hint = NULL;
3306 }
3307
3308 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3309 tp->snd_up = tp->snd_una;
3310
3311 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3312 flag |= FLAG_SACK_RENEGING;
3313
3314 if (flag & FLAG_ACKED) {
3315 const struct tcp_congestion_ops *ca_ops
3316 = inet_csk(sk)->icsk_ca_ops;
3317
3318 if (unlikely(icsk->icsk_mtup.probe_size &&
3319 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3320 tcp_mtup_probe_success(sk);
3321 }
3322
3323 tcp_ack_update_rtt(sk, flag, seq_rtt);
3324 tcp_rearm_rto(sk);
3325
3326 if (tcp_is_reno(tp)) {
3327 tcp_remove_reno_sacks(sk, pkts_acked);
3328 } else {
3329 int delta;

 /* Non-retransmitted hole got filled? That's reordering */
3332 if (reord < prior_fackets)
3333 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3334
3335 delta = tcp_is_fack(tp) ? pkts_acked :
3336 prior_sacked - tp->sacked_out;
3337 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3338 }
3339
3340 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3341
3342 if (ca_ops->pkts_acked) {
3343 s32 rtt_us = -1;

 /* Is the ACK-triggering packet unambiguous? */
3346 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
 /* High resolution needed and available? */
3348 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3349 !ktime_equal(last_ackt,
3350 net_invalid_timestamp()))
3351 rtt_us = ktime_us_delta(ktime_get_real(),
3352 last_ackt);
3353 else if (ca_seq_rtt > 0)
3354 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3355 }
3356
3357 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3358 }
3359 }
3360
3361#if FASTRETRANS_DEBUG > 0
3362 WARN_ON((int)tp->sacked_out < 0);
3363 WARN_ON((int)tp->lost_out < 0);
3364 WARN_ON((int)tp->retrans_out < 0);
3365 if (!tp->packets_out && tcp_is_sack(tp)) {
3366 icsk = inet_csk(sk);
3367 if (tp->lost_out) {
3368 printk(KERN_DEBUG "Leak l=%u %d\n",
3369 tp->lost_out, icsk->icsk_ca_state);
3370 tp->lost_out = 0;
3371 }
3372 if (tp->sacked_out) {
3373 printk(KERN_DEBUG "Leak s=%u %d\n",
3374 tp->sacked_out, icsk->icsk_ca_state);
3375 tp->sacked_out = 0;
3376 }
3377 if (tp->retrans_out) {
3378 printk(KERN_DEBUG "Leak r=%u %d\n",
3379 tp->retrans_out, icsk->icsk_ca_state);
3380 tp->retrans_out = 0;
3381 }
3382 }
3383#endif
3384 return flag;
3385}
3386
3387static void tcp_ack_probe(struct sock *sk)
3388{
3389 const struct tcp_sock *tp = tcp_sk(sk);
3390 struct inet_connection_sock *icsk = inet_csk(sk);
3391

 /* Was it a usable window open? */
3394 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3395 icsk->icsk_backoff = 0;
3396 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 /* The socket must be woken up by a subsequent tcp_data_snd_check().
 * This function is not for casual use!
 */
3400 } else {
3401 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3402 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3403 TCP_RTO_MAX);
3404 }
3405}
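/* Annotation: zero-window-probe bookkeeping. Once the peer's window
 * admits the head segment, the probe timer and its exponential backoff
 * are cleared; otherwise the probe timer is re-armed with the current
 * backoff applied, capped at TCP_RTO_MAX.
 */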
3406
3407static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3408{
3409 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3410 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3411}
3412
3413static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3414{
3415 const struct tcp_sock *tp = tcp_sk(sk);
3416 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3417 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3418}
3419
/* Check that the window update is acceptable.
 * The function assumes that snd_una <= ack <= snd_nxt.
 */
3423static inline int tcp_may_update_window(const struct tcp_sock *tp,
3424 const u32 ack, const u32 ack_seq,
3425 const u32 nwin)
3426{
3427 return after(ack, tp->snd_una) ||
3428 after(ack_seq, tp->snd_wl1) ||
3429 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3430}
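/* Annotation: the classic RFC793 acceptance test. An update is taken
 * when the ACK advances snd_una, or carries a newer sequence than the
 * last update (snd_wl1), or the same sequence with a larger window.
 */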
3431
/* Update our send window.
 *
 * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
 * and in FreeBSD; NetBSD's one is even worse), is wrong.
 */
3437static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
3438 u32 ack_seq)
3439{
3440 struct tcp_sock *tp = tcp_sk(sk);
3441 int flag = 0;
3442 u32 nwin = ntohs(tcp_hdr(skb)->window);
3443
3444 if (likely(!tcp_hdr(skb)->syn))
3445 nwin <<= tp->rx_opt.snd_wscale;
3446
3447 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3448 flag |= FLAG_WIN_UPDATE;
3449 tcp_update_wl(tp, ack_seq);
3450
3451 if (tp->snd_wnd != nwin) {
3452 tp->snd_wnd = nwin;

 /* Note, it is the only place where the
 * fast path is recovered for sending TCP.
 */
3457 tp->pred_flags = 0;
3458 tcp_fast_path_check(sk);
3459
3460 if (nwin > tp->max_window) {
3461 tp->max_window = nwin;
3462 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3463 }
3464 }
3465 }
3466
3467 tp->snd_una = ack;
3468
3469 return flag;
3470}
3471
/* A very conservative spurious RTO response algorithm: reduce cwnd and
 * continue in congestion avoidance.
 */
3475static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3476{
3477 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3478 tp->snd_cwnd_cnt = 0;
3479 tp->bytes_acked = 0;
3480 TCP_ECN_queue_cwr(tp);
3481 tcp_moderate_cwnd(tp);
3482}
3483
/* A conservative spurious RTO response algorithm: reduce cwnd using
 * rate halving.
 */
3487static void tcp_ratehalving_spur_to_response(struct sock *sk)
3488{
3489 tcp_enter_cwr(sk, 0);
3490}
3491
3492static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3493{
3494 if (flag & FLAG_ECE)
3495 tcp_ratehalving_spur_to_response(sk);
3496 else
3497 tcp_undo_cwr(sk, 1);
3498}
3499
/* F-RTO spurious RTO detection algorithm (RFC4138)
 *
 * F-RTO acts on the two new ACKs that follow an RTO (well, almost, see
 * the inline comments). State (the ACK number) is kept in frto_counter.
 * When an ACK advances the window, but not to or beyond the highest
 * sequence sent before the RTO:
 *   On the first ACK,  send two new segments out.
 *   On the second ACK, the RTO was likely spurious; run the spurious
 *                      response (selectable with sysctl_tcp_frto_response).
 *
 * If instead an ACK fails to advance the window, or SACK blocks reveal
 * that the retransmission (rather than the original) was what the
 * receiver needed, we fall back to conventional recovery via
 * tcp_enter_frto_loss().
 *
 * The SACK variant (tcp_is_sackfrto) can use incoming SACK information
 * to tell original transmissions and RTO retransmissions apart, and is
 * therefore able to keep going in cases where the basic algorithm must
 * give up.
 */
3530static int tcp_process_frto(struct sock *sk, int flag)
3531{
3532 struct tcp_sock *tp = tcp_sk(sk);
3533
3534 tcp_verify_left_out(tp);
3535
 /* Duplicate the behavior from Loss state (fastretrans_alert) */
3537 if (flag & FLAG_DATA_ACKED)
3538 inet_csk(sk)->icsk_retransmits = 0;
3539
3540 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3541 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3542 tp->undo_marker = 0;
3543
3544 if (!before(tp->snd_una, tp->frto_highmark)) {
3545 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3546 return 1;
3547 }
3548
3549 if (!tcp_is_sackfrto(tp)) {
 /* RFC4138 shortcoming in step 2; should also have case c):
 * an ACK that is neither a duplicate nor advances the window,
 * e.g. opposite-direction data or a pure window update.
 */
3554 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3555 return 1;
3556
3557 if (!(flag & FLAG_DATA_ACKED)) {
3558 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3559 flag);
3560 return 1;
3561 }
3562 } else {
3563 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
 /* Prevent sending of new data. */
3565 tp->snd_cwnd = min(tp->snd_cwnd,
3566 tcp_packets_in_flight(tp));
3567 return 1;
3568 }
3569
3570 if ((tp->frto_counter >= 2) &&
3571 (!(flag & FLAG_FORWARD_PROGRESS) ||
3572 ((flag & FLAG_DATA_SACKED) &&
3573 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
 /* RFC4138 shortcoming (see comment above) */
3575 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3576 (flag & FLAG_NOT_DUP))
3577 return 1;
3578
3579 tcp_enter_frto_loss(sk, 3, flag);
3580 return 1;
3581 }
3582 }
3583
3584 if (tp->frto_counter == 1) {
 /* tcp_may_send_now() needs to see the updated state */
3586 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3587 tp->frto_counter = 2;
3588
3589 if (!tcp_may_send_now(sk))
3590 tcp_enter_frto_loss(sk, 2, flag);
3591
3592 return 1;
3593 } else {
3594 switch (sysctl_tcp_frto_response) {
3595 case 2:
3596 tcp_undo_spur_to_response(sk, flag);
3597 break;
3598 case 1:
3599 tcp_conservative_spur_to_response(tp);
3600 break;
3601 default:
3602 tcp_ratehalving_spur_to_response(sk);
3603 break;
3604 }
3605 tp->frto_counter = 0;
3606 tp->undo_marker = 0;
3607 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3608 }
3609 return 0;
3610}
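/* Annotation, a typical spurious-RTO walk-through: the RTO handler set
 * frto_counter to 1. The first ACK that advances the window bumps cwnd
 * to packets-in-flight + 2 and sets frto_counter to 2, letting two
 * previously unsent segments out. If the next ACK again advances the
 * window, the timeout is declared spurious and one of the three
 * sysctl-selectable responses above runs; any other pattern drops into
 * conventional loss recovery through tcp_enter_frto_loss().
 */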
3611
/* This routine deals with incoming acks, but not outgoing ones. */
3613static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3614{
3615 struct inet_connection_sock *icsk = inet_csk(sk);
3616 struct tcp_sock *tp = tcp_sk(sk);
3617 u32 prior_snd_una = tp->snd_una;
3618 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3619 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3620 u32 prior_in_flight;
3621 u32 prior_fackets;
3622 int prior_packets;
3623 int frto_cwnd = 0;
3624
 /* If the ack is older than previous acks
 * then we can probably ignore it.
 */
3628 if (before(ack, prior_snd_una))
3629 goto old_ack;
3630
 /* If the ack includes data we haven't sent yet, discard
 * this segment (RFC793 Section 3.9).
 */
3634 if (after(ack, tp->snd_nxt))
3635 goto invalid_ack;
3636
3637 if (after(ack, prior_snd_una))
3638 flag |= FLAG_SND_UNA_ADVANCED;
3639
3640 if (sysctl_tcp_abc) {
3641 if (icsk->icsk_ca_state < TCP_CA_CWR)
3642 tp->bytes_acked += ack - prior_snd_una;
3643 else if (icsk->icsk_ca_state == TCP_CA_Loss)
 /* we assume just one segment left the network */
3645 tp->bytes_acked += min(ack - prior_snd_una,
3646 tp->mss_cache);
3647 }
3648
3649 prior_fackets = tp->fackets_out;
3650 prior_in_flight = tcp_packets_in_flight(tp);
3651
3652 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 /* Window is constant, pure forward advance.
 * No more checks are required.
 * Note, we use the fact that SND.UNA>=SND.WL2.
 */
3657 tcp_update_wl(tp, ack_seq);
3658 tp->snd_una = ack;
3659 flag |= FLAG_WIN_UPDATE;
3660
3661 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3662
3663 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3664 } else {
3665 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3666 flag |= FLAG_DATA;
3667 else
3668 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3669
3670 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3671
3672 if (TCP_SKB_CB(skb)->sacked)
3673 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3674
3675 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3676 flag |= FLAG_ECE;
3677
3678 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3679 }
3680
 /* We passed data and got it acked, remove any soft error
 * log. Something worked...
 */
3684 sk->sk_err_soft = 0;
3685 icsk->icsk_probes_out = 0;
3686 tp->rcv_tstamp = tcp_time_stamp;
3687 prior_packets = tp->packets_out;
3688 if (!prior_packets)
3689 goto no_queue;
3690
 /* See if we can take anything off of the retransmit queue. */
3692 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3693
3694 if (tp->frto_counter)
3695 frto_cwnd = tcp_process_frto(sk, flag);
3696
3697 if (before(tp->frto_highmark, tp->snd_una))
3698 tp->frto_highmark = 0;
3699
3700 if (tcp_ack_is_dubious(sk, flag)) {
 /* Advance CWND, if state allows this. */
3702 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3703 tcp_may_raise_cwnd(sk, flag))
3704 tcp_cong_avoid(sk, ack, prior_in_flight);
3705 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3706 flag);
3707 } else {
3708 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3709 tcp_cong_avoid(sk, ack, prior_in_flight);
3710 }
3711
3712 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3713 dst_confirm(__sk_dst_get(sk));
3714
3715 return 1;
3716
3717no_queue:
 /* If this ack opens up a zero window, clear backoff. It was
 * being used to time the probes, and is probably far higher than
 * it needs to be for normal retransmission.
 */
3722 if (tcp_send_head(sk))
3723 tcp_ack_probe(sk);
3724 return 1;
3725
3726invalid_ack:
3727 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3728 return -1;
3729
3730old_ack:
3731 if (TCP_SKB_CB(skb)->sacked) {
3732 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3733 if (icsk->icsk_ca_state == TCP_CA_Open)
3734 tcp_try_keep_open(sk);
3735 }
3736
3737 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3738 return 0;
3739}
3740
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
 */
3745void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3746 u8 **hvpp, int estab)
3747{
3748 unsigned char *ptr;
3749 struct tcphdr *th = tcp_hdr(skb);
3750 int length = (th->doff * 4) - sizeof(struct tcphdr);
3751
3752 ptr = (unsigned char *)(th + 1);
3753 opt_rx->saw_tstamp = 0;
3754
3755 while (length > 0) {
3756 int opcode = *ptr++;
3757 int opsize;
3758
3759 switch (opcode) {
3760 case TCPOPT_EOL:
3761 return;
3762 case TCPOPT_NOP:
3763 length--;
3764 continue;
3765 default:
3766 opsize = *ptr++;
3767 if (opsize < 2)
3768 return;
3769 if (opsize > length)
3770 return;
3771 switch (opcode) {
3772 case TCPOPT_MSS:
3773 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3774 u16 in_mss = get_unaligned_be16(ptr);
3775 if (in_mss) {
3776 if (opt_rx->user_mss &&
3777 opt_rx->user_mss < in_mss)
3778 in_mss = opt_rx->user_mss;
3779 opt_rx->mss_clamp = in_mss;
3780 }
3781 }
3782 break;
3783 case TCPOPT_WINDOW:
3784 if (opsize == TCPOLEN_WINDOW && th->syn &&
3785 !estab && sysctl_tcp_window_scaling) {
3786 __u8 snd_wscale = *(__u8 *)ptr;
3787 opt_rx->wscale_ok = 1;
3788 if (snd_wscale > 14) {
3789 if (net_ratelimit())
3790 printk(KERN_INFO "tcp_parse_options: Illegal window "
3791 "scaling value %d >14 received.\n",
3792 snd_wscale);
3793 snd_wscale = 14;
3794 }
3795 opt_rx->snd_wscale = snd_wscale;
3796 }
3797 break;
3798 case TCPOPT_TIMESTAMP:
3799 if ((opsize == TCPOLEN_TIMESTAMP) &&
3800 ((estab && opt_rx->tstamp_ok) ||
3801 (!estab && sysctl_tcp_timestamps))) {
3802 opt_rx->saw_tstamp = 1;
3803 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3804 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3805 }
3806 break;
3807 case TCPOPT_SACK_PERM:
3808 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3809 !estab && sysctl_tcp_sack) {
3810 opt_rx->sack_ok = 1;
3811 tcp_sack_reset(opt_rx);
3812 }
3813 break;
3814
3815 case TCPOPT_SACK:
3816 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3817 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3818 opt_rx->sack_ok) {
3819 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3820 }
3821 break;
3822#ifdef CONFIG_TCP_MD5SIG
3823 case TCPOPT_MD5SIG:
 /*
 * The MD5 Hash has already been
 * checked (see tcp_v{4,6}_do_rcv()).
 */
3828 break;
3829#endif
3830 case TCPOPT_COOKIE:
 /* This option is variable length. */
3833 switch (opsize) {
3834 case TCPOLEN_COOKIE_BASE:
 /* not yet implemented */
3836 break;
3837 case TCPOLEN_COOKIE_PAIR:
 /* not yet implemented */
3839 break;
3840 case TCPOLEN_COOKIE_MIN+0:
3841 case TCPOLEN_COOKIE_MIN+2:
3842 case TCPOLEN_COOKIE_MIN+4:
3843 case TCPOLEN_COOKIE_MIN+6:
3844 case TCPOLEN_COOKIE_MAX:
 /* 16-bit multiple */
3846 opt_rx->cookie_plus = opsize;
3847 *hvpp = ptr;
3848 break;
3849 default:
 /* ignore option */
3851 break;
3852 }
3853 break;
3854 }
3855
 ptr += opsize - 2;
3857 length -= opsize;
3858 }
3859 }
3860}
3861EXPORT_SYMBOL(tcp_parse_options);
3862
3863static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3864{
3865 __be32 *ptr = (__be32 *)(th + 1);
3866
3867 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3868 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3869 tp->rx_opt.saw_tstamp = 1;
3870 ++ptr;
3871 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3872 ++ptr;
3873 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3874 return 1;
3875 }
3876 return 0;
3877}
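/* Annotation: the word compared above is the RFC 1323 appendix A
 * layout, the only one the fast path accepts:
 *
 *   +---------+---------+----------+----------+
 *   | NOP (1) | NOP (1) | kind = 8 | len = 10 |
 *   +---------+---------+----------+----------+
 *   |              TSval (4 bytes)            |
 *   |              TSecr (4 bytes)            |
 *   +-----------------------------------------+
 *
 * Any other option arrangement falls back to tcp_parse_options().
 */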
3878
/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */
3882static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3883 struct tcp_sock *tp, u8 **hvpp)
3884{
 /* In the spirit of fast parsing, compare doff directly to constant
 * values. Because equality is used, short doff can be ignored here.
 */
3888 if (th->doff == (sizeof(*th) / 4)) {
3889 tp->rx_opt.saw_tstamp = 0;
3890 return 0;
3891 } else if (tp->rx_opt.tstamp_ok &&
3892 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3893 if (tcp_parse_aligned_timestamp(tp, th))
3894 return 1;
3895 }
3896 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
3897 return 1;
3898}
3899
3900#ifdef CONFIG_TCP_MD5SIG
/*
 * Parse an MD5 Signature option.
 */
3904u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3905{
3906 int length = (th->doff << 2) - sizeof (*th);
 u8 *ptr = (u8 *)(th + 1);
3908
 /* If the TCP option is too short, we can short cut */
3910 if (length < TCPOLEN_MD5SIG)
3911 return NULL;
3912
3913 while (length > 0) {
3914 int opcode = *ptr++;
3915 int opsize;
3916
 switch (opcode) {
3918 case TCPOPT_EOL:
3919 return NULL;
3920 case TCPOPT_NOP:
3921 length--;
3922 continue;
3923 default:
3924 opsize = *ptr++;
3925 if (opsize < 2 || opsize > length)
3926 return NULL;
3927 if (opcode == TCPOPT_MD5SIG)
3928 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3929 }
3930 ptr += opsize - 2;
3931 length -= opsize;
3932 }
3933 return NULL;
3934}
3935EXPORT_SYMBOL(tcp_parse_md5sig_option);
3936#endif
3937
3938static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3939{
3940 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3941 tp->rx_opt.ts_recent_stamp = get_seconds();
3942}
3943
3944static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3945{
3946 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
 /* PAWS bug workaround wrt. ACK frames: the PAWS discard
 * extra check below makes sure this can only happen
 * for pure ACK frames. -DaveM
 *
 * Not only that, it also occurs for expired timestamps.
 */
3954 if (tcp_paws_check(&tp->rx_opt, 0))
3955 tcp_store_ts_recent(tp);
3956 }
3957}
3958
/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
 *
 * It is not fatal. If this ACK does _not_ change critical state (seqs,
 * window), it can pass through the stack. The predicate below verifies
 * that the segment is used for nothing but congestion notification and
 * duplicate-ACK accounting:
 *
 *   1. it is a pure ACK carrying the expected sequence number,
 *   2. it duplicates the current snd_una,
 *   3. it does not update the send window, and
 *   4. its timestamp sits within the replay window (about one RTO).
 *
 * Such a segment carries no new information, so a stale timestamp on
 * it is harmless and it may be accepted despite failing the strict
 * PAWS check.
 */
3982static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3983{
3984 struct tcp_sock *tp = tcp_sk(sk);
3985 struct tcphdr *th = tcp_hdr(skb);
3986 u32 seq = TCP_SKB_CB(skb)->seq;
3987 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3988
 return (/* 1. Pure ACK with correct sequence number. */
 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&

 /* 2. ... and duplicate ACK. */
 ack == tp->snd_una &&

 /* 3. ... and does not update window. */
 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&

 /* 4. ... and sits in replay window. */
 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4000}
4001
4002static inline int tcp_paws_discard(const struct sock *sk,
4003 const struct sk_buff *skb)
4004{
4005 const struct tcp_sock *tp = tcp_sk(sk);
4006
4007 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4008 !tcp_disordered_ack(sk, skb);
4009}
4010
/* Check segment sequence number for validity.
 *
 * Segment controls are considered valid if the segment
 * fits to the window after truncation to the window. Acceptability
 * of data (and SYN, FIN, of course) is checked separately.
 * See tcp_data_queue(), for example.
 *
 * Also, controls (RST is the main one) are accepted using RCV.WUP instead
 * of RCV.NXT. The peer still did not advance his SND.UNA when we
 * delayed the ACK, so that his SND.UNA <= our RCV.WUP.
 * (borrowed from FreeBSD)
 */
4024static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
4025{
4026 return !before(end_seq, tp->rcv_wup) &&
4027 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4028}
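/* Annotation: since tcp_receive_window() is rcv_wup + rcv_wnd - rcv_nxt
 * (clamped at zero), the test accepts any segment overlapping the
 * interval [rcv_wup, rcv_wup + rcv_wnd]; anchoring at rcv_wup instead
 * of rcv_nxt tolerates our own delayed/withheld ACKs.
 */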
4029
4030
4031static void tcp_reset(struct sock *sk)
4032{
4033
4034 switch (sk->sk_state) {
4035 case TCP_SYN_SENT:
4036 sk->sk_err = ECONNREFUSED;
4037 break;
4038 case TCP_CLOSE_WAIT:
4039 sk->sk_err = EPIPE;
4040 break;
4041 case TCP_CLOSE:
4042 return;
4043 default:
4044 sk->sk_err = ECONNRESET;
4045 }
4046
4047 smp_wmb();
4048
4049 if (!sock_flag(sk, SOCK_DEAD))
4050 sk->sk_error_report(sk);
4051
4052 tcp_done(sk);
4053}
4054
/*
 * Process the FIN bit. This now behaves as it is supposed to work
 * and the FIN takes effect when it is validly part of sequence
 * space, not before we get holes.
 *
 * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 * (and thence onto LAST-ACK and finally, CLOSE; we never enter
 * TIME-WAIT from that path).
 *
 * If we are in FINWAIT-1, a received FIN indicates simultaneous
 * close and we go into CLOSING (and later onto TIME-WAIT).
 *
 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 */
4069static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
4070{
4071 struct tcp_sock *tp = tcp_sk(sk);
4072
4073 inet_csk_schedule_ack(sk);
4074
4075 sk->sk_shutdown |= RCV_SHUTDOWN;
4076 sock_set_flag(sk, SOCK_DONE);
4077
4078 switch (sk->sk_state) {
4079 case TCP_SYN_RECV:
4080 case TCP_ESTABLISHED:
 /* Move to CLOSE_WAIT */
4082 tcp_set_state(sk, TCP_CLOSE_WAIT);
4083 inet_csk(sk)->icsk_ack.pingpong = 1;
4084 break;
4085
4086 case TCP_CLOSE_WAIT:
4087 case TCP_CLOSING:
 /* Received a retransmission of the FIN, do
 * nothing.
 */
4091 break;
4092 case TCP_LAST_ACK:
 /* RFC793: Remain in the LAST-ACK state. */
4094 break;
4095
4096 case TCP_FIN_WAIT1:
 /* This case occurs when a simultaneous close
 * happens; we must ack the received FIN and
 * enter the CLOSING state.
 */
4101 tcp_send_ack(sk);
4102 tcp_set_state(sk, TCP_CLOSING);
4103 break;
4104 case TCP_FIN_WAIT2:
 /* Received a FIN -- send ACK and enter TIME_WAIT. */
4106 tcp_send_ack(sk);
4107 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4108 break;
4109 default:
 /* Only TCP_LISTEN and TCP_CLOSE are left; in these
 * cases we should never reach this piece of code.
 */
4113 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4114 __func__, sk->sk_state);
4115 break;
4116 }
4117
 /* It _is_ possible that we have something out-of-order _after_ FIN.
 * Probably we should reset in this case. For now drop them.
 */
4121 __skb_queue_purge(&tp->out_of_order_queue);
4122 if (tcp_is_sack(tp))
4123 tcp_sack_reset(&tp->rx_opt);
4124 sk_mem_reclaim(sk);
4125
4126 if (!sock_flag(sk, SOCK_DEAD)) {
4127 sk->sk_state_change(sk);

 /* Do not send POLL_HUP for half duplex close. */
4130 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4131 sk->sk_state == TCP_CLOSE)
4132 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4133 else
4134 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4135 }
4136}
4137
4138static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4139 u32 end_seq)
4140{
4141 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4142 if (before(seq, sp->start_seq))
4143 sp->start_seq = seq;
4144 if (after(end_seq, sp->end_seq))
4145 sp->end_seq = end_seq;
4146 return 1;
4147 }
4148 return 0;
4149}
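/* Annotation: ranges that touch or overlap are merged into their
 * union. E.g. extending a block covering [100,200) with [200,300)
 * widens it to [100,300) and returns 1, while a disjoint [400,500)
 * leaves the block alone and returns 0.
 */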
4150
4151static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4152{
4153 struct tcp_sock *tp = tcp_sk(sk);
4154
4155 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4156 int mib_idx;
4157
4158 if (before(seq, tp->rcv_nxt))
4159 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4160 else
4161 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4162
4163 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4164
4165 tp->rx_opt.dsack = 1;
4166 tp->duplicate_sack[0].start_seq = seq;
4167 tp->duplicate_sack[0].end_seq = end_seq;
4168 }
4169}
4170
4171static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4172{
4173 struct tcp_sock *tp = tcp_sk(sk);
4174
4175 if (!tp->rx_opt.dsack)
4176 tcp_dsack_set(sk, seq, end_seq);
4177 else
4178 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4179}
4180
4181static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4182{
4183 struct tcp_sock *tp = tcp_sk(sk);
4184
4185 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4186 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4187 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4188 tcp_enter_quickack_mode(sk);
4189
4190 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4191 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4192
4193 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4194 end_seq = tp->rcv_nxt;
4195 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4196 }
4197 }
4198
4199 tcp_send_ack(sk);
4200}
4201
/* These routines update the SACK block as out-of-order packets arrive or
 * in-order packets close up the sequence space.
 */
4205static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4206{
4207 int this_sack;
4208 struct tcp_sack_block *sp = &tp->selective_acks[0];
4209 struct tcp_sack_block *swalk = sp + 1;
4210
 /* See if the recent change to the first SACK eats into
 * or hits the sequence space of other SACK blocks; if so, coalesce.
 */
4214 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4215 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4216 int i;
4217
 /* Zap SWALK, by moving every further SACK up by one slot.
 * Decrease num_sacks.
 */
4221 tp->rx_opt.num_sacks--;
4222 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4223 sp[i] = sp[i + 1];
4224 continue;
4225 }
4226 this_sack++, swalk++;
4227 }
4228}
4229
4230static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4231{
4232 struct tcp_sock *tp = tcp_sk(sk);
4233 struct tcp_sack_block *sp = &tp->selective_acks[0];
4234 int cur_sacks = tp->rx_opt.num_sacks;
4235 int this_sack;
4236
4237 if (!cur_sacks)
4238 goto new_sack;
4239
4240 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4241 if (tcp_sack_extend(sp, seq, end_seq)) {
4242
4243 for (; this_sack > 0; this_sack--, sp--)
4244 swap(*sp, *(sp - 1));
4245 if (cur_sacks > 1)
4246 tcp_sack_maybe_coalesce(tp);
4247 return;
4248 }
4249 }
4250
 /* Could not find an adjacent existing SACK, build a new one,
 * put it at the front, and shift everyone else down. We
 * always know there is at least one SACK present already here.
 *
 * If the sack array is full, forget about the last one.
 */
4257 if (this_sack >= TCP_NUM_SACKS) {
4258 this_sack--;
4259 tp->rx_opt.num_sacks--;
4260 sp--;
4261 }
4262 for (; this_sack > 0; this_sack--, sp--)
4263 *sp = *(sp - 1);
4264
4265new_sack:
4266
4267 sp->start_seq = seq;
4268 sp->end_seq = end_seq;
4269 tp->rx_opt.num_sacks++;
4270}
4271
/* RCV.NXT advances, some SACKs should be eaten. */
4274static void tcp_sack_remove(struct tcp_sock *tp)
4275{
4276 struct tcp_sack_block *sp = &tp->selective_acks[0];
4277 int num_sacks = tp->rx_opt.num_sacks;
4278 int this_sack;
4279
 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
4281 if (skb_queue_empty(&tp->out_of_order_queue)) {
4282 tp->rx_opt.num_sacks = 0;
4283 return;
4284 }
4285
4286 for (this_sack = 0; this_sack < num_sacks;) {
 /* Check if the start of the sack is covered by RCV.NXT. */
4288 if (!before(tp->rcv_nxt, sp->start_seq)) {
4289 int i;

 /* RCV.NXT must cover all the block! */
4292 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4293
 /* Zap this SACK, by moving forward any other SACKs. */
 for (i = this_sack + 1; i < num_sacks; i++)
 tp->selective_acks[i - 1] = tp->selective_acks[i];
4297 num_sacks--;
4298 continue;
4299 }
4300 this_sack++;
4301 sp++;
4302 }
4303 tp->rx_opt.num_sacks = num_sacks;
4304}
4305
/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */
4309static void tcp_ofo_queue(struct sock *sk)
4310{
4311 struct tcp_sock *tp = tcp_sk(sk);
4312 __u32 dsack_high = tp->rcv_nxt;
4313 struct sk_buff *skb;
4314
4315 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4316 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4317 break;
4318
4319 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4320 __u32 dsack = dsack_high;
4321 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4322 dsack_high = TCP_SKB_CB(skb)->end_seq;
4323 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4324 }
4325
4326 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4327 SOCK_DEBUG(sk, "ofo packet was already received\n");
4328 __skb_unlink(skb, &tp->out_of_order_queue);
4329 __kfree_skb(skb);
4330 continue;
4331 }
4332 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4333 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4334 TCP_SKB_CB(skb)->end_seq);
4335
4336 __skb_unlink(skb, &tp->out_of_order_queue);
4337 __skb_queue_tail(&sk->sk_receive_queue, skb);
4338 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4339 if (tcp_hdr(skb)->fin)
4340 tcp_fin(skb, sk, tcp_hdr(skb));
4341 }
4342}
4343
4344static int tcp_prune_ofo_queue(struct sock *sk);
4345static int tcp_prune_queue(struct sock *sk);
4346
4347static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4348{
4349 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4350 !sk_rmem_schedule(sk, size)) {
4351
4352 if (tcp_prune_queue(sk) < 0)
4353 return -1;
4354
4355 if (!sk_rmem_schedule(sk, size)) {
4356 if (!tcp_prune_ofo_queue(sk))
4357 return -1;
4358
4359 if (!sk_rmem_schedule(sk, size))
4360 return -1;
4361 }
4362 }
4363 return 0;
4364}
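/* Annotation: the escalation order is deliberate. tcp_prune_queue()
 * first collapses queued segments to reclaim overhead; only if the skb
 * still does not fit is the out-of-order queue purged, which costs
 * retransmissions from the peer but never already-acked data.
 */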
4365
4366static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4367{
4368 struct tcphdr *th = tcp_hdr(skb);
4369 struct tcp_sock *tp = tcp_sk(sk);
4370 int eaten = -1;
4371
4372 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4373 goto drop;
4374
4375 skb_dst_drop(skb);
4376 __skb_pull(skb, th->doff * 4);
4377
4378 TCP_ECN_accept_cwr(tp, skb);
4379
4380 tp->rx_opt.dsack = 0;
4381
 /* Queue data for delivery to the user.
 * Packets in sequence go to the receive queue.
 * Out of sequence packets to the out_of_order_queue.
 */
4386 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4387 if (tcp_receive_window(tp) == 0)
4388 goto out_of_window;
4389
4390
4391 if (tp->ucopy.task == current &&
4392 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4393 sock_owned_by_user(sk) && !tp->urg_data) {
4394 int chunk = min_t(unsigned int, skb->len,
4395 tp->ucopy.len);
4396
4397 __set_current_state(TASK_RUNNING);
4398
4399 local_bh_enable();
4400 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4401 tp->ucopy.len -= chunk;
4402 tp->copied_seq += chunk;
4403 eaten = (chunk == skb->len);
4404 tcp_rcv_space_adjust(sk);
4405 }
4406 local_bh_disable();
4407 }
4408
4409 if (eaten <= 0) {
4410queue_and_out:
4411 if (eaten < 0 &&
4412 tcp_try_rmem_schedule(sk, skb->truesize))
4413 goto drop;
4414
4415 skb_set_owner_r(skb, sk);
4416 __skb_queue_tail(&sk->sk_receive_queue, skb);
4417 }
4418 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4419 if (skb->len)
4420 tcp_event_data_recv(sk, skb);
4421 if (th->fin)
4422 tcp_fin(skb, sk, th);
4423
4424 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4425 tcp_ofo_queue(sk);
4426
4427
4428
4429
4430 if (skb_queue_empty(&tp->out_of_order_queue))
4431 inet_csk(sk)->icsk_ack.pingpong = 0;
4432 }
4433
4434 if (tp->rx_opt.num_sacks)
4435 tcp_sack_remove(tp);
4436
4437 tcp_fast_path_check(sk);
4438
4439 if (eaten > 0)
4440 __kfree_skb(skb);
4441 else if (!sock_flag(sk, SOCK_DEAD))
4442 sk->sk_data_ready(sk, 0);
4443 return;
4444 }
4445
4446 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 /* A retransmit, 2nd most common case. Force an immediate ack. */
4448 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4449 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4450
4451out_of_window:
4452 tcp_enter_quickack_mode(sk);
4453 inet_csk_schedule_ack(sk);
4454drop:
4455 __kfree_skb(skb);
4456 return;
4457 }
4458
 /* Out of window. F.e. zero window probe. */
4460 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4461 goto out_of_window;
4462
4463 tcp_enter_quickack_mode(sk);
4464
4465 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 /* Partial packet, seq < rcv_next < end_seq */
4467 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4468 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4469 TCP_SKB_CB(skb)->end_seq);
4470
4471 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4472
 /* If the window is closed, drop the tail of the packet. But after
 * remembering the D-SACK for its head made in the previous line.
 */
4476 if (!tcp_receive_window(tp))
4477 goto out_of_window;
4478 goto queue_and_out;
4479 }
4480
4481 TCP_ECN_check_ce(tp, skb);
4482
4483 if (tcp_try_rmem_schedule(sk, skb->truesize))
4484 goto drop;

 /* Disable header prediction. */
4487 tp->pred_flags = 0;
4488 inet_csk_schedule_ack(sk);
4489
4490 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4491 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4492
4493 skb_set_owner_r(skb, sk);
4494
4495 if (!skb_peek(&tp->out_of_order_queue)) {
 /* Initial out of order segment, build 1 SACK. */
4497 if (tcp_is_sack(tp)) {
4498 tp->rx_opt.num_sacks = 1;
4499 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4500 tp->selective_acks[0].end_seq =
4501 TCP_SKB_CB(skb)->end_seq;
4502 }
4503 __skb_queue_head(&tp->out_of_order_queue, skb);
4504 } else {
4505 struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
4506 u32 seq = TCP_SKB_CB(skb)->seq;
4507 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4508
4509 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4510 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4511
4512 if (!tp->rx_opt.num_sacks ||
4513 tp->selective_acks[0].end_seq != seq)
4514 goto add_sack;

 /* Common case: data arrive in order after the hole. */
4517 tp->selective_acks[0].end_seq = end_seq;
4518 return;
4519 }

 /* Find the place to insert this segment. */
4522 while (1) {
4523 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4524 break;
4525 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4526 skb1 = NULL;
4527 break;
4528 }
4529 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4530 }

 /* Does skb overlap the previous one? */
4533 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4534 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4535
4536 __kfree_skb(skb);
4537 tcp_dsack_set(sk, seq, end_seq);
4538 goto add_sack;
4539 }
4540 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4541
4542 tcp_dsack_set(sk, seq,
4543 TCP_SKB_CB(skb1)->end_seq);
4544 } else {
4545 if (skb_queue_is_first(&tp->out_of_order_queue,
4546 skb1))
4547 skb1 = NULL;
4548 else
4549 skb1 = skb_queue_prev(
4550 &tp->out_of_order_queue,
4551 skb1);
4552 }
4553 }
4554 if (!skb1)
4555 __skb_queue_head(&tp->out_of_order_queue, skb);
4556 else
4557 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);

 /* And clean segments covered by the new one as a whole. */
4560 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4561 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4562
4563 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4564 break;
4565 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4566 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4567 end_seq);
4568 break;
4569 }
4570 __skb_unlink(skb1, &tp->out_of_order_queue);
4571 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4572 TCP_SKB_CB(skb1)->end_seq);
4573 __kfree_skb(skb1);
4574 }
4575
4576add_sack:
4577 if (tcp_is_sack(tp))
4578 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4579 }
4580}
4581
4582static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4583 struct sk_buff_head *list)
4584{
4585 struct sk_buff *next = NULL;
4586
4587 if (!skb_queue_is_last(list, skb))
4588 next = skb_queue_next(list, skb);
4589
4590 __skb_unlink(skb, list);
4591 __kfree_skb(skb);
4592 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4593
4594 return next;
4595}
4596
/* Collapse contiguous sequence of skbs head..tail with
 * sequence numbers start..end.
 *
 * If tail is NULL, this means until the end of the list.
 *
 * Segments with FIN/SYN are not collapsed (only because this
 * simplifies code).
 */
4605static void
4606tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4607 struct sk_buff *head, struct sk_buff *tail,
4608 u32 start, u32 end)
4609{
4610 struct sk_buff *skb, *n;
4611 bool end_of_skbs;
4612
 /* First, check that the queue is collapsible and find
 * the point where collapsing can be useful.
 */
4615 skb = head;
4616restart:
4617 end_of_skbs = true;
4618 skb_queue_walk_from_safe(list, skb, n) {
4619 if (skb == tail)
4620 break;
4621
4622 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4623 skb = tcp_collapse_one(sk, skb, list);
4624 if (!skb)
4625 break;
4626 goto restart;
4627 }
 /* The first skb to collapse is:
 * - not SYN/FIN, and
 * - bloated, or contains data before "start", or
 *   overlaps with the next one.
 */
4633
4634 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
4635 (tcp_win_from_space(skb->truesize) > skb->len ||
4636 before(TCP_SKB_CB(skb)->seq, start))) {
4637 end_of_skbs = false;
4638 break;
4639 }
4640
4641 if (!skb_queue_is_last(list, skb)) {
4642 struct sk_buff *next = skb_queue_next(list, skb);
4643 if (next != tail &&
4644 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4645 end_of_skbs = false;
4646 break;
4647 }
4648 }

 /* Decided to skip this, advance start. */
4651 start = TCP_SKB_CB(skb)->end_seq;
4652 }
4653 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
4654 return;
4655
4656 while (before(start, end)) {
4657 struct sk_buff *nskb;
4658 unsigned int header = skb_headroom(skb);
4659 int copy = SKB_MAX_ORDER(header, 0);

 /* Too big header? This can happen with IPv6. */
4662 if (copy < 0)
4663 return;
4664 if (end - start < copy)
4665 copy = end - start;
4666 nskb = alloc_skb(copy + header, GFP_ATOMIC);
4667 if (!nskb)
4668 return;
4669
4670 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4671 skb_set_network_header(nskb, (skb_network_header(skb) -
4672 skb->head));
4673 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4674 skb->head));
4675 skb_reserve(nskb, header);
4676 memcpy(nskb->head, skb->head, header);
4677 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4678 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4679 __skb_queue_before(list, skb, nskb);
4680 skb_set_owner_r(nskb, sk);

 /* Copy data, releasing collapsed skbs. */
4683 while (copy > 0) {
4684 int offset = start - TCP_SKB_CB(skb)->seq;
4685 int size = TCP_SKB_CB(skb)->end_seq - start;
4686
4687 BUG_ON(offset < 0);
4688 if (size > 0) {
4689 size = min(copy, size);
4690 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4691 BUG();
4692 TCP_SKB_CB(nskb)->end_seq += size;
4693 copy -= size;
4694 start += size;
4695 }
4696 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4697 skb = tcp_collapse_one(sk, skb, list);
4698 if (!skb ||
4699 skb == tail ||
4700 tcp_hdr(skb)->syn ||
4701 tcp_hdr(skb)->fin)
4702 return;
4703 }
4704 }
4705 }
4706}
4707
/* Collapse the ofo queue. Algorithm: select a contiguous sequence of
 * skbs and tcp_collapse() them until the whole queue is collapsed.
 */
4711static void tcp_collapse_ofo_queue(struct sock *sk)
4712{
4713 struct tcp_sock *tp = tcp_sk(sk);
4714 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4715 struct sk_buff *head;
4716 u32 start, end;
4717
4718 if (skb == NULL)
4719 return;
4720
4721 start = TCP_SKB_CB(skb)->seq;
4722 end = TCP_SKB_CB(skb)->end_seq;
4723 head = skb;
4724
4725 for (;;) {
4726 struct sk_buff *next = NULL;
4727
4728 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4729 next = skb_queue_next(&tp->out_of_order_queue, skb);
4730 skb = next;

 /* The range is terminated when we see a gap or when
 * we are at the end of the whole queue. */
4734 if (!skb ||
4735 after(TCP_SKB_CB(skb)->seq, end) ||
4736 before(TCP_SKB_CB(skb)->end_seq, start)) {
4737 tcp_collapse(sk, &tp->out_of_order_queue,
4738 head, skb, start, end);
4739 head = skb;
4740 if (!skb)
4741 break;
4742
4743 start = TCP_SKB_CB(skb)->seq;
4744 end = TCP_SKB_CB(skb)->end_seq;
4745 } else {
4746 if (before(TCP_SKB_CB(skb)->seq, start))
4747 start = TCP_SKB_CB(skb)->seq;
4748 if (after(TCP_SKB_CB(skb)->end_seq, end))
4749 end = TCP_SKB_CB(skb)->end_seq;
4750 }
4751 }
4752}
4753
/*
 * Purge the out-of-order queue.
 * Return true if the queue was pruned.
 */
4758static int tcp_prune_ofo_queue(struct sock *sk)
4759{
4760 struct tcp_sock *tp = tcp_sk(sk);
4761 int res = 0;
4762
4763 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4764 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4765 __skb_queue_purge(&tp->out_of_order_queue);

 /* Reset SACK state. A conforming SACK implementation will
 * do the same at a timeout based retransmit. When a connection
 * is in a sad state like this, we care only about integrity
 * of the connection, not performance.
 */
4772 if (tp->rx_opt.sack_ok)
4773 tcp_sack_reset(&tp->rx_opt);
4774 sk_mem_reclaim(sk);
4775 res = 1;
4776 }
4777 return res;
4778}
4779
/* Reduce allocated memory if we can, trying to get
 * the socket within its memory limits again.
 *
 * Return less than zero if we should start dropping frames
 * until the socket owning process reads some of the data
 * to stabilize the situation.
 */
4787static int tcp_prune_queue(struct sock *sk)
4788{
4789 struct tcp_sock *tp = tcp_sk(sk);
4790
4791 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4792
4793 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4794
4795 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4796 tcp_clamp_window(sk);
4797 else if (tcp_memory_pressure)
4798 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4799
4800 tcp_collapse_ofo_queue(sk);
4801 if (!skb_queue_empty(&sk->sk_receive_queue))
4802 tcp_collapse(sk, &sk->sk_receive_queue,
4803 skb_peek(&sk->sk_receive_queue),
4804 NULL,
4805 tp->copied_seq, tp->rcv_nxt);
4806 sk_mem_reclaim(sk);
4807
4808 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4809 return 0;
4810
 /* Collapsing did not help, destructive actions follow.
 * This must not ever occur.
 */
4814 tcp_prune_ofo_queue(sk);
4815
4816 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4817 return 0;
4818
 /* If we are really being abused, tell the caller to silently
 * drop receive data on the floor. It will get retransmitted
 * and hopefully next time it will fit.
 */
4823 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4824
 /* Massive buffer overcommit. */
4826 tp->pred_flags = 0;
4827 return -1;
4828}
4829
/* RFC2861, slow part. Adjust cwnd after it was not full during one rto.
 * As additional protection, we do not touch cwnd during retransmission
 * phases, nor when the application has recently hit its sndbuf limit.
 */
4834void tcp_cwnd_application_limited(struct sock *sk)
4835{
4836 struct tcp_sock *tp = tcp_sk(sk);
4837
4838 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4839 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
 /* Limited by application or receiver window. */
4841 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4842 u32 win_used = max(tp->snd_cwnd_used, init_win);
4843 if (win_used < tp->snd_cwnd) {
4844 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4845 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4846 }
4847 tp->snd_cwnd_used = 0;
4848 }
4849 tp->snd_cwnd_stamp = tcp_time_stamp;
4850}
4851
4852static int tcp_should_expand_sndbuf(struct sock *sk)
4853{
4854 struct tcp_sock *tp = tcp_sk(sk);
4855
 /* If the user specified a specific send buffer setting, do
 * not modify it.
 */
4859 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4860 return 0;
4861
 /* If we are under global TCP memory pressure, do not expand. */
4863 if (tcp_memory_pressure)
4864 return 0;
4865
 /* If we are under soft global TCP memory pressure, do not expand. */
4867 if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4868 return 0;
4869
 /* If we filled the congestion window, do not expand. */
4871 if (tp->packets_out >= tp->snd_cwnd)
4872 return 0;
4873
4874 return 1;
4875}
4876
/* When an incoming ACK allowed us to free some skb from the write_queue,
 * we remember this event in the flag SOCK_QUEUE_SHRUNK and wake up the
 * socket on exit from the TCP input handler.
 *
 * PROBLEM: sndbuf expansion does not work well with largesend.
 */
4883static void tcp_new_space(struct sock *sk)
4884{
4885 struct tcp_sock *tp = tcp_sk(sk);
4886
4887 if (tcp_should_expand_sndbuf(sk)) {
4888 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
4889 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
4890 int demanded = max_t(unsigned int, tp->snd_cwnd,
4891 tp->reordering + 1);
4892 sndmem *= 2 * demanded;
4893 if (sndmem > sk->sk_sndbuf)
4894 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4895 tp->snd_cwnd_stamp = tcp_time_stamp;
4896 }
4897
4898 sk->sk_write_space(sk);
4899}
4900
4901static void tcp_check_space(struct sock *sk)
4902{
4903 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4904 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4905 if (sk->sk_socket &&
4906 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4907 tcp_new_space(sk);
4908 }
4909}
4910
4911static inline void tcp_data_snd_check(struct sock *sk)
4912{
4913 tcp_push_pending_frames(sk);
4914 tcp_check_space(sk);
4915}
4916
/*
 * Check if sending an ack is needed.
 */
4920static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4921{
4922 struct tcp_sock *tp = tcp_sk(sk);
4923
 /* More than one full frame received... */
4925 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
 /* ... and the right edge of the window advances far enough.
 * (tcp_recvmsg() will send an ACK otherwise.) Or...
 */
4929 __tcp_select_window(sk) >= tp->rcv_wnd) ||
 /* We ACK each frame or... */
4931 tcp_in_quickack_mode(sk) ||
 /* We have out of order data. */
4933 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
 /* Then ack it now. */
4935 tcp_send_ack(sk);
4936 } else {
 /* Else, send a delayed ack. */
4938 tcp_send_delayed_ack(sk);
4939 }
4940}
4941
4942static inline void tcp_ack_snd_check(struct sock *sk)
4943{
4944 if (!inet_csk_ack_scheduled(sk)) {
4945
4946 return;
4947 }
4948 __tcp_ack_snd_check(sk, 1);
4949}
4950
/*
 * This routine is only called when we have urgent data signalled.
 * It is the 'slow' part of tcp_urg. It could be moved inline now,
 * as tcp_urg is only called from one place. We handle URGent data
 * the BSD way - we have to, as BSD still does not use the
 * correction from RFC 961. For 1003.1g we should support the option
 * TCP_STDURG to permit either form (see sysctl_tcp_stdurg).
 */
4961static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
4962{
4963 struct tcp_sock *tp = tcp_sk(sk);
4964 u32 ptr = ntohs(th->urg_ptr);
4965
4966 if (ptr && !sysctl_tcp_stdurg)
4967 ptr--;
4968 ptr += ntohl(th->seq);
4969
 /* Ignore urgent data that we've already seen and read. */
4971 if (after(tp->copied_seq, ptr))
4972 return;
4973
 /* Urgent pointers that do not reach beyond data we have already
 * received (ptr < rcv_nxt) refer to urgent bytes that were part
 * of an earlier segment, e.g. a retransmit; replaying them would
 * signal the same mark twice, so they are dropped here.
 */
4984 if (before(ptr, tp->rcv_nxt))
4985 return;
4986
4987
4988 if (tp->urg_data && !after(ptr, tp->urg_seq))
4989 return;
4990
 /* Tell the world about our new urgent pointer. */
4992 sk_send_sigurg(sk);
4993
 /* We may be adding urgent data when the last byte read was
 * urgent. To do this requires some care. We cannot just ignore
 * tp->copied_seq since we would read the last urgent byte again
 * as data, nor can we alter copied_seq until this data arrives
 * or we break the semantic of SIOCATMARK (and thus sockatmark()).
 * The code below therefore advances copied_seq over a single
 * in-queue byte only when it is the already-consumed urgent byte.
 */
5009 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5010 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5011 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5012 tp->copied_seq++;
5013 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5014 __skb_unlink(skb, &sk->sk_receive_queue);
5015 __kfree_skb(skb);
5016 }
5017 }
5018
5019 tp->urg_data = TCP_URG_NOTYET;
5020 tp->urg_seq = ptr;
5021
5022
5023 tp->pred_flags = 0;
5024}
5025
/* This is the 'fast' part of urgent handling. */
5027static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
5028{
5029 struct tcp_sock *tp = tcp_sk(sk);
5030
 /* Check if we get a new urgent pointer - normally not. */
5032 if (th->urg)
5033 tcp_check_urg(sk, th);
5034
 /* Do we wait for any urgent data? - normally not... */
5036 if (tp->urg_data == TCP_URG_NOTYET) {
5037 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5038 th->syn;
5039
 /* Is the urgent pointer pointing into this packet? */
5041 if (ptr < skb->len) {
5042 u8 tmp;
5043 if (skb_copy_bits(skb, ptr, &tmp, 1))
5044 BUG();
5045 tp->urg_data = TCP_URG_VALID | tmp;
5046 if (!sock_flag(sk, SOCK_DEAD))
5047 sk->sk_data_ready(sk, 0);
5048 }
5049 }
5050}
5051
5052static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5053{
5054 struct tcp_sock *tp = tcp_sk(sk);
5055 int chunk = skb->len - hlen;
5056 int err;
5057
5058 local_bh_enable();
5059 if (skb_csum_unnecessary(skb))
5060 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
5061 else
5062 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
5063 tp->ucopy.iov);
5064
5065 if (!err) {
5066 tp->ucopy.len -= chunk;
5067 tp->copied_seq += chunk;
5068 tcp_rcv_space_adjust(sk);
5069 }
5070
5071 local_bh_disable();
5072 return err;
5073}
5074
5075static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5076 struct sk_buff *skb)
5077{
5078 __sum16 result;
5079
5080 if (sock_owned_by_user(sk)) {
5081 local_bh_enable();
5082 result = __tcp_checksum_complete(skb);
5083 local_bh_disable();
5084 } else {
5085 result = __tcp_checksum_complete(skb);
5086 }
5087 return result;
5088}
5089
5090static inline int tcp_checksum_complete_user(struct sock *sk,
5091 struct sk_buff *skb)
5092{
5093 return !skb_csum_unnecessary(skb) &&
5094 __tcp_checksum_complete_user(sk, skb);
5095}
5096
5097#ifdef CONFIG_NET_DMA
5098static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5099 int hlen)
5100{
5101 struct tcp_sock *tp = tcp_sk(sk);
5102 int chunk = skb->len - hlen;
5103 int dma_cookie;
5104 int copied_early = 0;
5105
5106 if (tp->ucopy.wakeup)
5107 return 0;
5108
5109 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5110 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
5111
5112 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5113
5114 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
5115 skb, hlen,
5116 tp->ucopy.iov, chunk,
5117 tp->ucopy.pinned_list);
5118
5119 if (dma_cookie < 0)
5120 goto out;
5121
5122 tp->ucopy.dma_cookie = dma_cookie;
5123 copied_early = 1;
5124
5125 tp->ucopy.len -= chunk;
5126 tp->copied_seq += chunk;
5127 tcp_rcv_space_adjust(sk);
5128
5129 if ((tp->ucopy.len == 0) ||
5130 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
5131 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
5132 tp->ucopy.wakeup = 1;
5133 sk->sk_data_ready(sk, 0);
5134 }
5135 } else if (chunk > 0) {
5136 tp->ucopy.wakeup = 1;
5137 sk->sk_data_ready(sk, 0);
5138 }
5139out:
5140 return copied_early;
5141}
5142#endif
5143
/* Does PAWS and seqno based validation of an incoming segment; flags will
 * play a significant role here.
 */
5147static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5148 struct tcphdr *th, int syn_inerr)
5149{
5150 u8 *hash_location;
5151 struct tcp_sock *tp = tcp_sk(sk);
5152
 /* RFC1323: H1. Apply PAWS check first. */
5154 if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
5155 tp->rx_opt.saw_tstamp &&
5156 tcp_paws_discard(sk, skb)) {
5157 if (!th->rst) {
5158 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5159 tcp_send_dupack(sk, skb);
5160 goto discard;
5161 }
 /* Reset is accepted even if it did not pass PAWS. */
5163 }
5164
 /* Step 1: check sequence number */
5166 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
 /* RFC793, page 37: "In all states except SYN-SENT, all reset
 * (RST) segments are validated by checking their SEQ-fields."
 * And page 69: "If an incoming segment is not acceptable,
 * an acknowledgment should be sent in reply (unless the RST
 * bit is set, if so drop the segment and return)".
 */
5173 if (!th->rst)
5174 tcp_send_dupack(sk, skb);
5175 goto discard;
5176 }
5177
 /* Step 2: check RST bit */
5179 if (th->rst) {
5180 tcp_reset(sk);
5181 goto discard;
5182 }
5183
 /* The ts_recent update must be made after we are sure that the
 * packet is in window.
 */
5187 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5188
 /* Step 3: check security and precedence [ignored] */

 /* Step 4: check for a SYN in window. */
5192 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5193 if (syn_inerr)
5194 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5195 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
5196 tcp_reset(sk);
5197 return -1;
5198 }
5199
5200 return 1;
5201
5202discard:
5203 __kfree_skb(skb);
5204 return 0;
5205}
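/* Annotation: callers treat the result as a tri-state. Greater than
 * zero means the segment passed the PAWS/sequence/RST/SYN checks and
 * processing continues; zero means it was consumed here (dup-ACK sent,
 * skb freed); negative means a SYN in the window forced a reset.
 */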
5206
/*
 * TCP receive function for the ESTABLISHED state.
 *
 * It is split into a fast path and a slow path. The fast path is
 * disabled when:
 * - A zero window was announced from us - zero window probing
 *   is only handled properly on the slow path.
 * - Out of order segments arrived.
 * - Urgent data is expected.
 * - There is no buffer space left.
 * - Unexpected TCP flags/window values/header lengths are received
 *   (detected by checking the TCP header against pred_flags).
 * - Data is sent in both directions. The fast path only supports pure
 *   senders or pure receivers (this means either the sequence number
 *   or the ack value must stay constant).
 * - An unexpected TCP option appears.
 *
 * When these conditions are not satisfied it drops into a standard
 * receive procedure patterned after RFC793 to handle all cases.
 * The first three cases are guaranteed by proper pred_flags setting;
 * the rest is checked inline. Fast processing is turned on in
 * tcp_data_queue() when everything is OK.
 */
5230int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5231 struct tcphdr *th, unsigned len)
5232{
5233 struct tcp_sock *tp = tcp_sk(sk);
5234 int res;
5235
 /*
 * Header prediction.
 * The code loosely follows the one in the famous
 * "30 instruction TCP receive" Van Jacobson mail.
 *
 * Van's trick is to deposit buffers into the socket queue
 * on a device interrupt, to call the tcp receive function
 * in the receiving process context, and to checksum and copy
 * the buffer to user space there. Smart...
 *
 * Our current scheme is not silly either, but we take the
 * extra cost of the net_bh soft interrupt processing...
 * We do checksum and copy also, but from device to kernel.
 */
5251 tp->rx_opt.saw_tstamp = 0;
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
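
	/* For reference: __tcp_fast_path_on() builds the predictor as
	 *
	 *	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
	 *			       ntohl(TCP_FLAG_ACK) | snd_wnd);
	 *
	 * so an in-sequence segment with the expected header length, only
	 * the ACK flag set (PSH is masked out by TCP_HP_BITS) and an
	 * unchanged window matches with a single compare.
	 */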
	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
		int tcp_header_len = tp->tcp_header_len;

		/* Timestamp header prediction: tcp_header_len
		 * is automatically equal to th->doff*4 due to the
		 * pred_flags match.
		 */

		/* Check timestamp */
		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
			/* No? Slow path! */
			if (!tcp_parse_aligned_timestamp(tp, th))
				goto slow_path;

			/* If PAWS failed, check it more carefully in slow path */
			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
				goto slow_path;

			/* DO NOT update ts_recent here, if checksum fails
			 * and timestamp was corrupted part, it will result
			 * in a hung connection since we will drop all
			 * future packets due to the PAWS test.
			 */
		}

		if (len <= tcp_header_len) {
			/* Bulk data transfer: sender */
			if (len == tcp_header_len) {
				/* Predicted packet is in window by definition.
				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
				 * Hence, check seq<=rcv_wup reduces to:
				 */
				if (tcp_header_len ==
				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				/* We know that such packets are checksummed
				 * on entry.
				 */
				tcp_ack(sk, skb, 0);
				__kfree_skb(skb);
				tcp_data_snd_check(sk);
				return 0;
			} else { /* Header too small */
				TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
				goto discard;
			}
		} else {
			int eaten = 0;
			int copied_early = 0;

			if (tp->copied_seq == tp->rcv_nxt &&
			    len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
				if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
					copied_early = 1;
					eaten = 1;
				}
#endif
				if (tp->ucopy.task == current &&
				    sock_owned_by_user(sk) && !copied_early) {
					__set_current_state(TASK_RUNNING);

					if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
						eaten = 1;
				}
				if (eaten) {
					/* Predicted packet is in window by definition.
					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
					 * Hence, check seq<=rcv_wup reduces to:
					 */
					if (tcp_header_len ==
					    (sizeof(struct tcphdr) +
					     TCPOLEN_TSTAMP_ALIGNED) &&
					    tp->rcv_nxt == tp->rcv_wup)
						tcp_store_ts_recent(tp);

					tcp_rcv_rtt_measure_ts(sk, skb);

					__skb_pull(skb, tcp_header_len);
					tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
					NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
				}
				if (copied_early)
					tcp_cleanup_rbuf(sk, skb->len);
			}
			if (!eaten) {
				if (tcp_checksum_complete_user(sk, skb))
					goto csum_error;

				/* Predicted packet is in window by definition.
				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
				 * Hence, check seq<=rcv_wup reduces to:
				 */
				if (tcp_header_len ==
				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				tcp_rcv_rtt_measure_ts(sk, skb);

				if ((int)skb->truesize > sk->sk_forward_alloc)
					goto step5;

				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);

				/* Bulk data transfer: receiver */
				__skb_pull(skb, tcp_header_len);
				__skb_queue_tail(&sk->sk_receive_queue, skb);
				skb_set_owner_r(skb, sk);
				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
			}

			tcp_event_data_recv(sk, skb);

			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
				/* Well, only one small jumplet in fast path... */
				tcp_ack(sk, skb, FLAG_DATA);
				tcp_data_snd_check(sk);
				if (!inet_csk_ack_scheduled(sk))
					goto no_ack;
			}

			if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
				__tcp_ack_snd_check(sk, 0);
no_ack:
#ifdef CONFIG_NET_DMA
			if (copied_early)
				__skb_queue_tail(&sk->sk_async_wait_queue, skb);
			else
#endif
			if (eaten)
				__kfree_skb(skb);
			else
				sk->sk_data_ready(sk, 0);
			return 0;
		}
	}
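
	/* We reach slow_path either because header prediction failed
	 * (predictor/seq/ack mismatch above) or via the goto's when the
	 * aligned timestamp could not be parsed or PAWS looked doubtful.
	 */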

slow_path:
	if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
		goto csum_error;

	/*
	 *	Standard slow path.
	 */

	res = tcp_validate_incoming(sk, skb, th, 1);
	if (res <= 0)
		return -res;

step5:
	if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
		goto discard;

	tcp_rcv_rtt_measure_ts(sk, skb);

	/* Process urgent data. */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	tcp_data_queue(sk, skb);

	tcp_data_snd_check(sk);
	tcp_ack_snd_check(sk);
	return 0;

csum_error:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);

discard:
	__kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_established);

static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 struct tcphdr *th, unsigned len)
{
	u8 *hash_location;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_cookie_values *cvp = tp->cookie_values;
	int saved_clamp = tp->rx_opt.mss_clamp;

	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 *
		 *  We do not send data with SYN, so that RFC-correct
		 *  test reduces to:
		 */
		if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
			goto reset_and_undo;

		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}

		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */
		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 *
		 *    See the note at the #if 0 block below.
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */
		TCP_ECN_rcv_synack(tp, th);

		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);
		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

		if (!tp->rx_opt.wscale_ok) {
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}
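
		/* Note: once timestamps are negotiated, every segment
		 * carries TCPOLEN_TSTAMP_ALIGNED (12) extra option bytes,
		 * which is why tcp_header_len grows and advmss shrinks by
		 * that amount above.
		 */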

		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized.
		 */
		tp->copied_seq = tp->rcv_nxt;

		if (cvp != NULL &&
		    cvp->cookie_pair_size > 0 &&
		    tp->rx_opt.cookie_plus > 0) {
			int cookie_size = tp->rx_opt.cookie_plus
					- TCPOLEN_COOKIE_BASE;
			int cookie_pair_size = cookie_size
					     + cvp->cookie_desired;

			/* A cookie extension option was sent and returned.
			 * Store the peer's cookie immediately after our own
			 * in cookie_pair, provided the pair actually fits.
			 */
			if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
				memcpy(&cvp->cookie_pair[cvp->cookie_desired],
				       hash_location, cookie_size);
				cvp->cookie_pair_size = cookie_pair_size;
			}
		}

		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);

		security_inet_conn_established(sk, skb);

		/* Make sure socket is routed, for correct metrics. */
		icsk->icsk_af_ops->rebuild_header(sk);

		tcp_init_metrics(sk);

		tcp_init_congestion_control(sk);

		/* Prevent spurious tcp_cwnd_restart() on first data
		 * packet.
		 */
		tp->lsndtime = tcp_time_stamp;

		tcp_init_buffer_space(sk);

		if (sock_flag(sk, SOCK_KEEPOPEN))
			inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

		if (!tp->rx_opt.snd_wscale)
			__tcp_fast_path_on(tp, tp->snd_wnd);
		else
			tp->pred_flags = 0;
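
		/* If the peer did not offer window scaling, snd_wnd can be
		 * predicted verbatim and the fast path is enabled right
		 * away; otherwise pred_flags stays 0 until
		 * tcp_fast_path_check() turns prediction back on with the
		 * negotiated scale.
		 */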

		if (!sock_flag(sk, SOCK_DEAD)) {
			sk->sk_state_change(sk);
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
		}

		if (sk->sk_write_pending ||
		    icsk->icsk_accept_queue.rskq_defer_accept ||
		    icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);
			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
			icsk->icsk_ack.ato	 = TCP_ATO_MIN;
			tcp_incr_quickack(sk);
			tcp_enter_quickack_mode(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			__kfree_skb(skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment */

	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd    = ntohs(th->window);
		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		TCP_ECN_rcv_syn(tp, th);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		tcp_send_synack(sk);
#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this.
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
		 * is not flawless. So, discard packet for sanity.
		 * Uncomment this return to process the data.
		 */
		return -1;
#else
		goto discard;
#endif
	}
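
	/* Simultaneous open: after moving to SYN_RECV and sending our
	 * SYN-ACK above, the peer's matching SYN-ACK (or final ACK) will
	 * arrive on the slow path and complete the handshake in
	 * tcp_rcv_state_process() below.
	 */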

	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}
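
/* Return convention: 0 means the skb was consumed here; 1 tells the
 * caller to send a reset; -1 means the connection is now established and
 * the caller performs the remaining steps (URG processing) itself.  See
 * the TCP_SYN_SENT case in tcp_rcv_state_process() below.
 */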

/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int queued = 0;
	int res;

	tp->rx_opt.saw_tstamp = 0;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
				return 1;

			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed unless
			 * it's still in use.
			 */
			kfree_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:
		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	res = tcp_validate_incoming(sk, skb, th, 0);
	if (res <= 0)
		return -res;

	/* step 5: check the ACK field */
	if (th->ack) {
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;

		switch (sk->sk_state) {
		case TCP_SYN_RECV:
			if (acceptable) {
				tp->copied_seq = tp->rcv_nxt;
				smp_mb();
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->sk_state_change(sk);

				/* Note, that this wakeup is only for marginal
				 * crossed SYN case. Passively open sockets
				 * are not waked up, because sk->sk_sleep ==
				 * NULL and sk->sk_socket == NULL.
				 */
				if (sk->sk_socket)
					sk_wake_async(sk,
						      SOCK_WAKE_IO, POLL_OUT);

				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
				tp->snd_wnd = ntohs(th->window) <<
					      tp->rx_opt.snd_wscale;
				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

				/* tcp_ack considers this ACK as duplicate
				 * and does not calculate rtt.
				 * Force it here.
				 */
				tcp_ack_update_rtt(sk, 0, 0);

				if (tp->rx_opt.tstamp_ok)
					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

				/* Make sure socket is routed, for
				 * correct metrics.
				 */
				icsk->icsk_af_ops->rebuild_header(sk);

				tcp_init_metrics(sk);

				tcp_init_congestion_control(sk);

				/* Prevent spurious tcp_cwnd_restart() on
				 * first data packet.
				 */
				tp->lsndtime = tcp_time_stamp;

				tcp_mtup_init(sk);
				tcp_initialize_rcv_mss(sk);
				tcp_init_buffer_space(sk);
				tcp_fast_path_on(tp);
			} else {
				return 1;
			}
			break;
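
		/* The block above mirrors the active-open setup in
		 * tcp_rcv_synsent_state_process(): both paths initialize
		 * metrics, congestion control, MTU probing and buffer
		 * space when the connection reaches ESTABLISHED.
		 */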

		case TCP_FIN_WAIT1:
			if (tp->snd_una == tp->write_seq) {
				tcp_set_state(sk, TCP_FIN_WAIT2);
				sk->sk_shutdown |= SEND_SHUTDOWN;
				dst_confirm(__sk_dst_get(sk));

				if (!sock_flag(sk, SOCK_DEAD))
					/* Wake up lingering close() */
					sk->sk_state_change(sk);
				else {
					int tmo;

					if (tp->linger2 < 0 ||
					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
						tcp_done(sk);
						NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
						return 1;
					}

					tmo = tcp_fin_time(sk);
					if (tmo > TCP_TIMEWAIT_LEN) {
						inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
					} else if (th->fin || sock_owned_by_user(sk)) {
						/* Bad case. We could lose such FIN otherwise.
						 * It is not a big problem, but it looks confusing
						 * and not so rare event. We still can lose it now,
						 * if it spins in bh_lock_sock(), but it is really
						 * marginal case.
						 */
						inet_csk_reset_keepalive_timer(sk, tmo);
					} else {
						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
						goto discard;
					}
				}
			}
			break;

		case TCP_CLOSING:
			if (tp->snd_una == tp->write_seq) {
				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
				goto discard;
			}
			break;

		case TCP_LAST_ACK:
			if (tp->snd_una == tp->write_seq) {
				tcp_update_metrics(sk);
				tcp_done(sk);
				goto discard;
			}
			break;
		}
	} else
		goto discard;

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		/* Fall through */
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data_queue could move the socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
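
/* A nonzero return from tcp_rcv_state_process() tells the caller
 * (tcp_v4_do_rcv()/tcp_v6_do_rcv()) to send a reset for this segment;
 * 0 means the skb has been consumed here.
 */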