1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64#include <linux/mm.h>
65#include <linux/slab.h>
66#include <linux/module.h>
67#include <linux/sysctl.h>
68#include <linux/kernel.h>
69#include <net/dst.h>
70#include <net/tcp.h>
71#include <net/inet_common.h>
72#include <linux/ipsec.h>
73#include <asm/unaligned.h>
74#include <net/netdma.h>
75
76int sysctl_tcp_timestamps __read_mostly = 1;
77int sysctl_tcp_window_scaling __read_mostly = 1;
78int sysctl_tcp_sack __read_mostly = 1;
79int sysctl_tcp_fack __read_mostly = 1;
80int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
81EXPORT_SYMBOL(sysctl_tcp_reordering);
82int sysctl_tcp_ecn __read_mostly = 2;
83EXPORT_SYMBOL(sysctl_tcp_ecn);
84int sysctl_tcp_dsack __read_mostly = 1;
85int sysctl_tcp_app_win __read_mostly = 31;
86int sysctl_tcp_adv_win_scale __read_mostly = 2;
87EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
88
89int sysctl_tcp_stdurg __read_mostly;
90int sysctl_tcp_rfc1337 __read_mostly;
91int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
92int sysctl_tcp_frto __read_mostly = 2;
93int sysctl_tcp_frto_response __read_mostly;
94int sysctl_tcp_nometrics_save __read_mostly;
95
96int sysctl_tcp_thin_dupack __read_mostly;
97
98int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
99int sysctl_tcp_abc __read_mostly;
100
101#define FLAG_DATA 0x01
102#define FLAG_WIN_UPDATE 0x02
103#define FLAG_DATA_ACKED 0x04
104#define FLAG_RETRANS_DATA_ACKED 0x08
105#define FLAG_SYN_ACKED 0x10
106#define FLAG_DATA_SACKED 0x20
107#define FLAG_ECE 0x40
108#define FLAG_DATA_LOST 0x80
109#define FLAG_SLOWPATH 0x100
110#define FLAG_ONLY_ORIG_SACKED 0x200
111#define FLAG_SND_UNA_ADVANCED 0x400
112#define FLAG_DSACKING_ACK 0x800
113#define FLAG_NONHEAD_RETRANS_ACKED 0x1000
114#define FLAG_SACK_RENEGING 0x2000
115
116#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
117#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
118#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
119#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
120#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
121
122#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
123#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
124
125
126
127
128static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
129{
130 struct inet_connection_sock *icsk = inet_csk(sk);
131 const unsigned int lss = icsk->icsk_ack.last_seg_size;
132 unsigned int len;
133
134 icsk->icsk_ack.last_seg_size = 0;
135
136
137
138
139 len = skb_shinfo(skb)->gso_size ? : skb->len;
140 if (len >= icsk->icsk_ack.rcv_mss) {
141 icsk->icsk_ack.rcv_mss = len;
142 } else {
143
144
145
146
147
148 len += skb->data - skb_transport_header(skb);
149 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
150
151
152
153
154
155 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
156 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
157
158
159
160
161 len -= tcp_sk(sk)->tcp_header_len;
162 icsk->icsk_ack.last_seg_size = len;
163 if (len == lss) {
164 icsk->icsk_ack.rcv_mss = len;
165 return;
166 }
167 }
168 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
169 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
170 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
171 }
172}
173
174static void tcp_incr_quickack(struct sock *sk)
175{
176 struct inet_connection_sock *icsk = inet_csk(sk);
177 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
178
179 if (quickacks == 0)
180 quickacks = 2;
181 if (quickacks > icsk->icsk_ack.quick)
182 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
183}
184
185static void tcp_enter_quickack_mode(struct sock *sk)
186{
187 struct inet_connection_sock *icsk = inet_csk(sk);
188 tcp_incr_quickack(sk);
189 icsk->icsk_ack.pingpong = 0;
190 icsk->icsk_ack.ato = TCP_ATO_MIN;
191}
192
193
194
195
196
197static inline int tcp_in_quickack_mode(const struct sock *sk)
198{
199 const struct inet_connection_sock *icsk = inet_csk(sk);
200 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
201}
202
203static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
204{
205 if (tp->ecn_flags & TCP_ECN_OK)
206 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
207}
208
209static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
210{
211 if (tcp_hdr(skb)->cwr)
212 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
213}
214
215static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
216{
217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
218}
219
220static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
221{
222 if (!(tp->ecn_flags & TCP_ECN_OK))
223 return;
224
225 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
226 case INET_ECN_NOT_ECT:
227
228
229
230
231 if (tp->ecn_flags & TCP_ECN_SEEN)
232 tcp_enter_quickack_mode((struct sock *)tp);
233 break;
234 case INET_ECN_CE:
235 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
236
237 default:
238 tp->ecn_flags |= TCP_ECN_SEEN;
239 }
240}
241
242static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
243{
244 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
245 tp->ecn_flags &= ~TCP_ECN_OK;
246}
247
248static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
249{
250 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
251 tp->ecn_flags &= ~TCP_ECN_OK;
252}
253
254static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
255{
256 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
257 return 1;
258 return 0;
259}
260
261
262
263
264
265
266static void tcp_fixup_sndbuf(struct sock *sk)
267{
268 int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
269
270 sndmem *= TCP_INIT_CWND;
271 if (sk->sk_sndbuf < sndmem)
272 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
273}
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
302{
303 struct tcp_sock *tp = tcp_sk(sk);
304
305 int truesize = tcp_win_from_space(skb->truesize) >> 1;
306 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
307
308 while (tp->rcv_ssthresh <= window) {
309 if (truesize <= skb->len)
310 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
311
312 truesize >>= 1;
313 window >>= 1;
314 }
315 return 0;
316}
317
318static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
319{
320 struct tcp_sock *tp = tcp_sk(sk);
321
322
323 if (tp->rcv_ssthresh < tp->window_clamp &&
324 (int)tp->rcv_ssthresh < tcp_space(sk) &&
325 !tcp_memory_pressure) {
326 int incr;
327
328
329
330
331 if (tcp_win_from_space(skb->truesize) <= skb->len)
332 incr = 2 * tp->advmss;
333 else
334 incr = __tcp_grow_window(sk, skb);
335
336 if (incr) {
337 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
338 tp->window_clamp);
339 inet_csk(sk)->icsk_ack.quick |= 1;
340 }
341 }
342}
343
344
345
346static void tcp_fixup_rcvbuf(struct sock *sk)
347{
348 u32 mss = tcp_sk(sk)->advmss;
349 u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
350 int rcvmem;
351
352
353
354
355 if (mss > 1460)
356 icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
357
358 rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
359 while (tcp_win_from_space(rcvmem) < mss)
360 rcvmem += 128;
361
362 rcvmem *= icwnd;
363
364 if (sk->sk_rcvbuf < rcvmem)
365 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
366}
367
368
369
370
371static void tcp_init_buffer_space(struct sock *sk)
372{
373 struct tcp_sock *tp = tcp_sk(sk);
374 int maxwin;
375
376 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
377 tcp_fixup_rcvbuf(sk);
378 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
379 tcp_fixup_sndbuf(sk);
380
381 tp->rcvq_space.space = tp->rcv_wnd;
382
383 maxwin = tcp_full_space(sk);
384
385 if (tp->window_clamp >= maxwin) {
386 tp->window_clamp = maxwin;
387
388 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
389 tp->window_clamp = max(maxwin -
390 (maxwin >> sysctl_tcp_app_win),
391 4 * tp->advmss);
392 }
393
394
395 if (sysctl_tcp_app_win &&
396 tp->window_clamp > 2 * tp->advmss &&
397 tp->window_clamp + tp->advmss > maxwin)
398 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
399
400 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
401 tp->snd_cwnd_stamp = tcp_time_stamp;
402}
403
404
405static void tcp_clamp_window(struct sock *sk)
406{
407 struct tcp_sock *tp = tcp_sk(sk);
408 struct inet_connection_sock *icsk = inet_csk(sk);
409
410 icsk->icsk_ack.quick = 0;
411
412 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
413 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
414 !tcp_memory_pressure &&
415 atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
416 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
417 sysctl_tcp_rmem[2]);
418 }
419 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
420 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
421}
422
423
424
425
426
427
428
429
430void tcp_initialize_rcv_mss(struct sock *sk)
431{
432 const struct tcp_sock *tp = tcp_sk(sk);
433 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
434
435 hint = min(hint, tp->rcv_wnd / 2);
436 hint = min(hint, TCP_MSS_DEFAULT);
437 hint = max(hint, TCP_MIN_MSS);
438
439 inet_csk(sk)->icsk_ack.rcv_mss = hint;
440}
441EXPORT_SYMBOL(tcp_initialize_rcv_mss);
442
443
444
445
446
447
448
449
450
451
452
453
454static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
455{
456 u32 new_sample = tp->rcv_rtt_est.rtt;
457 long m = sample;
458
459 if (m == 0)
460 m = 1;
461
462 if (new_sample != 0) {
463
464
465
466
467
468
469
470
471
472
473 if (!win_dep) {
474 m -= (new_sample >> 3);
475 new_sample += m;
476 } else if (m < new_sample)
477 new_sample = m << 3;
478 } else {
479
480 new_sample = m << 3;
481 }
482
483 if (tp->rcv_rtt_est.rtt != new_sample)
484 tp->rcv_rtt_est.rtt = new_sample;
485}
486
487static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
488{
489 if (tp->rcv_rtt_est.time == 0)
490 goto new_measure;
491 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
492 return;
493 tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
494
495new_measure:
496 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
497 tp->rcv_rtt_est.time = tcp_time_stamp;
498}
499
500static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
501 const struct sk_buff *skb)
502{
503 struct tcp_sock *tp = tcp_sk(sk);
504 if (tp->rx_opt.rcv_tsecr &&
505 (TCP_SKB_CB(skb)->end_seq -
506 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
507 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
508}
509
510
511
512
513
514void tcp_rcv_space_adjust(struct sock *sk)
515{
516 struct tcp_sock *tp = tcp_sk(sk);
517 int time;
518 int space;
519
520 if (tp->rcvq_space.time == 0)
521 goto new_measure;
522
523 time = tcp_time_stamp - tp->rcvq_space.time;
524 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
525 return;
526
527 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
528
529 space = max(tp->rcvq_space.space, space);
530
531 if (tp->rcvq_space.space != space) {
532 int rcvmem;
533
534 tp->rcvq_space.space = space;
535
536 if (sysctl_tcp_moderate_rcvbuf &&
537 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
538 int new_clamp = space;
539
540
541
542
543
544 space /= tp->advmss;
545 if (!space)
546 space = 1;
547 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
548 while (tcp_win_from_space(rcvmem) < tp->advmss)
549 rcvmem += 128;
550 space *= rcvmem;
551 space = min(space, sysctl_tcp_rmem[2]);
552 if (space > sk->sk_rcvbuf) {
553 sk->sk_rcvbuf = space;
554
555
556 tp->window_clamp = new_clamp;
557 }
558 }
559 }
560
561new_measure:
562 tp->rcvq_space.seq = tp->copied_seq;
563 tp->rcvq_space.time = tcp_time_stamp;
564}
565
566
567
568
569
570
571
572
573
574
575
576static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
577{
578 struct tcp_sock *tp = tcp_sk(sk);
579 struct inet_connection_sock *icsk = inet_csk(sk);
580 u32 now;
581
582 inet_csk_schedule_ack(sk);
583
584 tcp_measure_rcv_mss(sk, skb);
585
586 tcp_rcv_rtt_measure(tp);
587
588 now = tcp_time_stamp;
589
590 if (!icsk->icsk_ack.ato) {
591
592
593
594 tcp_incr_quickack(sk);
595 icsk->icsk_ack.ato = TCP_ATO_MIN;
596 } else {
597 int m = now - icsk->icsk_ack.lrcvtime;
598
599 if (m <= TCP_ATO_MIN / 2) {
600
601 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
602 } else if (m < icsk->icsk_ack.ato) {
603 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
604 if (icsk->icsk_ack.ato > icsk->icsk_rto)
605 icsk->icsk_ack.ato = icsk->icsk_rto;
606 } else if (m > icsk->icsk_rto) {
607
608
609
610 tcp_incr_quickack(sk);
611 sk_mem_reclaim(sk);
612 }
613 }
614 icsk->icsk_ack.lrcvtime = now;
615
616 TCP_ECN_check_ce(tp, skb);
617
618 if (skb->len >= 128)
619 tcp_grow_window(sk, skb);
620}
621
622
623
624
625
626
627
628
629
630
631static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
632{
633 struct tcp_sock *tp = tcp_sk(sk);
634 long m = mrtt;
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652 if (m == 0)
653 m = 1;
654 if (tp->srtt != 0) {
655 m -= (tp->srtt >> 3);
656 tp->srtt += m;
657 if (m < 0) {
658 m = -m;
659 m -= (tp->mdev >> 2);
660
661
662
663
664
665
666
667
668 if (m > 0)
669 m >>= 3;
670 } else {
671 m -= (tp->mdev >> 2);
672 }
673 tp->mdev += m;
674 if (tp->mdev > tp->mdev_max) {
675 tp->mdev_max = tp->mdev;
676 if (tp->mdev_max > tp->rttvar)
677 tp->rttvar = tp->mdev_max;
678 }
679 if (after(tp->snd_una, tp->rtt_seq)) {
680 if (tp->mdev_max < tp->rttvar)
681 tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
682 tp->rtt_seq = tp->snd_nxt;
683 tp->mdev_max = tcp_rto_min(sk);
684 }
685 } else {
686
687 tp->srtt = m << 3;
688 tp->mdev = m << 1;
689 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
690 tp->rtt_seq = tp->snd_nxt;
691 }
692}
693
694
695
696
697static inline void tcp_set_rto(struct sock *sk)
698{
699 const struct tcp_sock *tp = tcp_sk(sk);
700
701
702
703
704
705
706
707
708
709
710 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
711
712
713
714
715
716
717
718
719
720
721 tcp_bound_rto(sk);
722}
723
724
725
726
727
728void tcp_update_metrics(struct sock *sk)
729{
730 struct tcp_sock *tp = tcp_sk(sk);
731 struct dst_entry *dst = __sk_dst_get(sk);
732
733 if (sysctl_tcp_nometrics_save)
734 return;
735
736 dst_confirm(dst);
737
738 if (dst && (dst->flags & DST_HOST)) {
739 const struct inet_connection_sock *icsk = inet_csk(sk);
740 int m;
741 unsigned long rtt;
742
743 if (icsk->icsk_backoff || !tp->srtt) {
744
745
746
747
748 if (!(dst_metric_locked(dst, RTAX_RTT)))
749 dst_metric_set(dst, RTAX_RTT, 0);
750 return;
751 }
752
753 rtt = dst_metric_rtt(dst, RTAX_RTT);
754 m = rtt - tp->srtt;
755
756
757
758
759
760 if (!(dst_metric_locked(dst, RTAX_RTT))) {
761 if (m <= 0)
762 set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
763 else
764 set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
765 }
766
767 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
768 unsigned long var;
769 if (m < 0)
770 m = -m;
771
772
773 m >>= 1;
774 if (m < tp->mdev)
775 m = tp->mdev;
776
777 var = dst_metric_rtt(dst, RTAX_RTTVAR);
778 if (m >= var)
779 var = m;
780 else
781 var -= (var - m) >> 2;
782
783 set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
784 }
785
786 if (tcp_in_initial_slowstart(tp)) {
787
788 if (dst_metric(dst, RTAX_SSTHRESH) &&
789 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
790 (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
791 dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
792 if (!dst_metric_locked(dst, RTAX_CWND) &&
793 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
794 dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
795 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
796 icsk->icsk_ca_state == TCP_CA_Open) {
797
798 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
799 dst_metric_set(dst, RTAX_SSTHRESH,
800 max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
801 if (!dst_metric_locked(dst, RTAX_CWND))
802 dst_metric_set(dst, RTAX_CWND,
803 (dst_metric(dst, RTAX_CWND) +
804 tp->snd_cwnd) >> 1);
805 } else {
806
807
808
809 if (!dst_metric_locked(dst, RTAX_CWND))
810 dst_metric_set(dst, RTAX_CWND,
811 (dst_metric(dst, RTAX_CWND) +
812 tp->snd_ssthresh) >> 1);
813 if (dst_metric(dst, RTAX_SSTHRESH) &&
814 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
815 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
816 dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
817 }
818
819 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
820 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
821 tp->reordering != sysctl_tcp_reordering)
822 dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
823 }
824 }
825}
826
827__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
828{
829 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
830
831 if (!cwnd)
832 cwnd = TCP_INIT_CWND;
833 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
834}
835
836
837void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
838{
839 struct tcp_sock *tp = tcp_sk(sk);
840 const struct inet_connection_sock *icsk = inet_csk(sk);
841
842 tp->prior_ssthresh = 0;
843 tp->bytes_acked = 0;
844 if (icsk->icsk_ca_state < TCP_CA_CWR) {
845 tp->undo_marker = 0;
846 if (set_ssthresh)
847 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
848 tp->snd_cwnd = min(tp->snd_cwnd,
849 tcp_packets_in_flight(tp) + 1U);
850 tp->snd_cwnd_cnt = 0;
851 tp->high_seq = tp->snd_nxt;
852 tp->snd_cwnd_stamp = tcp_time_stamp;
853 TCP_ECN_queue_cwr(tp);
854
855 tcp_set_ca_state(sk, TCP_CA_CWR);
856 }
857}
858
859
860
861
862
863static void tcp_disable_fack(struct tcp_sock *tp)
864{
865
866 if (tcp_is_fack(tp))
867 tp->lost_skb_hint = NULL;
868 tp->rx_opt.sack_ok &= ~2;
869}
870
871
872static void tcp_dsack_seen(struct tcp_sock *tp)
873{
874 tp->rx_opt.sack_ok |= 4;
875}
876
877
878
879static void tcp_init_metrics(struct sock *sk)
880{
881 struct tcp_sock *tp = tcp_sk(sk);
882 struct dst_entry *dst = __sk_dst_get(sk);
883
884 if (dst == NULL)
885 goto reset;
886
887 dst_confirm(dst);
888
889 if (dst_metric_locked(dst, RTAX_CWND))
890 tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
891 if (dst_metric(dst, RTAX_SSTHRESH)) {
892 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
893 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
894 tp->snd_ssthresh = tp->snd_cwnd_clamp;
895 } else {
896
897
898
899 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
900 }
901 if (dst_metric(dst, RTAX_REORDERING) &&
902 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
903 tcp_disable_fack(tp);
904 tp->reordering = dst_metric(dst, RTAX_REORDERING);
905 }
906
907 if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
908 goto reset;
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924 if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
925 tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
926 tp->rtt_seq = tp->snd_nxt;
927 }
928 if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
929 tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
930 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
931 }
932 tcp_set_rto(sk);
933reset:
934 if (tp->srtt == 0) {
935
936
937
938
939
940
941 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
942 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
943 }
944
945
946
947
948
949 if (tp->total_retrans > 1)
950 tp->snd_cwnd = 1;
951 else
952 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
953 tp->snd_cwnd_stamp = tcp_time_stamp;
954}
955
956static void tcp_update_reordering(struct sock *sk, const int metric,
957 const int ts)
958{
959 struct tcp_sock *tp = tcp_sk(sk);
960 if (metric > tp->reordering) {
961 int mib_idx;
962
963 tp->reordering = min(TCP_MAX_REORDERING, metric);
964
965
966 if (ts)
967 mib_idx = LINUX_MIB_TCPTSREORDER;
968 else if (tcp_is_reno(tp))
969 mib_idx = LINUX_MIB_TCPRENOREORDER;
970 else if (tcp_is_fack(tp))
971 mib_idx = LINUX_MIB_TCPFACKREORDER;
972 else
973 mib_idx = LINUX_MIB_TCPSACKREORDER;
974
975 NET_INC_STATS_BH(sock_net(sk), mib_idx);
976#if FASTRETRANS_DEBUG > 1
977 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
978 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
979 tp->reordering,
980 tp->fackets_out,
981 tp->sacked_out,
982 tp->undo_marker ? tp->undo_retrans : 0);
983#endif
984 tcp_disable_fack(tp);
985 }
986}
987
988
989static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
990{
991 if ((tp->retransmit_skb_hint == NULL) ||
992 before(TCP_SKB_CB(skb)->seq,
993 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
994 tp->retransmit_skb_hint = skb;
995
996 if (!tp->lost_out ||
997 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
998 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
999}
1000
1001static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
1002{
1003 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1004 tcp_verify_retransmit_hint(tp, skb);
1005
1006 tp->lost_out += tcp_skb_pcount(skb);
1007 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1008 }
1009}
1010
1011static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
1012 struct sk_buff *skb)
1013{
1014 tcp_verify_retransmit_hint(tp, skb);
1015
1016 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1017 tp->lost_out += tcp_skb_pcount(skb);
1018 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1019 }
1020}
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1119 u32 start_seq, u32 end_seq)
1120{
1121
1122 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1123 return 0;
1124
1125
1126 if (!before(start_seq, tp->snd_nxt))
1127 return 0;
1128
1129
1130
1131
1132 if (after(start_seq, tp->snd_una))
1133 return 1;
1134
1135 if (!is_dsack || !tp->undo_marker)
1136 return 0;
1137
1138
1139 if (after(end_seq, tp->snd_una))
1140 return 0;
1141
1142 if (!before(start_seq, tp->undo_marker))
1143 return 1;
1144
1145
1146 if (!after(end_seq, tp->undo_marker))
1147 return 0;
1148
1149
1150
1151
1152 return !before(start_seq, end_seq - tp->max_window);
1153}
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164static void tcp_mark_lost_retrans(struct sock *sk)
1165{
1166 const struct inet_connection_sock *icsk = inet_csk(sk);
1167 struct tcp_sock *tp = tcp_sk(sk);
1168 struct sk_buff *skb;
1169 int cnt = 0;
1170 u32 new_low_seq = tp->snd_nxt;
1171 u32 received_upto = tcp_highest_sack_seq(tp);
1172
1173 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1174 !after(received_upto, tp->lost_retrans_low) ||
1175 icsk->icsk_ca_state != TCP_CA_Recovery)
1176 return;
1177
1178 tcp_for_write_queue(skb, sk) {
1179 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1180
1181 if (skb == tcp_send_head(sk))
1182 break;
1183 if (cnt == tp->retrans_out)
1184 break;
1185 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1186 continue;
1187
1188 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1189 continue;
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202 if (after(received_upto, ack_seq)) {
1203 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1204 tp->retrans_out -= tcp_skb_pcount(skb);
1205
1206 tcp_skb_mark_lost_uncond_verify(tp, skb);
1207 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1208 } else {
1209 if (before(ack_seq, new_low_seq))
1210 new_low_seq = ack_seq;
1211 cnt += tcp_skb_pcount(skb);
1212 }
1213 }
1214
1215 if (tp->retrans_out)
1216 tp->lost_retrans_low = new_low_seq;
1217}
1218
1219static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1220 struct tcp_sack_block_wire *sp, int num_sacks,
1221 u32 prior_snd_una)
1222{
1223 struct tcp_sock *tp = tcp_sk(sk);
1224 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1225 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1226 int dup_sack = 0;
1227
1228 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1229 dup_sack = 1;
1230 tcp_dsack_seen(tp);
1231 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1232 } else if (num_sacks > 1) {
1233 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1234 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1235
1236 if (!after(end_seq_0, end_seq_1) &&
1237 !before(start_seq_0, start_seq_1)) {
1238 dup_sack = 1;
1239 tcp_dsack_seen(tp);
1240 NET_INC_STATS_BH(sock_net(sk),
1241 LINUX_MIB_TCPDSACKOFORECV);
1242 }
1243 }
1244
1245
1246 if (dup_sack && tp->undo_marker && tp->undo_retrans &&
1247 !after(end_seq_0, prior_snd_una) &&
1248 after(end_seq_0, tp->undo_marker))
1249 tp->undo_retrans--;
1250
1251 return dup_sack;
1252}
1253
1254struct tcp_sacktag_state {
1255 int reord;
1256 int fack_count;
1257 int flag;
1258};
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1269 u32 start_seq, u32 end_seq)
1270{
1271 int in_sack, err;
1272 unsigned int pkt_len;
1273 unsigned int mss;
1274
1275 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1276 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1277
1278 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1279 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1280 mss = tcp_skb_mss(skb);
1281 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1282
1283 if (!in_sack) {
1284 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1285 if (pkt_len < mss)
1286 pkt_len = mss;
1287 } else {
1288 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1289 if (pkt_len < mss)
1290 return -EINVAL;
1291 }
1292
1293
1294
1295
1296 if (pkt_len > mss) {
1297 unsigned int new_len = (pkt_len / mss) * mss;
1298 if (!in_sack && new_len < pkt_len) {
1299 new_len += mss;
1300 if (new_len > skb->len)
1301 return 0;
1302 }
1303 pkt_len = new_len;
1304 }
1305 err = tcp_fragment(sk, skb, pkt_len, mss);
1306 if (err < 0)
1307 return err;
1308 }
1309
1310 return in_sack;
1311}
1312
1313static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
1314 struct tcp_sacktag_state *state,
1315 int dup_sack, int pcount)
1316{
1317 struct tcp_sock *tp = tcp_sk(sk);
1318 u8 sacked = TCP_SKB_CB(skb)->sacked;
1319 int fack_count = state->fack_count;
1320
1321
1322 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1323 if (tp->undo_marker && tp->undo_retrans &&
1324 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1325 tp->undo_retrans--;
1326 if (sacked & TCPCB_SACKED_ACKED)
1327 state->reord = min(fack_count, state->reord);
1328 }
1329
1330
1331 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1332 return sacked;
1333
1334 if (!(sacked & TCPCB_SACKED_ACKED)) {
1335 if (sacked & TCPCB_SACKED_RETRANS) {
1336
1337
1338
1339
1340 if (sacked & TCPCB_LOST) {
1341 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1342 tp->lost_out -= pcount;
1343 tp->retrans_out -= pcount;
1344 }
1345 } else {
1346 if (!(sacked & TCPCB_RETRANS)) {
1347
1348
1349
1350 if (before(TCP_SKB_CB(skb)->seq,
1351 tcp_highest_sack_seq(tp)))
1352 state->reord = min(fack_count,
1353 state->reord);
1354
1355
1356 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1357 state->flag |= FLAG_ONLY_ORIG_SACKED;
1358 }
1359
1360 if (sacked & TCPCB_LOST) {
1361 sacked &= ~TCPCB_LOST;
1362 tp->lost_out -= pcount;
1363 }
1364 }
1365
1366 sacked |= TCPCB_SACKED_ACKED;
1367 state->flag |= FLAG_DATA_SACKED;
1368 tp->sacked_out += pcount;
1369
1370 fack_count += pcount;
1371
1372
1373 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1374 before(TCP_SKB_CB(skb)->seq,
1375 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1376 tp->lost_cnt_hint += pcount;
1377
1378 if (fack_count > tp->fackets_out)
1379 tp->fackets_out = fack_count;
1380 }
1381
1382
1383
1384
1385
1386 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1387 sacked &= ~TCPCB_SACKED_RETRANS;
1388 tp->retrans_out -= pcount;
1389 }
1390
1391 return sacked;
1392}
1393
1394static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1395 struct tcp_sacktag_state *state,
1396 unsigned int pcount, int shifted, int mss,
1397 int dup_sack)
1398{
1399 struct tcp_sock *tp = tcp_sk(sk);
1400 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1401
1402 BUG_ON(!pcount);
1403
1404 if (skb == tp->lost_skb_hint)
1405 tp->lost_cnt_hint += pcount;
1406
1407 TCP_SKB_CB(prev)->end_seq += shifted;
1408 TCP_SKB_CB(skb)->seq += shifted;
1409
1410 skb_shinfo(prev)->gso_segs += pcount;
1411 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1412 skb_shinfo(skb)->gso_segs -= pcount;
1413
1414
1415
1416
1417
1418
1419 if (!skb_shinfo(prev)->gso_size) {
1420 skb_shinfo(prev)->gso_size = mss;
1421 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1422 }
1423
1424
1425 if (skb_shinfo(skb)->gso_segs <= 1) {
1426 skb_shinfo(skb)->gso_size = 0;
1427 skb_shinfo(skb)->gso_type = 0;
1428 }
1429
1430
1431 tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
1432
1433
1434 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1435
1436 if (skb->len > 0) {
1437 BUG_ON(!tcp_skb_pcount(skb));
1438 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1439 return 0;
1440 }
1441
1442
1443
1444 if (skb == tp->retransmit_skb_hint)
1445 tp->retransmit_skb_hint = prev;
1446 if (skb == tp->scoreboard_skb_hint)
1447 tp->scoreboard_skb_hint = prev;
1448 if (skb == tp->lost_skb_hint) {
1449 tp->lost_skb_hint = prev;
1450 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1451 }
1452
1453 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
1454 if (skb == tcp_highest_sack(sk))
1455 tcp_advance_highest_sack(sk, skb);
1456
1457 tcp_unlink_write_queue(skb, sk);
1458 sk_wmem_free_skb(sk, skb);
1459
1460 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1461
1462 return 1;
1463}
1464
1465
1466
1467
1468static int tcp_skb_seglen(const struct sk_buff *skb)
1469{
1470 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1471}
1472
1473
1474static int skb_can_shift(const struct sk_buff *skb)
1475{
1476 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1477}
1478
1479
1480
1481
1482static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1483 struct tcp_sacktag_state *state,
1484 u32 start_seq, u32 end_seq,
1485 int dup_sack)
1486{
1487 struct tcp_sock *tp = tcp_sk(sk);
1488 struct sk_buff *prev;
1489 int mss;
1490 int pcount = 0;
1491 int len;
1492 int in_sack;
1493
1494 if (!sk_can_gso(sk))
1495 goto fallback;
1496
1497
1498 if (!dup_sack &&
1499 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1500 goto fallback;
1501 if (!skb_can_shift(skb))
1502 goto fallback;
1503
1504 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1505 goto fallback;
1506
1507
1508 if (unlikely(skb == tcp_write_queue_head(sk)))
1509 goto fallback;
1510 prev = tcp_write_queue_prev(sk, skb);
1511
1512 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1513 goto fallback;
1514
1515 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1516 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1517
1518 if (in_sack) {
1519 len = skb->len;
1520 pcount = tcp_skb_pcount(skb);
1521 mss = tcp_skb_seglen(skb);
1522
1523
1524
1525
1526 if (mss != tcp_skb_seglen(prev))
1527 goto fallback;
1528 } else {
1529 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1530 goto noop;
1531
1532
1533
1534
1535 if (tcp_skb_pcount(skb) <= 1)
1536 goto noop;
1537
1538 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1539 if (!in_sack) {
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551 goto fallback;
1552 }
1553
1554 len = end_seq - TCP_SKB_CB(skb)->seq;
1555 BUG_ON(len < 0);
1556 BUG_ON(len > skb->len);
1557
1558
1559
1560
1561
1562 mss = tcp_skb_mss(skb);
1563
1564
1565
1566
1567 if (mss != tcp_skb_seglen(prev))
1568 goto fallback;
1569
1570 if (len == mss) {
1571 pcount = 1;
1572 } else if (len < mss) {
1573 goto noop;
1574 } else {
1575 pcount = len / mss;
1576 len = pcount * mss;
1577 }
1578 }
1579
1580 if (!skb_shift(prev, skb, len))
1581 goto fallback;
1582 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1583 goto out;
1584
1585
1586
1587
1588 if (prev == tcp_write_queue_tail(sk))
1589 goto out;
1590 skb = tcp_write_queue_next(sk, prev);
1591
1592 if (!skb_can_shift(skb) ||
1593 (skb == tcp_send_head(sk)) ||
1594 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1595 (mss != tcp_skb_seglen(skb)))
1596 goto out;
1597
1598 len = skb->len;
1599 if (skb_shift(prev, skb, len)) {
1600 pcount += tcp_skb_pcount(skb);
1601 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1602 }
1603
1604out:
1605 state->fack_count += pcount;
1606 return prev;
1607
1608noop:
1609 return skb;
1610
1611fallback:
1612 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1613 return NULL;
1614}
1615
1616static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1617 struct tcp_sack_block *next_dup,
1618 struct tcp_sacktag_state *state,
1619 u32 start_seq, u32 end_seq,
1620 int dup_sack_in)
1621{
1622 struct tcp_sock *tp = tcp_sk(sk);
1623 struct sk_buff *tmp;
1624
1625 tcp_for_write_queue_from(skb, sk) {
1626 int in_sack = 0;
1627 int dup_sack = dup_sack_in;
1628
1629 if (skb == tcp_send_head(sk))
1630 break;
1631
1632
1633 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1634 break;
1635
1636 if ((next_dup != NULL) &&
1637 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1638 in_sack = tcp_match_skb_to_sack(sk, skb,
1639 next_dup->start_seq,
1640 next_dup->end_seq);
1641 if (in_sack > 0)
1642 dup_sack = 1;
1643 }
1644
1645
1646
1647
1648
1649 if (in_sack <= 0) {
1650 tmp = tcp_shift_skb_data(sk, skb, state,
1651 start_seq, end_seq, dup_sack);
1652 if (tmp != NULL) {
1653 if (tmp != skb) {
1654 skb = tmp;
1655 continue;
1656 }
1657
1658 in_sack = 0;
1659 } else {
1660 in_sack = tcp_match_skb_to_sack(sk, skb,
1661 start_seq,
1662 end_seq);
1663 }
1664 }
1665
1666 if (unlikely(in_sack < 0))
1667 break;
1668
1669 if (in_sack) {
1670 TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
1671 state,
1672 dup_sack,
1673 tcp_skb_pcount(skb));
1674
1675 if (!before(TCP_SKB_CB(skb)->seq,
1676 tcp_highest_sack_seq(tp)))
1677 tcp_advance_highest_sack(sk, skb);
1678 }
1679
1680 state->fack_count += tcp_skb_pcount(skb);
1681 }
1682 return skb;
1683}
1684
1685
1686
1687
1688static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1689 struct tcp_sacktag_state *state,
1690 u32 skip_to_seq)
1691{
1692 tcp_for_write_queue_from(skb, sk) {
1693 if (skb == tcp_send_head(sk))
1694 break;
1695
1696 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1697 break;
1698
1699 state->fack_count += tcp_skb_pcount(skb);
1700 }
1701 return skb;
1702}
1703
1704static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1705 struct sock *sk,
1706 struct tcp_sack_block *next_dup,
1707 struct tcp_sacktag_state *state,
1708 u32 skip_to_seq)
1709{
1710 if (next_dup == NULL)
1711 return skb;
1712
1713 if (before(next_dup->start_seq, skip_to_seq)) {
1714 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1715 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1716 next_dup->start_seq, next_dup->end_seq,
1717 1);
1718 }
1719
1720 return skb;
1721}
1722
1723static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1724{
1725 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1726}
1727
1728static int
1729tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1730 u32 prior_snd_una)
1731{
1732 const struct inet_connection_sock *icsk = inet_csk(sk);
1733 struct tcp_sock *tp = tcp_sk(sk);
1734 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1735 TCP_SKB_CB(ack_skb)->sacked);
1736 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1737 struct tcp_sack_block sp[TCP_NUM_SACKS];
1738 struct tcp_sack_block *cache;
1739 struct tcp_sacktag_state state;
1740 struct sk_buff *skb;
1741 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1742 int used_sacks;
1743 int found_dup_sack = 0;
1744 int i, j;
1745 int first_sack_index;
1746
1747 state.flag = 0;
1748 state.reord = tp->packets_out;
1749
1750 if (!tp->sacked_out) {
1751 if (WARN_ON(tp->fackets_out))
1752 tp->fackets_out = 0;
1753 tcp_highest_sack_reset(sk);
1754 }
1755
1756 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1757 num_sacks, prior_snd_una);
1758 if (found_dup_sack)
1759 state.flag |= FLAG_DSACKING_ACK;
1760
1761
1762
1763
1764
1765 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1766 return 0;
1767
1768 if (!tp->packets_out)
1769 goto out;
1770
1771 used_sacks = 0;
1772 first_sack_index = 0;
1773 for (i = 0; i < num_sacks; i++) {
1774 int dup_sack = !i && found_dup_sack;
1775
1776 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1777 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1778
1779 if (!tcp_is_sackblock_valid(tp, dup_sack,
1780 sp[used_sacks].start_seq,
1781 sp[used_sacks].end_seq)) {
1782 int mib_idx;
1783
1784 if (dup_sack) {
1785 if (!tp->undo_marker)
1786 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1787 else
1788 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1789 } else {
1790
1791 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1792 !after(sp[used_sacks].end_seq, tp->snd_una))
1793 continue;
1794 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1795 }
1796
1797 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1798 if (i == 0)
1799 first_sack_index = -1;
1800 continue;
1801 }
1802
1803
1804 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1805 continue;
1806
1807 used_sacks++;
1808 }
1809
1810
1811 for (i = used_sacks - 1; i > 0; i--) {
1812 for (j = 0; j < i; j++) {
1813 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1814 swap(sp[j], sp[j + 1]);
1815
1816
1817 if (j == first_sack_index)
1818 first_sack_index = j + 1;
1819 }
1820 }
1821 }
1822
1823 skb = tcp_write_queue_head(sk);
1824 state.fack_count = 0;
1825 i = 0;
1826
1827 if (!tp->sacked_out) {
1828
1829 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1830 } else {
1831 cache = tp->recv_sack_cache;
1832
1833 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1834 !cache->end_seq)
1835 cache++;
1836 }
1837
1838 while (i < used_sacks) {
1839 u32 start_seq = sp[i].start_seq;
1840 u32 end_seq = sp[i].end_seq;
1841 int dup_sack = (found_dup_sack && (i == first_sack_index));
1842 struct tcp_sack_block *next_dup = NULL;
1843
1844 if (found_dup_sack && ((i + 1) == first_sack_index))
1845 next_dup = &sp[i + 1];
1846
1847
1848 if (after(end_seq, tp->high_seq))
1849 state.flag |= FLAG_DATA_LOST;
1850
1851
1852 while (tcp_sack_cache_ok(tp, cache) &&
1853 !before(start_seq, cache->end_seq))
1854 cache++;
1855
1856
1857 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1858 after(end_seq, cache->start_seq)) {
1859
1860
1861 if (before(start_seq, cache->start_seq)) {
1862 skb = tcp_sacktag_skip(skb, sk, &state,
1863 start_seq);
1864 skb = tcp_sacktag_walk(skb, sk, next_dup,
1865 &state,
1866 start_seq,
1867 cache->start_seq,
1868 dup_sack);
1869 }
1870
1871
1872 if (!after(end_seq, cache->end_seq))
1873 goto advance_sp;
1874
1875 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1876 &state,
1877 cache->end_seq);
1878
1879
1880 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1881
1882 skb = tcp_highest_sack(sk);
1883 if (skb == NULL)
1884 break;
1885 state.fack_count = tp->fackets_out;
1886 cache++;
1887 goto walk;
1888 }
1889
1890 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1891
1892 cache++;
1893 continue;
1894 }
1895
1896 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1897 skb = tcp_highest_sack(sk);
1898 if (skb == NULL)
1899 break;
1900 state.fack_count = tp->fackets_out;
1901 }
1902 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1903
1904walk:
1905 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1906 start_seq, end_seq, dup_sack);
1907
1908advance_sp:
1909
1910
1911
1912 if (after(end_seq, tp->frto_highmark))
1913 state.flag &= ~FLAG_ONLY_ORIG_SACKED;
1914
1915 i++;
1916 }
1917
1918
1919 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1920 tp->recv_sack_cache[i].start_seq = 0;
1921 tp->recv_sack_cache[i].end_seq = 0;
1922 }
1923 for (j = 0; j < used_sacks; j++)
1924 tp->recv_sack_cache[i++] = sp[j];
1925
1926 tcp_mark_lost_retrans(sk);
1927
1928 tcp_verify_left_out(tp);
1929
1930 if ((state.reord < tp->fackets_out) &&
1931 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
1932 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1933 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1934
1935out:
1936
1937#if FASTRETRANS_DEBUG > 0
1938 WARN_ON((int)tp->sacked_out < 0);
1939 WARN_ON((int)tp->lost_out < 0);
1940 WARN_ON((int)tp->retrans_out < 0);
1941 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1942#endif
1943 return state.flag;
1944}
1945
1946
1947
1948
1949static int tcp_limit_reno_sacked(struct tcp_sock *tp)
1950{
1951 u32 holes;
1952
1953 holes = max(tp->lost_out, 1U);
1954 holes = min(holes, tp->packets_out);
1955
1956 if ((tp->sacked_out + holes) > tp->packets_out) {
1957 tp->sacked_out = tp->packets_out - holes;
1958 return 1;
1959 }
1960 return 0;
1961}
1962
1963
1964
1965
1966
1967static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1968{
1969 struct tcp_sock *tp = tcp_sk(sk);
1970 if (tcp_limit_reno_sacked(tp))
1971 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1972}
1973
1974
1975
1976static void tcp_add_reno_sack(struct sock *sk)
1977{
1978 struct tcp_sock *tp = tcp_sk(sk);
1979 tp->sacked_out++;
1980 tcp_check_reno_reordering(sk, 0);
1981 tcp_verify_left_out(tp);
1982}
1983
1984
1985
1986static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1987{
1988 struct tcp_sock *tp = tcp_sk(sk);
1989
1990 if (acked > 0) {
1991
1992 if (acked - 1 >= tp->sacked_out)
1993 tp->sacked_out = 0;
1994 else
1995 tp->sacked_out -= acked - 1;
1996 }
1997 tcp_check_reno_reordering(sk, acked);
1998 tcp_verify_left_out(tp);
1999}
2000
2001static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
2002{
2003 tp->sacked_out = 0;
2004}
2005
2006static int tcp_is_sackfrto(const struct tcp_sock *tp)
2007{
2008 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
2009}
2010
2011
2012
2013
2014int tcp_use_frto(struct sock *sk)
2015{
2016 const struct tcp_sock *tp = tcp_sk(sk);
2017 const struct inet_connection_sock *icsk = inet_csk(sk);
2018 struct sk_buff *skb;
2019
2020 if (!sysctl_tcp_frto)
2021 return 0;
2022
2023
2024 if (icsk->icsk_mtup.probe_size)
2025 return 0;
2026
2027 if (tcp_is_sackfrto(tp))
2028 return 1;
2029
2030
2031 if (tp->retrans_out > 1)
2032 return 0;
2033
2034 skb = tcp_write_queue_head(sk);
2035 if (tcp_skb_is_last(sk, skb))
2036 return 1;
2037 skb = tcp_write_queue_next(sk, skb);
2038 tcp_for_write_queue_from(skb, sk) {
2039 if (skb == tcp_send_head(sk))
2040 break;
2041 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2042 return 0;
2043
2044 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2045 break;
2046 }
2047 return 1;
2048}
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062void tcp_enter_frto(struct sock *sk)
2063{
2064 const struct inet_connection_sock *icsk = inet_csk(sk);
2065 struct tcp_sock *tp = tcp_sk(sk);
2066 struct sk_buff *skb;
2067
2068 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
2069 tp->snd_una == tp->high_seq ||
2070 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
2071 !icsk->icsk_retransmits)) {
2072 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082 if (tp->frto_counter) {
2083 u32 stored_cwnd;
2084 stored_cwnd = tp->snd_cwnd;
2085 tp->snd_cwnd = 2;
2086 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2087 tp->snd_cwnd = stored_cwnd;
2088 } else {
2089 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2090 }
2091
2092
2093
2094
2095
2096
2097
2098 tcp_ca_event(sk, CA_EVENT_FRTO);
2099 }
2100
2101 tp->undo_marker = tp->snd_una;
2102 tp->undo_retrans = 0;
2103
2104 skb = tcp_write_queue_head(sk);
2105 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2106 tp->undo_marker = 0;
2107 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2108 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2109 tp->retrans_out -= tcp_skb_pcount(skb);
2110 }
2111 tcp_verify_left_out(tp);
2112
2113
2114 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2115
2116
2117
2118
2119 if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
2120 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
2121 after(tp->high_seq, tp->snd_una)) {
2122 tp->frto_highmark = tp->high_seq;
2123 } else {
2124 tp->frto_highmark = tp->snd_nxt;
2125 }
2126 tcp_set_ca_state(sk, TCP_CA_Disorder);
2127 tp->high_seq = tp->snd_nxt;
2128 tp->frto_counter = 1;
2129}
2130
2131
2132
2133
2134
2135static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
2136{
2137 struct tcp_sock *tp = tcp_sk(sk);
2138 struct sk_buff *skb;
2139
2140 tp->lost_out = 0;
2141 tp->retrans_out = 0;
2142 if (tcp_is_reno(tp))
2143 tcp_reset_reno_sack(tp);
2144
2145 tcp_for_write_queue(skb, sk) {
2146 if (skb == tcp_send_head(sk))
2147 break;
2148
2149 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2150
2151
2152
2153
2154 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
2155
2156 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
2157 tp->retrans_out += tcp_skb_pcount(skb);
2158
2159 flag |= FLAG_DATA_ACKED;
2160 } else {
2161 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2162 tp->undo_marker = 0;
2163 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2164 }
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2176 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2177 tp->lost_out += tcp_skb_pcount(skb);
2178 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2179 }
2180 }
2181 tcp_verify_left_out(tp);
2182
2183 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
2184 tp->snd_cwnd_cnt = 0;
2185 tp->snd_cwnd_stamp = tcp_time_stamp;
2186 tp->frto_counter = 0;
2187 tp->bytes_acked = 0;
2188
2189 tp->reordering = min_t(unsigned int, tp->reordering,
2190 sysctl_tcp_reordering);
2191 tcp_set_ca_state(sk, TCP_CA_Loss);
2192 tp->high_seq = tp->snd_nxt;
2193 TCP_ECN_queue_cwr(tp);
2194
2195 tcp_clear_all_retrans_hints(tp);
2196}
2197
2198static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2199{
2200 tp->retrans_out = 0;
2201 tp->lost_out = 0;
2202
2203 tp->undo_marker = 0;
2204 tp->undo_retrans = 0;
2205}
2206
2207void tcp_clear_retrans(struct tcp_sock *tp)
2208{
2209 tcp_clear_retrans_partial(tp);
2210
2211 tp->fackets_out = 0;
2212 tp->sacked_out = 0;
2213}
2214
2215
2216
2217
2218
2219void tcp_enter_loss(struct sock *sk, int how)
2220{
2221 const struct inet_connection_sock *icsk = inet_csk(sk);
2222 struct tcp_sock *tp = tcp_sk(sk);
2223 struct sk_buff *skb;
2224
2225
2226 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
2227 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
2228 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2229 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2230 tcp_ca_event(sk, CA_EVENT_LOSS);
2231 }
2232 tp->snd_cwnd = 1;
2233 tp->snd_cwnd_cnt = 0;
2234 tp->snd_cwnd_stamp = tcp_time_stamp;
2235
2236 tp->bytes_acked = 0;
2237 tcp_clear_retrans_partial(tp);
2238
2239 if (tcp_is_reno(tp))
2240 tcp_reset_reno_sack(tp);
2241
2242 if (!how) {
2243
2244
2245 tp->undo_marker = tp->snd_una;
2246 } else {
2247 tp->sacked_out = 0;
2248 tp->fackets_out = 0;
2249 }
2250 tcp_clear_all_retrans_hints(tp);
2251
2252 tcp_for_write_queue(skb, sk) {
2253 if (skb == tcp_send_head(sk))
2254 break;
2255
2256 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
2257 tp->undo_marker = 0;
2258 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
2259 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
2260 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2261 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2262 tp->lost_out += tcp_skb_pcount(skb);
2263 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2264 }
2265 }
2266 tcp_verify_left_out(tp);
2267
2268 tp->reordering = min_t(unsigned int, tp->reordering,
2269 sysctl_tcp_reordering);
2270 tcp_set_ca_state(sk, TCP_CA_Loss);
2271 tp->high_seq = tp->snd_nxt;
2272 TCP_ECN_queue_cwr(tp);
2273
2274 tp->frto_counter = 0;
2275}
2276
2277
2278
2279
2280
2281
2282
2283static int tcp_check_sack_reneging(struct sock *sk, int flag)
2284{
2285 if (flag & FLAG_SACK_RENEGING) {
2286 struct inet_connection_sock *icsk = inet_csk(sk);
2287 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
2288
2289 tcp_enter_loss(sk, 1);
2290 icsk->icsk_retransmits++;
2291 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
2292 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2293 icsk->icsk_rto, TCP_RTO_MAX);
2294 return 1;
2295 }
2296 return 0;
2297}
2298
2299static inline int tcp_fackets_out(const struct tcp_sock *tp)
2300{
2301 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2302}
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2320{
2321 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2322}
2323
2324static inline int tcp_skb_timedout(const struct sock *sk,
2325 const struct sk_buff *skb)
2326{
2327 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
2328}
2329
2330static inline int tcp_head_timedout(const struct sock *sk)
2331{
2332 const struct tcp_sock *tp = tcp_sk(sk);
2333
2334 return tp->packets_out &&
2335 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2336}
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431static int tcp_time_to_recover(struct sock *sk)
2432{
2433 struct tcp_sock *tp = tcp_sk(sk);
2434 __u32 packets_out;
2435
2436
2437 if (tp->frto_counter)
2438 return 0;
2439
2440
2441 if (tp->lost_out)
2442 return 1;
2443
2444
2445 if (tcp_dupack_heuristics(tp) > tp->reordering)
2446 return 1;
2447
2448
2449
2450
2451 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2452 return 1;
2453
2454
2455
2456
2457 packets_out = tp->packets_out;
2458 if (packets_out <= tp->reordering &&
2459 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2460 !tcp_may_send_now(sk)) {
2461
2462
2463
2464 return 1;
2465 }
2466
2467
2468
2469
2470
2471
2472 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2473 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2474 tcp_is_sack(tp) && !tcp_send_head(sk))
2475 return 1;
2476
2477 return 0;
2478}
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492static void tcp_timeout_skbs(struct sock *sk)
2493{
2494 struct tcp_sock *tp = tcp_sk(sk);
2495 struct sk_buff *skb;
2496
2497 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2498 return;
2499
2500 skb = tp->scoreboard_skb_hint;
2501 if (tp->scoreboard_skb_hint == NULL)
2502 skb = tcp_write_queue_head(sk);
2503
2504 tcp_for_write_queue_from(skb, sk) {
2505 if (skb == tcp_send_head(sk))
2506 break;
2507 if (!tcp_skb_timedout(sk, skb))
2508 break;
2509
2510 tcp_skb_mark_lost(tp, skb);
2511 }
2512
2513 tp->scoreboard_skb_hint = skb;
2514
2515 tcp_verify_left_out(tp);
2516}
2517
2518
2519
2520
2521static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2522{
2523 struct tcp_sock *tp = tcp_sk(sk);
2524 struct sk_buff *skb;
2525 int cnt, oldcnt;
2526 int err;
2527 unsigned int mss;
2528
2529 WARN_ON(packets > tp->packets_out);
2530 if (tp->lost_skb_hint) {
2531 skb = tp->lost_skb_hint;
2532 cnt = tp->lost_cnt_hint;
2533
2534 if (mark_head && skb != tcp_write_queue_head(sk))
2535 return;
2536 } else {
2537 skb = tcp_write_queue_head(sk);
2538 cnt = 0;
2539 }
2540
2541 tcp_for_write_queue_from(skb, sk) {
2542 if (skb == tcp_send_head(sk))
2543 break;
2544
2545
2546 tp->lost_skb_hint = skb;
2547 tp->lost_cnt_hint = cnt;
2548
2549 if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2550 break;
2551
2552 oldcnt = cnt;
2553 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2554 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2555 cnt += tcp_skb_pcount(skb);
2556
2557 if (cnt > packets) {
2558 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2559 (oldcnt >= packets))
2560 break;
2561
2562 mss = skb_shinfo(skb)->gso_size;
2563 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
2564 if (err < 0)
2565 break;
2566 cnt = packets;
2567 }
2568
2569 tcp_skb_mark_lost(tp, skb);
2570
2571 if (mark_head)
2572 break;
2573 }
2574 tcp_verify_left_out(tp);
2575}
2576
2577
2578
2579static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2580{
2581 struct tcp_sock *tp = tcp_sk(sk);
2582
2583 if (tcp_is_reno(tp)) {
2584 tcp_mark_head_lost(sk, 1, 1);
2585 } else if (tcp_is_fack(tp)) {
2586 int lost = tp->fackets_out - tp->reordering;
2587 if (lost <= 0)
2588 lost = 1;
2589 tcp_mark_head_lost(sk, lost, 0);
2590 } else {
2591 int sacked_upto = tp->sacked_out - tp->reordering;
2592 if (sacked_upto >= 0)
2593 tcp_mark_head_lost(sk, sacked_upto, 0);
2594 else if (fast_rexmit)
2595 tcp_mark_head_lost(sk, 1, 1);
2596 }
2597
2598 tcp_timeout_skbs(sk);
2599}
2600
2601
2602
2603
2604static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2605{
2606 tp->snd_cwnd = min(tp->snd_cwnd,
2607 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2608 tp->snd_cwnd_stamp = tcp_time_stamp;
2609}
2610
2611
2612
2613
2614static inline u32 tcp_cwnd_min(const struct sock *sk)
2615{
2616 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2617
2618 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2619}
2620
2621
2622static void tcp_cwnd_down(struct sock *sk, int flag)
2623{
2624 struct tcp_sock *tp = tcp_sk(sk);
2625 int decr = tp->snd_cwnd_cnt + 1;
2626
2627 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2628 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2629 tp->snd_cwnd_cnt = decr & 1;
2630 decr >>= 1;
2631
2632 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2633 tp->snd_cwnd -= decr;
2634
2635 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2636 tp->snd_cwnd_stamp = tcp_time_stamp;
2637 }
2638}
2639
2640
2641
2642
2643static inline int tcp_packet_delayed(const struct tcp_sock *tp)
2644{
2645 return !tp->retrans_stamp ||
2646 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2647 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2648}
2649
2650
2651
2652#if FASTRETRANS_DEBUG > 1
2653static void DBGUNDO(struct sock *sk, const char *msg)
2654{
2655 struct tcp_sock *tp = tcp_sk(sk);
2656 struct inet_sock *inet = inet_sk(sk);
2657
2658 if (sk->sk_family == AF_INET) {
2659 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2660 msg,
2661 &inet->inet_daddr, ntohs(inet->inet_dport),
2662 tp->snd_cwnd, tcp_left_out(tp),
2663 tp->snd_ssthresh, tp->prior_ssthresh,
2664 tp->packets_out);
2665 }
2666#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2667 else if (sk->sk_family == AF_INET6) {
2668 struct ipv6_pinfo *np = inet6_sk(sk);
2669 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2670 msg,
2671 &np->daddr, ntohs(inet->inet_dport),
2672 tp->snd_cwnd, tcp_left_out(tp),
2673 tp->snd_ssthresh, tp->prior_ssthresh,
2674 tp->packets_out);
2675 }
2676#endif
2677}
2678#else
2679#define DBGUNDO(x...) do { } while (0)
2680#endif
2681
2682static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2683{
2684 struct tcp_sock *tp = tcp_sk(sk);
2685
2686 if (tp->prior_ssthresh) {
2687 const struct inet_connection_sock *icsk = inet_csk(sk);
2688
2689 if (icsk->icsk_ca_ops->undo_cwnd)
2690 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2691 else
2692 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2693
2694 if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
2695 tp->snd_ssthresh = tp->prior_ssthresh;
2696 TCP_ECN_withdraw_cwr(tp);
2697 }
2698 } else {
2699 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2700 }
2701 tp->snd_cwnd_stamp = tcp_time_stamp;
2702}
2703
2704static inline int tcp_may_undo(const struct tcp_sock *tp)
2705{
2706 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2707}
2708
2709
2710static int tcp_try_undo_recovery(struct sock *sk)
2711{
2712 struct tcp_sock *tp = tcp_sk(sk);
2713
2714 if (tcp_may_undo(tp)) {
2715 int mib_idx;
2716
2717
2718
2719
2720 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2721 tcp_undo_cwr(sk, true);
2722 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2723 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2724 else
2725 mib_idx = LINUX_MIB_TCPFULLUNDO;
2726
2727 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2728 tp->undo_marker = 0;
2729 }
2730 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
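 /* Hold the old state until something *above* high_seq is ACKed;
  * for Reno this prevents spurious fast retransmits (RFC 2582).
  * SACK is safe without it.
  */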
2731
2732
2733
2734 tcp_moderate_cwnd(tp);
2735 return 1;
2736 }
2737 tcp_set_ca_state(sk, TCP_CA_Open);
2738 return 0;
2739}
2740
2741
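/* Undo the cwnd reduction when D-SACKs reveal that every retransmission was
 * unnecessary (the original transmissions all arrived).
 */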
2742static void tcp_try_undo_dsack(struct sock *sk)
2743{
2744 struct tcp_sock *tp = tcp_sk(sk);
2745
2746 if (tp->undo_marker && !tp->undo_retrans) {
2747 DBGUNDO(sk, "D-SACK");
2748 tcp_undo_cwr(sk, true);
2749 tp->undo_marker = 0;
2750 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2751 }
2752}
2753
/* Returns non-zero if any retransmission is still outstanding: either
 * retrans_out is non-zero, or the segment at the head of the write queue has
 * been retransmitted at some point.  Callers use this to decide when
 * retrans_stamp can safely be cleared.
 */
2768static int tcp_any_retrans_done(const struct sock *sk)
2769{
2770 const struct tcp_sock *tp = tcp_sk(sk);
2771 struct sk_buff *skb;
2772
2773 if (tp->retrans_out)
2774 return 1;
2775
2776 skb = tcp_write_queue_head(sk);
2777 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2778 return 1;
2779
2780 return 0;
2781}
2782
2783
2784
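/* Undo during fast recovery after a partial ACK: if the ACK proves the
 * original transmissions arrived, revert the cwnd reduction (ssthresh is
 * kept) and report that no further loss marking is needed.
 */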
2785static int tcp_try_undo_partial(struct sock *sk, int acked)
2786{
2787 struct tcp_sock *tp = tcp_sk(sk);
2788
2789 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2790
2791 if (tcp_may_undo(tp)) {
2792
2793
2794
2795 if (!tcp_any_retrans_done(sk))
2796 tp->retrans_stamp = 0;
2797
2798 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2799
2800 DBGUNDO(sk, "Hoe");
2801 tcp_undo_cwr(sk, false);
2802 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2803
2804
2805
2806
2807
2808 failed = 0;
2809 }
2810 return failed;
2811}
2812
2813
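/* Undo during loss recovery: if the ACK proves the RTO was spurious, clear
 * all LOST marks and restore cwnd.
 */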
2814static int tcp_try_undo_loss(struct sock *sk)
2815{
2816 struct tcp_sock *tp = tcp_sk(sk);
2817
2818 if (tcp_may_undo(tp)) {
2819 struct sk_buff *skb;
2820 tcp_for_write_queue(skb, sk) {
2821 if (skb == tcp_send_head(sk))
2822 break;
2823 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2824 }
2825
2826 tcp_clear_all_retrans_hints(tp);
2827
2828 DBGUNDO(sk, "partial loss");
2829 tp->lost_out = 0;
2830 tcp_undo_cwr(sk, true);
2831 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2832 inet_csk(sk)->icsk_retransmits = 0;
2833 tp->undo_marker = 0;
2834 if (tcp_is_sack(tp))
2835 tcp_set_ca_state(sk, TCP_CA_Open);
2836 return 1;
2837 }
2838 return 0;
2839}
2840
2841static inline void tcp_complete_cwr(struct sock *sk)
2842{
2843 struct tcp_sock *tp = tcp_sk(sk);
2844
2845
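 /* If the reduction was already undone (undo_marker cleared), leave cwnd
  * alone; otherwise finish the reduction: down to ssthresh, or in CWR to
  * whichever of cwnd and ssthresh is smaller.
  */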
2846 if (tp->undo_marker) {
2847 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
2848 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2849 else
2850 tp->snd_cwnd = tp->snd_ssthresh;
2851 tp->snd_cwnd_stamp = tcp_time_stamp;
2852 }
2853 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2854}
2855
2856static void tcp_try_keep_open(struct sock *sk)
2857{
2858 struct tcp_sock *tp = tcp_sk(sk);
2859 int state = TCP_CA_Open;
2860
2861 if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
2862 state = TCP_CA_Disorder;
2863
2864 if (inet_csk(sk)->icsk_ca_state != state) {
2865 tcp_set_ca_state(sk, state);
2866 tp->high_seq = tp->snd_nxt;
2867 }
2868}
2869
2870static void tcp_try_to_open(struct sock *sk, int flag)
2871{
2872 struct tcp_sock *tp = tcp_sk(sk);
2873
2874 tcp_verify_left_out(tp);
2875
2876 if (!tp->frto_counter && !tcp_any_retrans_done(sk))
2877 tp->retrans_stamp = 0;
2878
2879 if (flag & FLAG_ECE)
2880 tcp_enter_cwr(sk, 1);
2881
2882 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2883 tcp_try_keep_open(sk);
2884 tcp_moderate_cwnd(tp);
2885 } else {
2886 tcp_cwnd_down(sk, flag);
2887 }
2888}
2889
2890static void tcp_mtup_probe_failed(struct sock *sk)
2891{
2892 struct inet_connection_sock *icsk = inet_csk(sk);
2893
2894 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2895 icsk->icsk_mtup.probe_size = 0;
2896}
2897
2898static void tcp_mtup_probe_success(struct sock *sk)
2899{
2900 struct tcp_sock *tp = tcp_sk(sk);
2901 struct inet_connection_sock *icsk = inet_csk(sk);
2902
2903
2904 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2905 tp->snd_cwnd = tp->snd_cwnd *
2906 tcp_mss_to_mtu(sk, tp->mss_cache) /
2907 icsk->icsk_mtup.probe_size;
2908 tp->snd_cwnd_cnt = 0;
2909 tp->snd_cwnd_stamp = tcp_time_stamp;
2910 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2911
2912 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2913 icsk->icsk_mtup.probe_size = 0;
2914 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2915}
2916
2917
2918
2919
2920
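/* Do a simple retransmit without using the backoff machinery in tcp_timer;
 * this is used for path MTU discovery.  The socket is already locked here.
 */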
2921void tcp_simple_retransmit(struct sock *sk)
2922{
2923 const struct inet_connection_sock *icsk = inet_csk(sk);
2924 struct tcp_sock *tp = tcp_sk(sk);
2925 struct sk_buff *skb;
2926 unsigned int mss = tcp_current_mss(sk);
2927 u32 prior_lost = tp->lost_out;
2928
2929 tcp_for_write_queue(skb, sk) {
2930 if (skb == tcp_send_head(sk))
2931 break;
2932 if (tcp_skb_seglen(skb) > mss &&
2933 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2934 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2935 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2936 tp->retrans_out -= tcp_skb_pcount(skb);
2937 }
2938 tcp_skb_mark_lost_uncond_verify(tp, skb);
2939 }
2940 }
2941
2942 tcp_clear_retrans_hints_partial(tp);
2943
2944 if (prior_lost == tp->lost_out)
2945 return;
2946
2947 if (tcp_is_reno(tp))
2948 tcp_limit_reno_sacked(tp);
2949
2950 tcp_verify_left_out(tp);
2951
2952
2953
2954
2955
2956
2957 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2958 tp->high_seq = tp->snd_nxt;
2959 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2960 tp->prior_ssthresh = 0;
2961 tp->undo_marker = 0;
2962 tcp_set_ca_state(sk, TCP_CA_Loss);
2963 }
2964 tcp_xmit_retransmit_queue(sk);
2965}
2966EXPORT_SYMBOL(tcp_simple_retransmit);
2967
/* Proportional Rate Reduction (PRR): while more packets are in flight than
 * ssthresh, release roughly ssthresh/prior_cwnd new segments per delivered
 * segment so that cwnd converges to ssthresh by the end of recovery
 * (e.g. with prior_cwnd 20 and ssthresh 10, about one new segment is sent
 * for every two delivered).  Once in-flight drops to ssthresh or below, send
 * at least what was newly ACKed/SACKed, slow-start style, and always allow
 * the fast retransmit itself.
 */
2979static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
2980 int fast_rexmit, int flag)
2981{
2982 struct tcp_sock *tp = tcp_sk(sk);
2983 int sndcnt = 0;
2984 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2985
2986 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2987 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2988 tp->prior_cwnd - 1;
2989 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2990 } else {
2991 sndcnt = min_t(int, delta,
2992 max_t(int, tp->prr_delivered - tp->prr_out,
2993 newly_acked_sacked) + 1);
2994 }
2995
2996 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2997 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2998}
2999
/* The main congestion/recovery state machine.  It is called for "dubious"
 * ACKs (duplicates, SACK/ECE marks, or any ACK while not in the Open state)
 * and decides when to enter fast recovery, which segments to mark lost,
 * whether a previous cwnd reduction can be undone, and finally updates cwnd
 * (via PRR) and retransmits what is due.
 */
3011static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3012 int newly_acked_sacked, int flag)
3013{
3014 struct inet_connection_sock *icsk = inet_csk(sk);
3015 struct tcp_sock *tp = tcp_sk(sk);
3016 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3017 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
3018 (tcp_fackets_out(tp) > tp->reordering));
3019 int fast_rexmit = 0, mib_idx;
3020
3021 if (WARN_ON(!tp->packets_out && tp->sacked_out))
3022 tp->sacked_out = 0;
3023 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
3024 tp->fackets_out = 0;
3025
3026
3027
3028 if (flag & FLAG_ECE)
3029 tp->prior_ssthresh = 0;
3030
3031
3032 if (tcp_check_sack_reneging(sk, flag))
3033 return;
3034
3035
3036 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
3037 before(tp->snd_una, tp->high_seq) &&
3038 icsk->icsk_ca_state != TCP_CA_Open &&
3039 tp->fackets_out > tp->reordering) {
3040 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
3041 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
3042 }
3043
3044
3045 tcp_verify_left_out(tp);
3046
3047
3048
3049 if (icsk->icsk_ca_state == TCP_CA_Open) {
3050 WARN_ON(tp->retrans_out != 0);
3051 tp->retrans_stamp = 0;
3052 } else if (!before(tp->snd_una, tp->high_seq)) {
3053 switch (icsk->icsk_ca_state) {
3054 case TCP_CA_Loss:
3055 icsk->icsk_retransmits = 0;
3056 if (tcp_try_undo_recovery(sk))
3057 return;
3058 break;
3059
3060 case TCP_CA_CWR:
3061
3062
3063 if (tp->snd_una != tp->high_seq) {
3064 tcp_complete_cwr(sk);
3065 tcp_set_ca_state(sk, TCP_CA_Open);
3066 }
3067 break;
3068
3069 case TCP_CA_Disorder:
3070 tcp_try_undo_dsack(sk);
3071 if (!tp->undo_marker ||
3072
3073
3074 tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
3075 tp->undo_marker = 0;
3076 tcp_set_ca_state(sk, TCP_CA_Open);
3077 }
3078 break;
3079
3080 case TCP_CA_Recovery:
3081 if (tcp_is_reno(tp))
3082 tcp_reset_reno_sack(tp);
3083 if (tcp_try_undo_recovery(sk))
3084 return;
3085 tcp_complete_cwr(sk);
3086 break;
3087 }
3088 }
3089
3090
3091 switch (icsk->icsk_ca_state) {
3092 case TCP_CA_Recovery:
3093 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
3094 if (tcp_is_reno(tp) && is_dupack)
3095 tcp_add_reno_sack(sk);
3096 } else
3097 do_lost = tcp_try_undo_partial(sk, pkts_acked);
3098 break;
3099 case TCP_CA_Loss:
3100 if (flag & FLAG_DATA_ACKED)
3101 icsk->icsk_retransmits = 0;
3102 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
3103 tcp_reset_reno_sack(tp);
3104 if (!tcp_try_undo_loss(sk)) {
3105 tcp_moderate_cwnd(tp);
3106 tcp_xmit_retransmit_queue(sk);
3107 return;
3108 }
3109 if (icsk->icsk_ca_state != TCP_CA_Open)
3110 return;
3111
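 /* Loss was undone; fall through to processing in the Open state. */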
3112 default:
3113 if (tcp_is_reno(tp)) {
3114 if (flag & FLAG_SND_UNA_ADVANCED)
3115 tcp_reset_reno_sack(tp);
3116 if (is_dupack)
3117 tcp_add_reno_sack(sk);
3118 }
3119
3120 if (icsk->icsk_ca_state == TCP_CA_Disorder)
3121 tcp_try_undo_dsack(sk);
3122
3123 if (!tcp_time_to_recover(sk)) {
3124 tcp_try_to_open(sk, flag);
3125 return;
3126 }
3127
3128
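 /* MTU probe failure: the losses stem from the failed probe, not from
  * congestion, so do not reduce cwnd.
  */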
3129 if (icsk->icsk_ca_state < TCP_CA_CWR &&
3130 icsk->icsk_mtup.probe_size &&
3131 tp->snd_una == tp->mtu_probe.probe_seq_start) {
3132 tcp_mtup_probe_failed(sk);
3133
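 /* Restore the cwnd reduction made when the MTU probe was sent. */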
3134 tp->snd_cwnd++;
3135 tcp_simple_retransmit(sk);
3136 return;
3137 }
3138
3139
3140
3141 if (tcp_is_reno(tp))
3142 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3143 else
3144 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3145
3146 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3147
3148 tp->high_seq = tp->snd_nxt;
3149 tp->prior_ssthresh = 0;
3150 tp->undo_marker = tp->snd_una;
3151 tp->undo_retrans = tp->retrans_out;
3152
3153 if (icsk->icsk_ca_state < TCP_CA_CWR) {
3154 if (!(flag & FLAG_ECE))
3155 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3156 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
3157 TCP_ECN_queue_cwr(tp);
3158 }
3159
3160 tp->bytes_acked = 0;
3161 tp->snd_cwnd_cnt = 0;
3162 tp->prior_cwnd = tp->snd_cwnd;
3163 tp->prr_delivered = 0;
3164 tp->prr_out = 0;
3165 tcp_set_ca_state(sk, TCP_CA_Recovery);
3166 fast_rexmit = 1;
3167 }
3168
3169 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3170 tcp_update_scoreboard(sk, fast_rexmit);
3171 tp->prr_delivered += newly_acked_sacked;
3172 tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
3173 tcp_xmit_retransmit_queue(sk);
3174}
3175
3176void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3177{
3178 tcp_rtt_estimator(sk, seq_rtt);
3179 tcp_set_rto(sk);
3180 inet_csk(sk)->icsk_backoff = 0;
3181}
3182EXPORT_SYMBOL(tcp_valid_rtt_meas);
3183
3184
3185
3186
3187static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3188{
 /* RTT measurement from the echoed timestamp (RFC 1323 RTTM): the TSecr
  * value identifies the original transmission, so the sample is valid even
  * when retransmitted data is ACKed and no Karn check is needed.
  */
3204 struct tcp_sock *tp = tcp_sk(sk);
3205
3206 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
3207}
3208
3209static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3210{
 /* Without timestamps we must honour Karn's algorithm: if any retransmitted
  * data is ACKed we cannot tell which transmission is being acknowledged,
  * so no RTT sample is taken.
  */
3220 if (flag & FLAG_RETRANS_DATA_ACKED)
3221 return;
3222
3223 tcp_valid_rtt_meas(sk, seq_rtt);
3224}
3225
3226static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3227 const s32 seq_rtt)
3228{
3229 const struct tcp_sock *tp = tcp_sk(sk);
3230
3231 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3232 tcp_ack_saw_tstamp(sk, flag);
3233 else if (seq_rtt >= 0)
3234 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3235}
3236
3237static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3238{
3239 const struct inet_connection_sock *icsk = inet_csk(sk);
3240 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3241 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3242}
3243
3244
3245
3246
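/* Restart the retransmit timer now that forward progress has been made, or
 * clear it once everything is acknowledged.
 */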
3247static void tcp_rearm_rto(struct sock *sk)
3248{
3249 const struct tcp_sock *tp = tcp_sk(sk);
3250
3251 if (!tp->packets_out) {
3252 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3253 } else {
3254 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3255 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3256 }
3257}
3258
3259
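/* A TSO packet was only partially acknowledged: trim the acked part off its
 * head and return how many of its sub-packets were covered.
 */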
3260static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3261{
3262 struct tcp_sock *tp = tcp_sk(sk);
3263 u32 packets_acked;
3264
3265 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3266
3267 packets_acked = tcp_skb_pcount(skb);
3268 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3269 return 0;
3270 packets_acked -= tcp_skb_pcount(skb);
3271
3272 if (packets_acked) {
3273 BUG_ON(tcp_skb_pcount(skb) == 0);
3274 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3275 }
3276
3277 return packets_acked;
3278}
3279
3280
3281
3282
3283
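/* Remove fully acknowledged frames from the retransmission queue; anything
 * ending at or before the new snd_una is known to have arrived at the peer.
 * Also collects RTT samples, reordering hints and MTU-probe results.
 */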
3284static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3285 u32 prior_snd_una)
3286{
3287 struct tcp_sock *tp = tcp_sk(sk);
3288 const struct inet_connection_sock *icsk = inet_csk(sk);
3289 struct sk_buff *skb;
3290 u32 now = tcp_time_stamp;
3291 int fully_acked = 1;
3292 int flag = 0;
3293 u32 pkts_acked = 0;
3294 u32 reord = tp->packets_out;
3295 u32 prior_sacked = tp->sacked_out;
3296 s32 seq_rtt = -1;
3297 s32 ca_seq_rtt = -1;
3298 ktime_t last_ackt = net_invalid_timestamp();
3299
3300 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3301 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3302 u32 acked_pcount;
3303 u8 sacked = scb->sacked;
3304
3305
3306 if (after(scb->end_seq, tp->snd_una)) {
3307 if (tcp_skb_pcount(skb) == 1 ||
3308 !after(tp->snd_una, scb->seq))
3309 break;
3310
3311 acked_pcount = tcp_tso_acked(sk, skb);
3312 if (!acked_pcount)
3313 break;
3314
3315 fully_acked = 0;
3316 } else {
3317 acked_pcount = tcp_skb_pcount(skb);
3318 }
3319
3320 if (sacked & TCPCB_RETRANS) {
3321 if (sacked & TCPCB_SACKED_RETRANS)
3322 tp->retrans_out -= acked_pcount;
3323 flag |= FLAG_RETRANS_DATA_ACKED;
3324 ca_seq_rtt = -1;
3325 seq_rtt = -1;
3326 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3327 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3328 } else {
3329 ca_seq_rtt = now - scb->when;
3330 last_ackt = skb->tstamp;
3331 if (seq_rtt < 0) {
3332 seq_rtt = ca_seq_rtt;
3333 }
3334 if (!(sacked & TCPCB_SACKED_ACKED))
3335 reord = min(pkts_acked, reord);
3336 }
3337
3338 if (sacked & TCPCB_SACKED_ACKED)
3339 tp->sacked_out -= acked_pcount;
3340 if (sacked & TCPCB_LOST)
3341 tp->lost_out -= acked_pcount;
3342
3343 tp->packets_out -= acked_pcount;
3344 pkts_acked += acked_pcount;
3345
3346
3347
3348
3349
3350
3351
3352
3353 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3354 flag |= FLAG_DATA_ACKED;
3355 } else {
3356 flag |= FLAG_SYN_ACKED;
3357 tp->retrans_stamp = 0;
3358 }
3359
3360 if (!fully_acked)
3361 break;
3362
3363 tcp_unlink_write_queue(skb, sk);
3364 sk_wmem_free_skb(sk, skb);
3365 tp->scoreboard_skb_hint = NULL;
3366 if (skb == tp->retransmit_skb_hint)
3367 tp->retransmit_skb_hint = NULL;
3368 if (skb == tp->lost_skb_hint)
3369 tp->lost_skb_hint = NULL;
3370 }
3371
3372 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3373 tp->snd_up = tp->snd_una;
3374
3375 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3376 flag |= FLAG_SACK_RENEGING;
3377
3378 if (flag & FLAG_ACKED) {
3379 const struct tcp_congestion_ops *ca_ops
3380 = inet_csk(sk)->icsk_ca_ops;
3381
3382 if (unlikely(icsk->icsk_mtup.probe_size &&
3383 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3384 tcp_mtup_probe_success(sk);
3385 }
3386
3387 tcp_ack_update_rtt(sk, flag, seq_rtt);
3388 tcp_rearm_rto(sk);
3389
3390 if (tcp_is_reno(tp)) {
3391 tcp_remove_reno_sacks(sk, pkts_acked);
3392 } else {
3393 int delta;
3394
3395
3396 if (reord < prior_fackets)
3397 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3398
3399 delta = tcp_is_fack(tp) ? pkts_acked :
3400 prior_sacked - tp->sacked_out;
3401 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3402 }
3403
3404 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3405
3406 if (ca_ops->pkts_acked) {
3407 s32 rtt_us = -1;
3408
3409
3410 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3411
3412 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3413 !ktime_equal(last_ackt,
3414 net_invalid_timestamp()))
3415 rtt_us = ktime_us_delta(ktime_get_real(),
3416 last_ackt);
3417 else if (ca_seq_rtt >= 0)
3418 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3419 }
3420
3421 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3422 }
3423 }
3424
3425#if FASTRETRANS_DEBUG > 0
3426 WARN_ON((int)tp->sacked_out < 0);
3427 WARN_ON((int)tp->lost_out < 0);
3428 WARN_ON((int)tp->retrans_out < 0);
3429 if (!tp->packets_out && tcp_is_sack(tp)) {
3430 icsk = inet_csk(sk);
3431 if (tp->lost_out) {
3432 printk(KERN_DEBUG "Leak l=%u %d\n",
3433 tp->lost_out, icsk->icsk_ca_state);
3434 tp->lost_out = 0;
3435 }
3436 if (tp->sacked_out) {
3437 printk(KERN_DEBUG "Leak s=%u %d\n",
3438 tp->sacked_out, icsk->icsk_ca_state);
3439 tp->sacked_out = 0;
3440 }
3441 if (tp->retrans_out) {
3442 printk(KERN_DEBUG "Leak r=%u %d\n",
3443 tp->retrans_out, icsk->icsk_ca_state);
3444 tp->retrans_out = 0;
3445 }
3446 }
3447#endif
3448 return flag;
3449}
3450
3451static void tcp_ack_probe(struct sock *sk)
3452{
3453 const struct tcp_sock *tp = tcp_sk(sk);
3454 struct inet_connection_sock *icsk = inet_csk(sk);
3455
3456
3457
3458 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3459 icsk->icsk_backoff = 0;
3460 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3461
3462
3463
3464 } else {
3465 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3466 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3467 TCP_RTO_MAX);
3468 }
3469}
3470
3471static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3472{
3473 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3474 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3475}
3476
3477static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3478{
3479 const struct tcp_sock *tp = tcp_sk(sk);
3480 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3481 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
3482}
3483
3484
3485
3486
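/* RFC 793 window update check: accept if the ACK advances snd_una, or
 * carries a newer sequence number, or the same sequence number with a
 * larger window.
 */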
3487static inline int tcp_may_update_window(const struct tcp_sock *tp,
3488 const u32 ack, const u32 ack_seq,
3489 const u32 nwin)
3490{
3491 return after(ack, tp->snd_una) ||
3492 after(ack_seq, tp->snd_wl1) ||
3493 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3494}
3495
3496
3497
3498
3499
3500
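/* Update our send window from the incoming segment, following the
 * RFC 793 / RFC 1122 window update rules.
 */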
3501static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3502 u32 ack_seq)
3503{
3504 struct tcp_sock *tp = tcp_sk(sk);
3505 int flag = 0;
3506 u32 nwin = ntohs(tcp_hdr(skb)->window);
3507
3508 if (likely(!tcp_hdr(skb)->syn))
3509 nwin <<= tp->rx_opt.snd_wscale;
3510
3511 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3512 flag |= FLAG_WIN_UPDATE;
3513 tcp_update_wl(tp, ack_seq);
3514
3515 if (tp->snd_wnd != nwin) {
3516 tp->snd_wnd = nwin;
3517
3518
3519
3520
3521 tp->pred_flags = 0;
3522 tcp_fast_path_check(sk);
3523
3524 if (nwin > tp->max_window) {
3525 tp->max_window = nwin;
3526 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3527 }
3528 }
3529 }
3530
3531 tp->snd_una = ack;
3532
3533 return flag;
3534}
3535
3536
3537
3538
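/* Conservative spurious RTO response (sysctl_tcp_frto_response == 1): drop
 * cwnd to ssthresh and continue in congestion avoidance.
 */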
3539static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3540{
3541 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3542 tp->snd_cwnd_cnt = 0;
3543 tp->bytes_acked = 0;
3544 TCP_ECN_queue_cwr(tp);
3545 tcp_moderate_cwnd(tp);
3546}
3547
3548
3549
3550
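/* Rate-halving spurious RTO response, the default (and also used whenever
 * ECE was seen): enter CWR and reduce the sending rate gradually.
 */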
3551static void tcp_ratehalving_spur_to_response(struct sock *sk)
3552{
3553 tcp_enter_cwr(sk, 0);
3554}
3555
3556static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3557{
3558 if (flag & FLAG_ECE)
3559 tcp_ratehalving_spur_to_response(sk);
3560 else
3561 tcp_undo_cwr(sk, true);
3562}
3563
/* F-RTO spurious retransmission timeout detection (RFC 4138/5682), driven by
 * tp->frto_counter after an RTO.  The ACKs following the timeout are
 * examined: if new data is ACKed without any retransmission being ACKed, the
 * timeout was probably spurious and we keep sending new data instead of
 * retransmitting; if the evidence points to genuine loss we fall back to
 * conventional recovery via tcp_enter_frto_loss().  On detection of a
 * spurious RTO the response is picked by sysctl_tcp_frto_response (undo,
 * conservative, or rate-halving).  A non-zero return tells tcp_ack() that
 * cwnd has already been taken care of here.
 */
3594static int tcp_process_frto(struct sock *sk, int flag)
3595{
3596 struct tcp_sock *tp = tcp_sk(sk);
3597
3598 tcp_verify_left_out(tp);
3599
3600
3601 if (flag & FLAG_DATA_ACKED)
3602 inet_csk(sk)->icsk_retransmits = 0;
3603
3604 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3605 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3606 tp->undo_marker = 0;
3607
3608 if (!before(tp->snd_una, tp->frto_highmark)) {
3609 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3610 return 1;
3611 }
3612
3613 if (!tcp_is_sackfrto(tp)) {
3614
3615
3616
3617
3618 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3619 return 1;
3620
3621 if (!(flag & FLAG_DATA_ACKED)) {
3622 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3623 flag);
3624 return 1;
3625 }
3626 } else {
3627 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3628
3629 tp->snd_cwnd = min(tp->snd_cwnd,
3630 tcp_packets_in_flight(tp));
3631 return 1;
3632 }
3633
3634 if ((tp->frto_counter >= 2) &&
3635 (!(flag & FLAG_FORWARD_PROGRESS) ||
3636 ((flag & FLAG_DATA_SACKED) &&
3637 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3638
3639 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3640 (flag & FLAG_NOT_DUP))
3641 return 1;
3642
3643 tcp_enter_frto_loss(sk, 3, flag);
3644 return 1;
3645 }
3646 }
3647
3648 if (tp->frto_counter == 1) {
3649
3650 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3651 tp->frto_counter = 2;
3652
3653 if (!tcp_may_send_now(sk))
3654 tcp_enter_frto_loss(sk, 2, flag);
3655
3656 return 1;
3657 } else {
3658 switch (sysctl_tcp_frto_response) {
3659 case 2:
3660 tcp_undo_spur_to_response(sk, flag);
3661 break;
3662 case 1:
3663 tcp_conservative_spur_to_response(tp);
3664 break;
3665 default:
3666 tcp_ratehalving_spur_to_response(sk);
3667 break;
3668 }
3669 tp->frto_counter = 0;
3670 tp->undo_marker = 0;
3671 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3672 }
3673 return 0;
3674}
3675
3676
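/* This routine deals with incoming ACKs, but not outgoing ones. */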
3677static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3678{
3679 struct inet_connection_sock *icsk = inet_csk(sk);
3680 struct tcp_sock *tp = tcp_sk(sk);
3681 u32 prior_snd_una = tp->snd_una;
3682 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3683 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3684 u32 prior_in_flight;
3685 u32 prior_fackets;
3686 int prior_packets;
3687 int prior_sacked = tp->sacked_out;
3688 int newly_acked_sacked = 0;
3689 int frto_cwnd = 0;
3690
3691
3692
3693
3694 if (before(ack, prior_snd_una))
3695 goto old_ack;
3696
3697
3698
3699
3700 if (after(ack, tp->snd_nxt))
3701 goto invalid_ack;
3702
3703 if (after(ack, prior_snd_una))
3704 flag |= FLAG_SND_UNA_ADVANCED;
3705
3706 if (sysctl_tcp_abc) {
3707 if (icsk->icsk_ca_state < TCP_CA_CWR)
3708 tp->bytes_acked += ack - prior_snd_una;
3709 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3710
3711 tp->bytes_acked += min(ack - prior_snd_una,
3712 tp->mss_cache);
3713 }
3714
3715 prior_fackets = tp->fackets_out;
3716 prior_in_flight = tcp_packets_in_flight(tp);
3717
3718 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3719
3720
3721
3722
3723 tcp_update_wl(tp, ack_seq);
3724 tp->snd_una = ack;
3725 flag |= FLAG_WIN_UPDATE;
3726
3727 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3728
3729 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3730 } else {
3731 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3732 flag |= FLAG_DATA;
3733 else
3734 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3735
3736 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3737
3738 if (TCP_SKB_CB(skb)->sacked)
3739 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3740
3741 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3742 flag |= FLAG_ECE;
3743
3744 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3745 }
3746
3747
3748
3749
3750 sk->sk_err_soft = 0;
3751 icsk->icsk_probes_out = 0;
3752 tp->rcv_tstamp = tcp_time_stamp;
3753 prior_packets = tp->packets_out;
3754 if (!prior_packets)
3755 goto no_queue;
3756
3757
3758 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3759
3760 newly_acked_sacked = (prior_packets - prior_sacked) -
3761 (tp->packets_out - tp->sacked_out);
3762
3763 if (tp->frto_counter)
3764 frto_cwnd = tcp_process_frto(sk, flag);
3765
3766 if (before(tp->frto_highmark, tp->snd_una))
3767 tp->frto_highmark = 0;
3768
3769 if (tcp_ack_is_dubious(sk, flag)) {
3770
3771 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3772 tcp_may_raise_cwnd(sk, flag))
3773 tcp_cong_avoid(sk, ack, prior_in_flight);
3774 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3775 newly_acked_sacked, flag);
3776 } else {
3777 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3778 tcp_cong_avoid(sk, ack, prior_in_flight);
3779 }
3780
3781 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3782 dst_confirm(__sk_dst_get(sk));
3783
3784 return 1;
3785
3786no_queue:
3787
3788
3789
3790
3791 if (tcp_send_head(sk))
3792 tcp_ack_probe(sk);
3793 return 1;
3794
3795invalid_ack:
3796 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3797 return -1;
3798
3799old_ack:
3800 if (TCP_SKB_CB(skb)->sacked) {
3801 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3802 if (icsk->icsk_ca_state == TCP_CA_Open)
3803 tcp_try_keep_open(sk);
3804 }
3805
3806 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3807 return 0;
3808}
3809
3810
3811
3812
3813
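/* Parse TCP options.  Normally only called on SYN and SYN-ACK packets;
 * established-state segments with unusual options also end up here via the
 * slow path.
 */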
3814void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
3815 const u8 **hvpp, int estab)
3816{
3817 const unsigned char *ptr;
3818 const struct tcphdr *th = tcp_hdr(skb);
3819 int length = (th->doff * 4) - sizeof(struct tcphdr);
3820
3821 ptr = (const unsigned char *)(th + 1);
3822 opt_rx->saw_tstamp = 0;
3823
3824 while (length > 0) {
3825 int opcode = *ptr++;
3826 int opsize;
3827
3828 switch (opcode) {
3829 case TCPOPT_EOL:
3830 return;
3831 case TCPOPT_NOP:
3832 length--;
3833 continue;
3834 default:
3835 opsize = *ptr++;
3836 if (opsize < 2)
3837 return;
3838 if (opsize > length)
3839 return;
3840 switch (opcode) {
3841 case TCPOPT_MSS:
3842 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3843 u16 in_mss = get_unaligned_be16(ptr);
3844 if (in_mss) {
3845 if (opt_rx->user_mss &&
3846 opt_rx->user_mss < in_mss)
3847 in_mss = opt_rx->user_mss;
3848 opt_rx->mss_clamp = in_mss;
3849 }
3850 }
3851 break;
3852 case TCPOPT_WINDOW:
3853 if (opsize == TCPOLEN_WINDOW && th->syn &&
3854 !estab && sysctl_tcp_window_scaling) {
3855 __u8 snd_wscale = *(__u8 *)ptr;
3856 opt_rx->wscale_ok = 1;
3857 if (snd_wscale > 14) {
3858 if (net_ratelimit())
3859 printk(KERN_INFO "tcp_parse_options: Illegal window "
3860 "scaling value %d >14 received.\n",
3861 snd_wscale);
3862 snd_wscale = 14;
3863 }
3864 opt_rx->snd_wscale = snd_wscale;
3865 }
3866 break;
3867 case TCPOPT_TIMESTAMP:
3868 if ((opsize == TCPOLEN_TIMESTAMP) &&
3869 ((estab && opt_rx->tstamp_ok) ||
3870 (!estab && sysctl_tcp_timestamps))) {
3871 opt_rx->saw_tstamp = 1;
3872 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3873 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3874 }
3875 break;
3876 case TCPOPT_SACK_PERM:
3877 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3878 !estab && sysctl_tcp_sack) {
3879 opt_rx->sack_ok = 1;
3880 tcp_sack_reset(opt_rx);
3881 }
3882 break;
3883
3884 case TCPOPT_SACK:
3885 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3886 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3887 opt_rx->sack_ok) {
3888 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3889 }
3890 break;
3891#ifdef CONFIG_TCP_MD5SIG
3892 case TCPOPT_MD5SIG:
3893
3894
3895
3896
3897 break;
3898#endif
3899 case TCPOPT_COOKIE:
3900
3901
3902 switch (opsize) {
3903 case TCPOLEN_COOKIE_BASE:
3904
3905 break;
3906 case TCPOLEN_COOKIE_PAIR:
3907
3908 break;
3909 case TCPOLEN_COOKIE_MIN+0:
3910 case TCPOLEN_COOKIE_MIN+2:
3911 case TCPOLEN_COOKIE_MIN+4:
3912 case TCPOLEN_COOKIE_MIN+6:
3913 case TCPOLEN_COOKIE_MAX:
3914
3915 opt_rx->cookie_plus = opsize;
3916 *hvpp = ptr;
3917 break;
3918 default:
3919
3920 break;
3921 }
3922 break;
3923 }
3924
3925 ptr += opsize - 2;
3926 length -= opsize;
3927 }
3928 }
3929}
3930EXPORT_SYMBOL(tcp_parse_options);
3931
3932static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3933{
3934 const __be32 *ptr = (const __be32 *)(th + 1);
3935
3936 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3937 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3938 tp->rx_opt.saw_tstamp = 1;
3939 ++ptr;
3940 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3941 ++ptr;
3942 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3943 return 1;
3944 }
3945 return 0;
3946}
3947
3948
3949
3950
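/* Fast option parsing: expect either no options or just the aligned
 * timestamp option, and fall back to the full parser for anything else.
 */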
3951static int tcp_fast_parse_options(const struct sk_buff *skb,
3952 const struct tcphdr *th,
3953 struct tcp_sock *tp, const u8 **hvpp)
3954{
3955
3956
3957
3958 if (th->doff == (sizeof(*th) / 4)) {
3959 tp->rx_opt.saw_tstamp = 0;
3960 return 0;
3961 } else if (tp->rx_opt.tstamp_ok &&
3962 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3963 if (tcp_parse_aligned_timestamp(tp, th))
3964 return 1;
3965 }
3966 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
3967 return 1;
3968}
3969
3970#ifdef CONFIG_TCP_MD5SIG
3971
3972
3973
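/* Parse the MD5 signature option (RFC 2385) and return a pointer to the
 * 16-byte digest, or NULL if the option is absent or malformed.
 */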
3974const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3975{
3976 int length = (th->doff << 2) - sizeof(*th);
3977 const u8 *ptr = (const u8 *)(th + 1);
3978
3979
3980 if (length < TCPOLEN_MD5SIG)
3981 return NULL;
3982
3983 while (length > 0) {
3984 int opcode = *ptr++;
3985 int opsize;
3986
3987 switch (opcode) {
3988 case TCPOPT_EOL:
3989 return NULL;
3990 case TCPOPT_NOP:
3991 length--;
3992 continue;
3993 default:
3994 opsize = *ptr++;
3995 if (opsize < 2 || opsize > length)
3996 return NULL;
3997 if (opcode == TCPOPT_MD5SIG)
3998 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3999 }
4000 ptr += opsize - 2;
4001 length -= opsize;
4002 }
4003 return NULL;
4004}
4005EXPORT_SYMBOL(tcp_parse_md5sig_option);
4006#endif
4007
4008static inline void tcp_store_ts_recent(struct tcp_sock *tp)
4009{
4010 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
4011 tp->rx_opt.ts_recent_stamp = get_seconds();
4012}
4013
4014static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
4015{
4016 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
4017
4018
4019
4020
4021
4022
4023
4024 if (tcp_paws_check(&tp->rx_opt, 0))
4025 tcp_store_ts_recent(tp);
4026 }
4027}
4028
/* PAWS as specified is too strict for pure ACKs: a segment that fails the
 * timestamp check may still be harmless if it is a pure duplicate ACK for
 * the expected sequence number that neither updates the window nor falls
 * outside the replay window.  Such "disordered" ACKs are most likely
 * reordered rather than replayed, so tcp_paws_discard() lets them through
 * (they still feed fast retransmit and congestion control).
 */
4052static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4053{
4054 const struct tcp_sock *tp = tcp_sk(sk);
4055 const struct tcphdr *th = tcp_hdr(skb);
4056 u32 seq = TCP_SKB_CB(skb)->seq;
4057 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4058
4059 return (
4060 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4061
4062
4063 ack == tp->snd_una &&
4064
4065
4066 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4067
4068
4069 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4070}
4071
4072static inline int tcp_paws_discard(const struct sock *sk,
4073 const struct sk_buff *skb)
4074{
4075 const struct tcp_sock *tp = tcp_sk(sk);
4076
4077 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4078 !tcp_disordered_ack(sk, skb);
4079}
4080
/* Validate the segment against the receive window: it must not end before
 * rcv_wup (already acknowledged data) nor start beyond the right edge of
 * the advertised window.
 */
4094static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4095{
4096 return !before(end_seq, tp->rcv_wup) &&
4097 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4098}
4099
4100
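/* Handle an incoming RST: translate it into the right error for the current
 * state, wake the process and close the socket.
 */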
4101static void tcp_reset(struct sock *sk)
4102{
4103
4104 switch (sk->sk_state) {
4105 case TCP_SYN_SENT:
4106 sk->sk_err = ECONNREFUSED;
4107 break;
4108 case TCP_CLOSE_WAIT:
4109 sk->sk_err = EPIPE;
4110 break;
4111 case TCP_CLOSE:
4112 return;
4113 default:
4114 sk->sk_err = ECONNRESET;
4115 }
4116
4117 smp_wmb();
4118
4119 if (!sock_flag(sk, SOCK_DEAD))
4120 sk->sk_error_report(sk);
4121
4122 tcp_done(sk);
4123}
4124
/* Process the FIN bit: advance the connection state machine (CLOSE_WAIT,
 * CLOSING, TIME_WAIT, ...), discard the out-of-order queue, and wake up the
 * process so it can observe EOF.
 */
4139static void tcp_fin(struct sock *sk)
4140{
4141 struct tcp_sock *tp = tcp_sk(sk);
4142
4143 inet_csk_schedule_ack(sk);
4144
4145 sk->sk_shutdown |= RCV_SHUTDOWN;
4146 sock_set_flag(sk, SOCK_DONE);
4147
4148 switch (sk->sk_state) {
4149 case TCP_SYN_RECV:
4150 case TCP_ESTABLISHED:
4151
4152 tcp_set_state(sk, TCP_CLOSE_WAIT);
4153 inet_csk(sk)->icsk_ack.pingpong = 1;
4154 break;
4155
4156 case TCP_CLOSE_WAIT:
4157 case TCP_CLOSING:
4158
4159
4160
4161 break;
4162 case TCP_LAST_ACK:
4163
4164 break;
4165
4166 case TCP_FIN_WAIT1:
4167
4168
4169
4170
4171 tcp_send_ack(sk);
4172 tcp_set_state(sk, TCP_CLOSING);
4173 break;
4174 case TCP_FIN_WAIT2:
4175
4176 tcp_send_ack(sk);
4177 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4178 break;
4179 default:
4180
4181
4182
4183 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4184 __func__, sk->sk_state);
4185 break;
4186 }
4187
4188
4189
4190
4191 __skb_queue_purge(&tp->out_of_order_queue);
4192 if (tcp_is_sack(tp))
4193 tcp_sack_reset(&tp->rx_opt);
4194 sk_mem_reclaim(sk);
4195
4196 if (!sock_flag(sk, SOCK_DEAD)) {
4197 sk->sk_state_change(sk);
4198
4199
4200 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4201 sk->sk_state == TCP_CLOSE)
4202 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4203 else
4204 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4205 }
4206}
4207
4208static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4209 u32 end_seq)
4210{
4211 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4212 if (before(seq, sp->start_seq))
4213 sp->start_seq = seq;
4214 if (after(end_seq, sp->end_seq))
4215 sp->end_seq = end_seq;
4216 return 1;
4217 }
4218 return 0;
4219}
4220
4221static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4222{
4223 struct tcp_sock *tp = tcp_sk(sk);
4224
4225 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4226 int mib_idx;
4227
4228 if (before(seq, tp->rcv_nxt))
4229 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4230 else
4231 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4232
4233 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4234
4235 tp->rx_opt.dsack = 1;
4236 tp->duplicate_sack[0].start_seq = seq;
4237 tp->duplicate_sack[0].end_seq = end_seq;
4238 }
4239}
4240
4241static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4242{
4243 struct tcp_sock *tp = tcp_sk(sk);
4244
4245 if (!tp->rx_opt.dsack)
4246 tcp_dsack_set(sk, seq, end_seq);
4247 else
4248 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4249}
4250
4251static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4252{
4253 struct tcp_sock *tp = tcp_sk(sk);
4254
4255 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4256 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4257 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4258 tcp_enter_quickack_mode(sk);
4259
4260 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4261 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4262
4263 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4264 end_seq = tp->rcv_nxt;
4265 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4266 }
4267 }
4268
4269 tcp_send_ack(sk);
4270}
4271
4272
4273
4274
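/* Maintenance of the SACK blocks we advertise to the sender.
 * tcp_sack_maybe_coalesce() merges blocks that have come to overlap after
 * one of them was extended.
 */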
4275static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4276{
4277 int this_sack;
4278 struct tcp_sack_block *sp = &tp->selective_acks[0];
4279 struct tcp_sack_block *swalk = sp + 1;
4280
4281
4282
4283
4284 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4285 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4286 int i;
4287
4288
4289
4290
4291 tp->rx_opt.num_sacks--;
4292 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4293 sp[i] = sp[i + 1];
4294 continue;
4295 }
4296 this_sack++, swalk++;
4297 }
4298}
4299
4300static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4301{
4302 struct tcp_sock *tp = tcp_sk(sk);
4303 struct tcp_sack_block *sp = &tp->selective_acks[0];
4304 int cur_sacks = tp->rx_opt.num_sacks;
4305 int this_sack;
4306
4307 if (!cur_sacks)
4308 goto new_sack;
4309
4310 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4311 if (tcp_sack_extend(sp, seq, end_seq)) {
4312
4313 for (; this_sack > 0; this_sack--, sp--)
4314 swap(*sp, *(sp - 1));
4315 if (cur_sacks > 1)
4316 tcp_sack_maybe_coalesce(tp);
4317 return;
4318 }
4319 }
4320
4321
4322
4323
4324
4325
4326
4327 if (this_sack >= TCP_NUM_SACKS) {
4328 this_sack--;
4329 tp->rx_opt.num_sacks--;
4330 sp--;
4331 }
4332 for (; this_sack > 0; this_sack--, sp--)
4333 *sp = *(sp - 1);
4334
4335new_sack:
4336
4337 sp->start_seq = seq;
4338 sp->end_seq = end_seq;
4339 tp->rx_opt.num_sacks++;
4340}
4341
4342
4343
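/* RCV.NXT advanced: drop advertised SACK blocks that are now entirely
 * covered by in-order data.
 */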
4344static void tcp_sack_remove(struct tcp_sock *tp)
4345{
4346 struct tcp_sack_block *sp = &tp->selective_acks[0];
4347 int num_sacks = tp->rx_opt.num_sacks;
4348 int this_sack;
4349
4350
4351 if (skb_queue_empty(&tp->out_of_order_queue)) {
4352 tp->rx_opt.num_sacks = 0;
4353 return;
4354 }
4355
4356 for (this_sack = 0; this_sack < num_sacks;) {
4357
4358 if (!before(tp->rcv_nxt, sp->start_seq)) {
4359 int i;
4360
4361
4362 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4363
4364
4365 for (i = this_sack + 1; i < num_sacks; i++)
4366 tp->selective_acks[i - 1] = tp->selective_acks[i];
4367 num_sacks--;
4368 continue;
4369 }
4370 this_sack++;
4371 sp++;
4372 }
4373 tp->rx_opt.num_sacks = num_sacks;
4374}
4375
4376
4377
4378
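/* Move whatever has become in-sequence from the out-of-order queue to the
 * receive queue, generating D-SACK information for any duplicate parts.
 */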
4379static void tcp_ofo_queue(struct sock *sk)
4380{
4381 struct tcp_sock *tp = tcp_sk(sk);
4382 __u32 dsack_high = tp->rcv_nxt;
4383 struct sk_buff *skb;
4384
4385 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4386 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4387 break;
4388
4389 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4390 __u32 dsack = dsack_high;
4391 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4392 dsack_high = TCP_SKB_CB(skb)->end_seq;
4393 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4394 }
4395
4396 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4397 SOCK_DEBUG(sk, "ofo packet was already received\n");
4398 __skb_unlink(skb, &tp->out_of_order_queue);
4399 __kfree_skb(skb);
4400 continue;
4401 }
4402 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4403 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4404 TCP_SKB_CB(skb)->end_seq);
4405
4406 __skb_unlink(skb, &tp->out_of_order_queue);
4407 __skb_queue_tail(&sk->sk_receive_queue, skb);
4408 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4409 if (tcp_hdr(skb)->fin)
4410 tcp_fin(sk);
4411 }
4412}
4413
4414static int tcp_prune_ofo_queue(struct sock *sk);
4415static int tcp_prune_queue(struct sock *sk);
4416
4417static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4418{
4419 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4420 !sk_rmem_schedule(sk, size)) {
4421
4422 if (tcp_prune_queue(sk) < 0)
4423 return -1;
4424
4425 if (!sk_rmem_schedule(sk, size)) {
4426 if (!tcp_prune_ofo_queue(sk))
4427 return -1;
4428
4429 if (!sk_rmem_schedule(sk, size))
4430 return -1;
4431 }
4432 }
4433 return 0;
4434}
4435
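/* Queue an incoming data segment.  In-sequence data goes straight to the
 * receive queue (or is copied directly into the receiving task's iovec);
 * out-of-window segments are dropped with a duplicate ACK, and everything
 * else lands in the out-of-order queue with SACK/D-SACK bookkeeping.
 */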
4436static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4437{
4438 const struct tcphdr *th = tcp_hdr(skb);
4439 struct tcp_sock *tp = tcp_sk(sk);
4440 int eaten = -1;
4441
4442 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4443 goto drop;
4444
4445 skb_dst_drop(skb);
4446 __skb_pull(skb, th->doff * 4);
4447
4448 TCP_ECN_accept_cwr(tp, skb);
4449
4450 tp->rx_opt.dsack = 0;
4451
4452
4453
4454
4455
4456 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4457 if (tcp_receive_window(tp) == 0)
4458 goto out_of_window;
4459
4460
4461 if (tp->ucopy.task == current &&
4462 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4463 sock_owned_by_user(sk) && !tp->urg_data) {
4464 int chunk = min_t(unsigned int, skb->len,
4465 tp->ucopy.len);
4466
4467 __set_current_state(TASK_RUNNING);
4468
4469 local_bh_enable();
4470 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4471 tp->ucopy.len -= chunk;
4472 tp->copied_seq += chunk;
4473 eaten = (chunk == skb->len);
4474 tcp_rcv_space_adjust(sk);
4475 }
4476 local_bh_disable();
4477 }
4478
4479 if (eaten <= 0) {
4480queue_and_out:
4481 if (eaten < 0 &&
4482 tcp_try_rmem_schedule(sk, skb->truesize))
4483 goto drop;
4484
4485 skb_set_owner_r(skb, sk);
4486 __skb_queue_tail(&sk->sk_receive_queue, skb);
4487 }
4488 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4489 if (skb->len)
4490 tcp_event_data_recv(sk, skb);
4491 if (th->fin)
4492 tcp_fin(sk);
4493
4494 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4495 tcp_ofo_queue(sk);
4496
4497
4498
4499
4500 if (skb_queue_empty(&tp->out_of_order_queue))
4501 inet_csk(sk)->icsk_ack.pingpong = 0;
4502 }
4503
4504 if (tp->rx_opt.num_sacks)
4505 tcp_sack_remove(tp);
4506
4507 tcp_fast_path_check(sk);
4508
4509 if (eaten > 0)
4510 __kfree_skb(skb);
4511 else if (!sock_flag(sk, SOCK_DEAD))
4512 sk->sk_data_ready(sk, 0);
4513 return;
4514 }
4515
4516 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4517
4518 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4519 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4520
4521out_of_window:
4522 tcp_enter_quickack_mode(sk);
4523 inet_csk_schedule_ack(sk);
4524drop:
4525 __kfree_skb(skb);
4526 return;
4527 }
4528
4529
4530 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4531 goto out_of_window;
4532
4533 tcp_enter_quickack_mode(sk);
4534
4535 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4536
4537 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4538 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4539 TCP_SKB_CB(skb)->end_seq);
4540
4541 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4542
4543
4544
4545
4546 if (!tcp_receive_window(tp))
4547 goto out_of_window;
4548 goto queue_and_out;
4549 }
4550
4551 TCP_ECN_check_ce(tp, skb);
4552
4553 if (tcp_try_rmem_schedule(sk, skb->truesize))
4554 goto drop;
4555
4556
4557 tp->pred_flags = 0;
4558 inet_csk_schedule_ack(sk);
4559
4560 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4561 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4562
4563 skb_set_owner_r(skb, sk);
4564
4565 if (!skb_peek(&tp->out_of_order_queue)) {
4566
4567 if (tcp_is_sack(tp)) {
4568 tp->rx_opt.num_sacks = 1;
4569 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4570 tp->selective_acks[0].end_seq =
4571 TCP_SKB_CB(skb)->end_seq;
4572 }
4573 __skb_queue_head(&tp->out_of_order_queue, skb);
4574 } else {
4575 struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
4576 u32 seq = TCP_SKB_CB(skb)->seq;
4577 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4578
4579 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4580 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4581
4582 if (!tp->rx_opt.num_sacks ||
4583 tp->selective_acks[0].end_seq != seq)
4584 goto add_sack;
4585
4586
4587 tp->selective_acks[0].end_seq = end_seq;
4588 return;
4589 }
4590
4591
4592 while (1) {
4593 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4594 break;
4595 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4596 skb1 = NULL;
4597 break;
4598 }
4599 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4600 }
4601
4602
4603 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4604 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4605
4606 __kfree_skb(skb);
4607 tcp_dsack_set(sk, seq, end_seq);
4608 goto add_sack;
4609 }
4610 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4611
4612 tcp_dsack_set(sk, seq,
4613 TCP_SKB_CB(skb1)->end_seq);
4614 } else {
4615 if (skb_queue_is_first(&tp->out_of_order_queue,
4616 skb1))
4617 skb1 = NULL;
4618 else
4619 skb1 = skb_queue_prev(
4620 &tp->out_of_order_queue,
4621 skb1);
4622 }
4623 }
4624 if (!skb1)
4625 __skb_queue_head(&tp->out_of_order_queue, skb);
4626 else
4627 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4628
4629
4630 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4631 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4632
4633 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4634 break;
4635 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4636 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4637 end_seq);
4638 break;
4639 }
4640 __skb_unlink(skb1, &tp->out_of_order_queue);
4641 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4642 TCP_SKB_CB(skb1)->end_seq);
4643 __kfree_skb(skb1);
4644 }
4645
4646add_sack:
4647 if (tcp_is_sack(tp))
4648 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4649 }
4650}
4651
4652static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4653 struct sk_buff_head *list)
4654{
4655 struct sk_buff *next = NULL;
4656
4657 if (!skb_queue_is_last(list, skb))
4658 next = skb_queue_next(list, skb);
4659
4660 __skb_unlink(skb, list);
4661 __kfree_skb(skb);
4662 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4663
4664 return next;
4665}
4666
4667
4668
4669
4670
4671
4672
4673
4674
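/* Collapse the skbs between head and tail (sequence range start..end) into
 * fewer, tightly packed skbs to reclaim receive memory.  SYN/FIN segments
 * and gaps in the sequence space terminate the collapse.
 */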
4675static void
4676tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4677 struct sk_buff *head, struct sk_buff *tail,
4678 u32 start, u32 end)
4679{
4680 struct sk_buff *skb, *n;
4681 bool end_of_skbs;
4682
4683
4684
4685 skb = head;
4686restart:
4687 end_of_skbs = true;
4688 skb_queue_walk_from_safe(list, skb, n) {
4689 if (skb == tail)
4690 break;
4691
4692 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4693 skb = tcp_collapse_one(sk, skb, list);
4694 if (!skb)
4695 break;
4696 goto restart;
4697 }
4698
4699
4700
4701
4702
4703
4704 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
4705 (tcp_win_from_space(skb->truesize) > skb->len ||
4706 before(TCP_SKB_CB(skb)->seq, start))) {
4707 end_of_skbs = false;
4708 break;
4709 }
4710
4711 if (!skb_queue_is_last(list, skb)) {
4712 struct sk_buff *next = skb_queue_next(list, skb);
4713 if (next != tail &&
4714 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4715 end_of_skbs = false;
4716 break;
4717 }
4718 }
4719
4720
4721 start = TCP_SKB_CB(skb)->end_seq;
4722 }
4723 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
4724 return;
4725
4726 while (before(start, end)) {
4727 struct sk_buff *nskb;
4728 unsigned int header = skb_headroom(skb);
4729 int copy = SKB_MAX_ORDER(header, 0);
4730
4731
4732 if (copy < 0)
4733 return;
4734 if (end - start < copy)
4735 copy = end - start;
4736 nskb = alloc_skb(copy + header, GFP_ATOMIC);
4737 if (!nskb)
4738 return;
4739
4740 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4741 skb_set_network_header(nskb, (skb_network_header(skb) -
4742 skb->head));
4743 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4744 skb->head));
4745 skb_reserve(nskb, header);
4746 memcpy(nskb->head, skb->head, header);
4747 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4748 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4749 __skb_queue_before(list, skb, nskb);
4750 skb_set_owner_r(nskb, sk);
4751
4752
4753 while (copy > 0) {
4754 int offset = start - TCP_SKB_CB(skb)->seq;
4755 int size = TCP_SKB_CB(skb)->end_seq - start;
4756
4757 BUG_ON(offset < 0);
4758 if (size > 0) {
4759 size = min(copy, size);
4760 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4761 BUG();
4762 TCP_SKB_CB(nskb)->end_seq += size;
4763 copy -= size;
4764 start += size;
4765 }
4766 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4767 skb = tcp_collapse_one(sk, skb, list);
4768 if (!skb ||
4769 skb == tail ||
4770 tcp_hdr(skb)->syn ||
4771 tcp_hdr(skb)->fin)
4772 return;
4773 }
4774 }
4775 }
4776}
4777
4778
4779
4780
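/* Collapse the out-of-order queue, one contiguous sequence range at a time. */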
4781static void tcp_collapse_ofo_queue(struct sock *sk)
4782{
4783 struct tcp_sock *tp = tcp_sk(sk);
4784 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4785 struct sk_buff *head;
4786 u32 start, end;
4787
4788 if (skb == NULL)
4789 return;
4790
4791 start = TCP_SKB_CB(skb)->seq;
4792 end = TCP_SKB_CB(skb)->end_seq;
4793 head = skb;
4794
4795 for (;;) {
4796 struct sk_buff *next = NULL;
4797
4798 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4799 next = skb_queue_next(&tp->out_of_order_queue, skb);
4800 skb = next;
4801
4802
4803
4804 if (!skb ||
4805 after(TCP_SKB_CB(skb)->seq, end) ||
4806 before(TCP_SKB_CB(skb)->end_seq, start)) {
4807 tcp_collapse(sk, &tp->out_of_order_queue,
4808 head, skb, start, end);
4809 head = skb;
4810 if (!skb)
4811 break;
4812
4813 start = TCP_SKB_CB(skb)->seq;
4814 end = TCP_SKB_CB(skb)->end_seq;
4815 } else {
4816 if (before(TCP_SKB_CB(skb)->seq, start))
4817 start = TCP_SKB_CB(skb)->seq;
4818 if (after(TCP_SKB_CB(skb)->end_seq, end))
4819 end = TCP_SKB_CB(skb)->end_seq;
4820 }
4821 }
4822}
4823
4824
4825
4826
4827
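/* Purge the entire out-of-order queue and reset SACK state (we may have
 * advertised that data in SACK blocks).  Returns non-zero if anything was
 * freed.
 */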
4828static int tcp_prune_ofo_queue(struct sock *sk)
4829{
4830 struct tcp_sock *tp = tcp_sk(sk);
4831 int res = 0;
4832
4833 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4834 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4835 __skb_queue_purge(&tp->out_of_order_queue);
4836
4837
4838
4839
4840
4841
4842 if (tp->rx_opt.sack_ok)
4843 tcp_sack_reset(&tp->rx_opt);
4844 sk_mem_reclaim(sk);
4845 res = 1;
4846 }
4847 return res;
4848}
4849
4850
4851
4852
4853
4854
4855
4856
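/* The receiver is over its memory limit: clamp the window, collapse the
 * queues, and as a last resort drop the out-of-order queue.  Returns a
 * negative value if we are still over the limit, in which case the caller
 * drops the segment and the fast path is disabled.
 */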
4857static int tcp_prune_queue(struct sock *sk)
4858{
4859 struct tcp_sock *tp = tcp_sk(sk);
4860
4861 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4862
4863 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4864
4865 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4866 tcp_clamp_window(sk);
4867 else if (tcp_memory_pressure)
4868 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4869
4870 tcp_collapse_ofo_queue(sk);
4871 if (!skb_queue_empty(&sk->sk_receive_queue))
4872 tcp_collapse(sk, &sk->sk_receive_queue,
4873 skb_peek(&sk->sk_receive_queue),
4874 NULL,
4875 tp->copied_seq, tp->rcv_nxt);
4876 sk_mem_reclaim(sk);
4877
4878 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4879 return 0;
4880
4881
4882
4883
4884 tcp_prune_ofo_queue(sk);
4885
4886 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4887 return 0;
4888
4889
4890
4891
4892
4893 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4894
4895
4896 tp->pred_flags = 0;
4897 return -1;
4898}
4899
4900
4901
4902
4903
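/* RFC 2861 cwnd validation, slow part: the connection has been application
 * limited, so shrink cwnd toward the amount that was actually used instead
 * of keeping unvalidated credit.
 */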
4904void tcp_cwnd_application_limited(struct sock *sk)
4905{
4906 struct tcp_sock *tp = tcp_sk(sk);
4907
4908 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4909 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4910
4911 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4912 u32 win_used = max(tp->snd_cwnd_used, init_win);
4913 if (win_used < tp->snd_cwnd) {
4914 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4915 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4916 }
4917 tp->snd_cwnd_used = 0;
4918 }
4919 tp->snd_cwnd_stamp = tcp_time_stamp;
4920}
4921
4922static int tcp_should_expand_sndbuf(const struct sock *sk)
4923{
4924 const struct tcp_sock *tp = tcp_sk(sk);
4925
4926
4927
4928
4929 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4930 return 0;
4931
4932
4933 if (tcp_memory_pressure)
4934 return 0;
4935
4936
4937 if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4938 return 0;
4939
4940
4941 if (tp->packets_out >= tp->snd_cwnd)
4942 return 0;
4943
4944 return 1;
4945}
4946
4947
4948
4949
4950
4951
4952
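/* Called when an ACK has freed write-queue memory: grow sk_sndbuf if cwnd
 * and reordering justify it, then wake the writer.
 */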
4953static void tcp_new_space(struct sock *sk)
4954{
4955 struct tcp_sock *tp = tcp_sk(sk);
4956
4957 if (tcp_should_expand_sndbuf(sk)) {
4958 int sndmem = SKB_TRUESIZE(max_t(u32,
4959 tp->rx_opt.mss_clamp,
4960 tp->mss_cache) +
4961 MAX_TCP_HEADER);
4962 int demanded = max_t(unsigned int, tp->snd_cwnd,
4963 tp->reordering + 1);
4964 sndmem *= 2 * demanded;
4965 if (sndmem > sk->sk_sndbuf)
4966 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4967 tp->snd_cwnd_stamp = tcp_time_stamp;
4968 }
4969
4970 sk->sk_write_space(sk);
4971}
4972
4973static void tcp_check_space(struct sock *sk)
4974{
4975 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4976 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4977 if (sk->sk_socket &&
4978 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4979 tcp_new_space(sk);
4980 }
4981}
4982
4983static inline void tcp_data_snd_check(struct sock *sk)
4984{
4985 tcp_push_pending_frames(sk);
4986 tcp_check_space(sk);
4987}
4988
4989
4990
4991
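/* Decide whether to ACK immediately: more than one full-sized segment is
 * unacknowledged and the window can be opened, or we are in quickack mode,
 * or out-of-order data is pending; otherwise schedule a delayed ACK.
 */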
4992static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4993{
4994 struct tcp_sock *tp = tcp_sk(sk);
4995
4996
4997 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4998
4999
5000
5001 __tcp_select_window(sk) >= tp->rcv_wnd) ||
5002
5003 tcp_in_quickack_mode(sk) ||
5004
5005 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
5006
5007 tcp_send_ack(sk);
5008 } else {
5009
5010 tcp_send_delayed_ack(sk);
5011 }
5012}
5013
5014static inline void tcp_ack_snd_check(struct sock *sk)
5015{
5016 if (!inet_csk_ack_scheduled(sk)) {
5017
5018 return;
5019 }
5020 __tcp_ack_snd_check(sk, 1);
5021}
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5034{
5035 struct tcp_sock *tp = tcp_sk(sk);
5036 u32 ptr = ntohs(th->urg_ptr);
5037
5038 if (ptr && !sysctl_tcp_stdurg)
5039 ptr--;
5040 ptr += ntohl(th->seq);
5041
5042
5043 if (after(tp->copied_seq, ptr))
5044 return;
5045
 /* An urgent pointer that points into data we have already received (even
  * if the process has not read it yet) tells us nothing new; ignore it.
  */
5056 if (before(ptr, tp->rcv_nxt))
5057 return;
5058
5059
5060 if (tp->urg_data && !after(ptr, tp->urg_seq))
5061 return;
5062
5063
5064 sk_send_sigurg(sk);
5065
5066
 /* BSD-compatible handling of back-to-back urgent pointers: if the previous
  * urgent byte was never read by the process, silently skip it so that only
  * the most recent urgent byte is reported.
  */
5081 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5082 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5083 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5084 tp->copied_seq++;
5085 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5086 __skb_unlink(skb, &sk->sk_receive_queue);
5087 __kfree_skb(skb);
5088 }
5089 }
5090
5091 tp->urg_data = TCP_URG_NOTYET;
5092 tp->urg_seq = ptr;
5093
5094
5095 tp->pred_flags = 0;
5096}
5097
5098
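/* Fast-path urgent handling: note a new urgent pointer if the URG bit is
 * set, and once the urgent byte itself arrives copy it into tp->urg_data.
 */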
5099static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5100{
5101 struct tcp_sock *tp = tcp_sk(sk);
5102
5103
5104 if (th->urg)
5105 tcp_check_urg(sk, th);
5106
5107
5108 if (tp->urg_data == TCP_URG_NOTYET) {
5109 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5110 th->syn;
5111
5112
5113 if (ptr < skb->len) {
5114 u8 tmp;
5115 if (skb_copy_bits(skb, ptr, &tmp, 1))
5116 BUG();
5117 tp->urg_data = TCP_URG_VALID | tmp;
5118 if (!sock_flag(sk, SOCK_DEAD))
5119 sk->sk_data_ready(sk, 0);
5120 }
5121 }
5122}
5123
5124static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5125{
5126 struct tcp_sock *tp = tcp_sk(sk);
5127 int chunk = skb->len - hlen;
5128 int err;
5129
5130 local_bh_enable();
5131 if (skb_csum_unnecessary(skb))
5132 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
5133 else
5134 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
5135 tp->ucopy.iov);
5136
5137 if (!err) {
5138 tp->ucopy.len -= chunk;
5139 tp->copied_seq += chunk;
5140 tcp_rcv_space_adjust(sk);
5141 }
5142
5143 local_bh_disable();
5144 return err;
5145}
5146
5147static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5148 struct sk_buff *skb)
5149{
5150 __sum16 result;
5151
5152 if (sock_owned_by_user(sk)) {
5153 local_bh_enable();
5154 result = __tcp_checksum_complete(skb);
5155 local_bh_disable();
5156 } else {
5157 result = __tcp_checksum_complete(skb);
5158 }
5159 return result;
5160}
5161
5162static inline int tcp_checksum_complete_user(struct sock *sk,
5163 struct sk_buff *skb)
5164{
5165 return !skb_csum_unnecessary(skb) &&
5166 __tcp_checksum_complete_user(sk, skb);
5167}
5168
5169#ifdef CONFIG_NET_DMA
5170static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5171 int hlen)
5172{
5173 struct tcp_sock *tp = tcp_sk(sk);
5174 int chunk = skb->len - hlen;
5175 int dma_cookie;
5176 int copied_early = 0;
5177
5178 if (tp->ucopy.wakeup)
5179 return 0;
5180
5181 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5182 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
5183
5184 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5185
5186 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
5187 skb, hlen,
5188 tp->ucopy.iov, chunk,
5189 tp->ucopy.pinned_list);
5190
5191 if (dma_cookie < 0)
5192 goto out;
5193
5194 tp->ucopy.dma_cookie = dma_cookie;
5195 copied_early = 1;
5196
5197 tp->ucopy.len -= chunk;
5198 tp->copied_seq += chunk;
5199 tcp_rcv_space_adjust(sk);
5200
5201 if ((tp->ucopy.len == 0) ||
5202 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
5203 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
5204 tp->ucopy.wakeup = 1;
5205 sk->sk_data_ready(sk, 0);
5206 }
5207 } else if (chunk > 0) {
5208 tp->ucopy.wakeup = 1;
5209 sk->sk_data_ready(sk, 0);
5210 }
5211out:
5212 return copied_early;
5213}
5214#endif
5215
/* Shared by the tcp_rcv_established() slow path and tcp_rcv_state_process():
 * PAWS and sequence-number validation of an incoming segment, plus the RST
 * and in-window SYN checks.  Returns 1 if the caller should keep processing
 * the segment, 0 if it was discarded here, and -1 after the connection has
 * been reset.
 */
5219static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5220 const struct tcphdr *th, int syn_inerr)
5221{
5222 const u8 *hash_location;
5223 struct tcp_sock *tp = tcp_sk(sk);
5224
5225
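	/* RFC 1323: apply the PAWS check first. */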
5226 if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
5227 tp->rx_opt.saw_tstamp &&
5228 tcp_paws_discard(sk, skb)) {
5229 if (!th->rst) {
5230 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5231 tcp_send_dupack(sk, skb);
5232 goto discard;
5233 }
5234
5235 }
5236
5237
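	/* Step 1: check the sequence number. */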
5238 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		/* RFC 793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST
		 * bit is set, if so drop the segment and return)".
		 */
5245 if (!th->rst)
5246 tcp_send_dupack(sk, skb);
5247 goto discard;
5248 }
5249
5250
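	/* Step 2: check the RST bit. */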
5251 if (th->rst) {
5252 tcp_reset(sk);
5253 goto discard;
5254 }
5255
5256
5257
5258
5259 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5260
5261
5262
5263
5264 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5265 if (syn_inerr)
5266 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5267 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
5268 tcp_reset(sk);
5269 return -1;
5270 }
5271
5272 return 1;
5273
5274discard:
5275 __kfree_skb(skb);
5276 return 0;
5277}
5278
/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path.  The fast path is taken
 *	only for segments that match the header prediction flags: the expected
 *	sequence number, no unexpected flags, options or window changes, and
 *	traffic flowing essentially in one direction.  Everything else
 *	(out-of-order data, urgent data, zero windows, shortage of buffer
 *	space, unexpected header contents) drops into a standard receive
 *	procedure patterned after RFC 793.
 */
5302int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5303 const struct tcphdr *th, unsigned int len)
5304{
5305 struct tcp_sock *tp = tcp_sk(sk);
5306 int res;
5307
	/*
	 *	Header prediction.
	 *	The fast path below loosely follows Van Jacobson's
	 *	"30 instruction TCP receive" idea: when the segment is exactly
	 *	the one expected next and nothing unusual is set in the header,
	 *	it can be ACK-processed, queued, or copied straight to user
	 *	space without running the full state machine.
	 */

5323 tp->rx_opt.saw_tstamp = 0;
5324
	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header prediction is to be made:
	 *	'S' is always tp->tcp_header_len >> 2, and
	 *	'?' is 0 for the fast path; otherwise pred_flags is 0 to turn
	 *	prediction off (for instance when there are holes in the
	 *	receive space).  The PSH flag is ignored.
	 */
5334 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5335 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5336 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5337 int tcp_header_len = tp->tcp_header_len;
5338
5339
5340
5341
5342
5343
5344
5345 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5346
5347 if (!tcp_parse_aligned_timestamp(tp, th))
5348 goto slow_path;
5349
5350
5351 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5352 goto slow_path;
5353
			/* DO NOT update ts_recent here: if the checksum fails
			 * and the timestamp was the corrupted part, we would
			 * drop all subsequent packets on the PAWS test and
			 * hang the connection.
			 */
5359 }
5360
5361 if (len <= tcp_header_len) {
5362
5363 if (len == tcp_header_len) {
5364
5365
5366
5367
5368 if (tcp_header_len ==
5369 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5370 tp->rcv_nxt == tp->rcv_wup)
5371 tcp_store_ts_recent(tp);
5372
5373
5374
5375
5376 tcp_ack(sk, skb, 0);
5377 __kfree_skb(skb);
5378 tcp_data_snd_check(sk);
5379 return 0;
5380 } else {
5381 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5382 goto discard;
5383 }
5384 } else {
5385 int eaten = 0;
5386 int copied_early = 0;
5387
5388 if (tp->copied_seq == tp->rcv_nxt &&
5389 len - tcp_header_len <= tp->ucopy.len) {
5390#ifdef CONFIG_NET_DMA
5391 if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5392 copied_early = 1;
5393 eaten = 1;
5394 }
5395#endif
5396 if (tp->ucopy.task == current &&
5397 sock_owned_by_user(sk) && !copied_early) {
5398 __set_current_state(TASK_RUNNING);
5399
5400 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
5401 eaten = 1;
5402 }
5403 if (eaten) {
5404
5405
5406
5407
5408 if (tcp_header_len ==
5409 (sizeof(struct tcphdr) +
5410 TCPOLEN_TSTAMP_ALIGNED) &&
5411 tp->rcv_nxt == tp->rcv_wup)
5412 tcp_store_ts_recent(tp);
5413
5414 tcp_rcv_rtt_measure_ts(sk, skb);
5415
5416 __skb_pull(skb, tcp_header_len);
5417 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5418 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5419 }
5420 if (copied_early)
5421 tcp_cleanup_rbuf(sk, skb->len);
5422 }
5423 if (!eaten) {
5424 if (tcp_checksum_complete_user(sk, skb))
5425 goto csum_error;
5426
5427
5428
5429
5430
5431 if (tcp_header_len ==
5432 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5433 tp->rcv_nxt == tp->rcv_wup)
5434 tcp_store_ts_recent(tp);
5435
5436 tcp_rcv_rtt_measure_ts(sk, skb);
5437
5438 if ((int)skb->truesize > sk->sk_forward_alloc)
5439 goto step5;
5440
5441 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5442
5443
5444 __skb_pull(skb, tcp_header_len);
5445 __skb_queue_tail(&sk->sk_receive_queue, skb);
5446 skb_set_owner_r(skb, sk);
5447 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5448 }
5449
5450 tcp_event_data_recv(sk, skb);
5451
5452 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5453
5454 tcp_ack(sk, skb, FLAG_DATA);
5455 tcp_data_snd_check(sk);
5456 if (!inet_csk_ack_scheduled(sk))
5457 goto no_ack;
5458 }
5459
5460 if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
5461 __tcp_ack_snd_check(sk, 0);
5462no_ack:
5463#ifdef CONFIG_NET_DMA
5464 if (copied_early)
5465 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
5466 else
5467#endif
5468 if (eaten)
5469 __kfree_skb(skb);
5470 else
5471 sk->sk_data_ready(sk, 0);
5472 return 0;
5473 }
5474 }
5475
5476slow_path:
5477 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5478 goto csum_error;
5479
5480
5481
5482
5483
5484 res = tcp_validate_incoming(sk, skb, th, 1);
5485 if (res <= 0)
5486 return -res;
5487
5488step5:
5489 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5490 goto discard;
5491
5492 tcp_rcv_rtt_measure_ts(sk, skb);
5493
5494
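	/* Process urgent data. */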
5495 tcp_urg(sk, skb, th);
5496
5497
5498 tcp_data_queue(sk, skb);
5499
5500 tcp_data_snd_check(sk);
5501 tcp_ack_snd_check(sk);
5502 return 0;
5503
5504csum_error:
5505 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5506
5507discard:
5508 __kfree_skb(skb);
5509 return 0;
5510}
5511EXPORT_SYMBOL(tcp_rcv_established);
5512
5513static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5514 const struct tcphdr *th, unsigned int len)
5515{
5516 const u8 *hash_location;
5517 struct inet_connection_sock *icsk = inet_csk(sk);
5518 struct tcp_sock *tp = tcp_sk(sk);
5519 struct tcp_cookie_values *cvp = tp->cookie_values;
5520 int saved_clamp = tp->rx_opt.mss_clamp;
5521
5522 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
5523
5524 if (th->ack) {
		/* RFC 793, SYN-SENT state, first check the ACK bit:
		 *
		 *   "If the ACK bit is set
		 *      If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *      a reset (unless the RST bit is set, if so drop
		 *      the segment and return)"
		 *
		 * We sent nothing but the SYN, so the only acceptable ACK
		 * value is exactly snd_nxt.
		 */
5536 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
5537 goto reset_and_undo;
5538
5539 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5540 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5541 tcp_time_stamp)) {
5542 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
5543 goto reset_and_undo;
5544 }
5545
		/* The ACK is acceptable.  RFC 793, second check the RST bit:
		 *
		 *   "If the RST bit is set
		 *      If the ACK was acceptable then signal the user 'error:
		 *      connection reset', drop the segment, enter CLOSED
		 *      state, delete TCB, and return."
		 */
5554 if (th->rst) {
5555 tcp_reset(sk);
5556 goto discard;
5557 }
5558
5559
5560
5561
5562
5563
5564
5565
5566 if (!th->syn)
5567 goto discard_and_undo;
5568
5569
5570
5571
5572
5573
5574
5575
5576 TCP_ECN_rcv_synack(tp, th);
5577
5578 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5579 tcp_ack(sk, skb, FLAG_SLOWPATH);
5580
5581
5582
5583
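		/* The SYN occupies one sequence number: start receiving at
		 * seq + 1 and move towards ESTABLISHED.
		 */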
5584 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5585 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5586
5587
5588
5589
5590 tp->snd_wnd = ntohs(th->window);
5591 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5592
5593 if (!tp->rx_opt.wscale_ok) {
5594 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5595 tp->window_clamp = min(tp->window_clamp, 65535U);
5596 }
5597
5598 if (tp->rx_opt.saw_tstamp) {
5599 tp->rx_opt.tstamp_ok = 1;
5600 tp->tcp_header_len =
5601 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5602 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5603 tcp_store_ts_recent(tp);
5604 } else {
5605 tp->tcp_header_len = sizeof(struct tcphdr);
5606 }
5607
5608 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5609 tcp_enable_fack(tp);
5610
5611 tcp_mtup_init(sk);
5612 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5613 tcp_initialize_rcv_mss(sk);
5614
5615
5616
5617
5618 tp->copied_seq = tp->rcv_nxt;
5619
5620 if (cvp != NULL &&
5621 cvp->cookie_pair_size > 0 &&
5622 tp->rx_opt.cookie_plus > 0) {
5623 int cookie_size = tp->rx_opt.cookie_plus
5624 - TCPOLEN_COOKIE_BASE;
5625 int cookie_pair_size = cookie_size
5626 + cvp->cookie_desired;
5627
			/* TCP Cookie Transactions: the SYN-ACK carried the
			 * responder's cookie.  Store it after our own cookie
			 * in cookie_pair, provided the pair fits, and record
			 * the combined length.
			 */
5637 if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5638 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5639 hash_location, cookie_size);
5640 cvp->cookie_pair_size = cookie_pair_size;
5641 }
5642 }
5643
5644 smp_mb();
5645 tcp_set_state(sk, TCP_ESTABLISHED);
5646
5647 security_inet_conn_established(sk, skb);
5648
5649
5650 icsk->icsk_af_ops->rebuild_header(sk);
5651
5652 tcp_init_metrics(sk);
5653
5654 tcp_init_congestion_control(sk);
5655
5656
5657
5658
5659 tp->lsndtime = tcp_time_stamp;
5660
5661 tcp_init_buffer_space(sk);
5662
5663 if (sock_flag(sk, SOCK_KEEPOPEN))
5664 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5665
5666 if (!tp->rx_opt.snd_wscale)
5667 __tcp_fast_path_on(tp, tp->snd_wnd);
5668 else
5669 tp->pred_flags = 0;
5670
5671 if (!sock_flag(sk, SOCK_DEAD)) {
5672 sk->sk_state_change(sk);
5673 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5674 }
5675
5676 if (sk->sk_write_pending ||
5677 icsk->icsk_accept_queue.rskq_defer_accept ||
5678 icsk->icsk_ack.pingpong) {
			/* Save one ACK: the application already has data to
			 * send (or delayed ACKs are preferred), so schedule a
			 * delayed ACK rather than ACKing the SYN-ACK at once;
			 * the ACK can then ride on the first data segment.
			 */
5686 inet_csk_schedule_ack(sk);
5687 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5688 icsk->icsk_ack.ato = TCP_ATO_MIN;
5689 tcp_incr_quickack(sk);
5690 tcp_enter_quickack_mode(sk);
5691 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5692 TCP_DELACK_MAX, TCP_RTO_MAX);
5693
5694discard:
5695 __kfree_skb(skb);
5696 return 0;
5697 } else {
5698 tcp_send_ack(sk);
5699 }
5700 return -1;
5701 }
5702
5703
5704
5705 if (th->rst) {
		/* RFC 793: with no ACK to validate it against, an RST
		 * received in SYN-SENT is simply dropped.
		 */
5712 goto discard_and_undo;
5713 }
5714
5715
5716 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5717 tcp_paws_reject(&tp->rx_opt, 0))
5718 goto discard_and_undo;
5719
5720 if (th->syn) {
5721
5722
5723
5724
5725 tcp_set_state(sk, TCP_SYN_RECV);
5726
5727 if (tp->rx_opt.saw_tstamp) {
5728 tp->rx_opt.tstamp_ok = 1;
5729 tcp_store_ts_recent(tp);
5730 tp->tcp_header_len =
5731 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5732 } else {
5733 tp->tcp_header_len = sizeof(struct tcphdr);
5734 }
5735
5736 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5737 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5738
5739
5740
5741
5742 tp->snd_wnd = ntohs(th->window);
5743 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5744 tp->max_window = tp->snd_wnd;
5745
5746 TCP_ECN_rcv_syn(tp, th);
5747
5748 tcp_mtup_init(sk);
5749 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5750 tcp_initialize_rcv_mss(sk);
5751
5752 tcp_send_synack(sk);
5753#if 0
		/* We could accept data and URG from this ACK-less SYN, but
		 * since data in other ACK-less segments is ignored, discard
		 * it for consistency.  Enabling the return below would let
		 * the caller process the data instead.
		 */
5763 return -1;
5764#else
5765 goto discard;
5766#endif
5767 }
5768
5769
5770
5771
5772discard_and_undo:
5773 tcp_clear_options(&tp->rx_opt);
5774 tp->rx_opt.mss_clamp = saved_clamp;
5775 goto discard;
5776
5777reset_and_undo:
5778 tcp_clear_options(&tp->rx_opt);
5779 tp->rx_opt.mss_clamp = saved_clamp;
5780 return 1;
5781}
5782
/*
 *	This function implements the receiving procedure of RFC 793 for all
 *	states except ESTABLISHED and TIME_WAIT.  It is shared by IPv4 and
 *	IPv6, so it must remain address family independent.
 */
5790int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5791 const struct tcphdr *th, unsigned int len)
5792{
5793 struct tcp_sock *tp = tcp_sk(sk);
5794 struct inet_connection_sock *icsk = inet_csk(sk);
5795 int queued = 0;
5796 int res;
5797
5798 tp->rx_opt.saw_tstamp = 0;
5799
5800 switch (sk->sk_state) {
5801 case TCP_CLOSE:
5802 goto discard;
5803
5804 case TCP_LISTEN:
5805 if (th->ack)
5806 return 1;
5807
5808 if (th->rst)
5809 goto discard;
5810
5811 if (th->syn) {
5812 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5813 return 1;

			/* A SYN may, in principle, carry data, and some
			 * stacks do send it.  Queueing that data here would
			 * open an easy denial-of-service hole that SYN
			 * cookies cannot defend against, so the segment is
			 * simply dropped once the connection request has
			 * been created.
			 */
5832 kfree_skb(skb);
5833 return 0;
5834 }
5835 goto discard;
5836
5837 case TCP_SYN_SENT:
5838 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5839 if (queued >= 0)
5840 return queued;
5841
5842
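		/* The SYN-ACK was accepted and the connection established;
		 * handle urgent data and pending transmits by hand, then
		 * drop the segment.
		 */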
5843 tcp_urg(sk, skb, th);
5844 __kfree_skb(skb);
5845 tcp_data_snd_check(sk);
5846 return 0;
5847 }
5848
5849 res = tcp_validate_incoming(sk, skb, th, 0);
5850 if (res <= 0)
5851 return -res;
5852
5853
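	/* Step 5: check the ACK field. */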
5854 if (th->ack) {
5855 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5856
5857 switch (sk->sk_state) {
5858 case TCP_SYN_RECV:
5859 if (acceptable) {
5860 tp->copied_seq = tp->rcv_nxt;
5861 smp_mb();
5862 tcp_set_state(sk, TCP_ESTABLISHED);
5863 sk->sk_state_change(sk);

				/* This wakeup is only for the marginal
				 * crossed-SYN case; passively opened sockets
				 * are not woken up here because their
				 * sk_socket is still NULL.
				 */
5870 if (sk->sk_socket)
5871 sk_wake_async(sk,
5872 SOCK_WAKE_IO, POLL_OUT);
5873
5874 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5875 tp->snd_wnd = ntohs(th->window) <<
5876 tp->rx_opt.snd_wscale;
5877 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5878
5879 if (tp->rx_opt.tstamp_ok)
5880 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5881
5882
5883
5884
5885 icsk->icsk_af_ops->rebuild_header(sk);
5886
5887 tcp_init_metrics(sk);
5888
5889 tcp_init_congestion_control(sk);
5890
5891
5892
5893
5894 tp->lsndtime = tcp_time_stamp;
5895
5896 tcp_mtup_init(sk);
5897 tcp_initialize_rcv_mss(sk);
5898 tcp_init_buffer_space(sk);
5899 tcp_fast_path_on(tp);
5900 } else {
5901 return 1;
5902 }
5903 break;
5904
5905 case TCP_FIN_WAIT1:
5906 if (tp->snd_una == tp->write_seq) {
5907 tcp_set_state(sk, TCP_FIN_WAIT2);
5908 sk->sk_shutdown |= SEND_SHUTDOWN;
5909 dst_confirm(__sk_dst_get(sk));
5910
5911 if (!sock_flag(sk, SOCK_DEAD))
5912
5913 sk->sk_state_change(sk);
5914 else {
5915 int tmo;
5916
5917 if (tp->linger2 < 0 ||
5918 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5919 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5920 tcp_done(sk);
5921 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5922 return 1;
5923 }
5924
5925 tmo = tcp_fin_time(sk);
5926 if (tmo > TCP_TIMEWAIT_LEN) {
5927 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5928 } else if (th->fin || sock_owned_by_user(sk)) {
						/* Bad case: this segment carries a FIN, or
						 * the socket is locked by the user, so moving
						 * to a timewait socket now could lose that
						 * FIN.  Wait out FIN-WAIT-2 on this socket
						 * via the keepalive timer instead.
						 */
5935 inet_csk_reset_keepalive_timer(sk, tmo);
5936 } else {
5937 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5938 goto discard;
5939 }
5940 }
5941 }
5942 break;
5943
5944 case TCP_CLOSING:
5945 if (tp->snd_una == tp->write_seq) {
5946 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
5947 goto discard;
5948 }
5949 break;
5950
5951 case TCP_LAST_ACK:
5952 if (tp->snd_una == tp->write_seq) {
5953 tcp_update_metrics(sk);
5954 tcp_done(sk);
5955 goto discard;
5956 }
5957 break;
5958 }
5959 } else
5960 goto discard;
5961
5962
5963 tcp_urg(sk, skb, th);
5964
5965
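	/* Step 7: process the segment text. */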
5966 switch (sk->sk_state) {
5967 case TCP_CLOSE_WAIT:
5968 case TCP_CLOSING:
5969 case TCP_LAST_ACK:
5970 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
5971 break;
5972 case TCP_FIN_WAIT1:
5973 case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states; RFC 1122 says
		 * we MUST send a reset instead, and 4.4BSD also resets, so
		 * abort if new data arrives after receive has been shut down.
		 */
5978 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5979 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5980 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5981 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5982 tcp_reset(sk);
5983 return 1;
5984 }
5985 }
5986
5987 case TCP_ESTABLISHED:
5988 tcp_data_queue(sk, skb);
5989 queued = 1;
5990 break;
5991 }
5992
5993
5994 if (sk->sk_state != TCP_CLOSE) {
5995 tcp_data_snd_check(sk);
5996 tcp_ack_snd_check(sk);
5997 }
5998
5999 if (!queued) {
6000discard:
6001 __kfree_skb(skb);
6002 }
6003 return 0;
6004}
6005EXPORT_SYMBOL(tcp_rcv_state_process);
6006