/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <net/netdma.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
int sysctl_tcp_ecn __read_mostly;
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 2;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;

#define FLAG_DATA		0x01	/* Incoming frame contained data.	*/
#define FLAG_WIN_UPDATE		0x02	/* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04	/* This ACK acknowledged new data.	*/
#define FLAG_RETRANS_DATA_ACKED	0x08	/* "" "" some of which was retransmitted. */
#define FLAG_SYN_ACKED		0x10	/* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20	/* New SACK.				*/
#define FLAG_ECE		0x40	/* ECE in this ACK.			*/
#define FLAG_DATA_LOST		0x80	/* SACK detected data lossage.		*/
#define FLAG_SLOWPATH		0x100	/* Do not skip RFC checks for window update. */
#define FLAG_ONLY_ORIG_SACKED	0x200	/* SACKs only non-rexmit sent before RTO */
#define FLAG_SND_UNA_ADVANCED	0x400	/* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800	/* SACK blocks contained D-SACK info	*/
#define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

#define IsSackFrto() (sysctl_tcp_frto == 0x2)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */
123static void tcp_measure_rcv_mss(struct sock *sk,
124 const struct sk_buff *skb)
125{
126 struct inet_connection_sock *icsk = inet_csk(sk);
127 const unsigned int lss = icsk->icsk_ack.last_seg_size;
128 unsigned int len;
129
130 icsk->icsk_ack.last_seg_size = 0;
131
132
133
134
135 len = skb_shinfo(skb)->gso_size ?: skb->len;
136 if (len >= icsk->icsk_ack.rcv_mss) {
137 icsk->icsk_ack.rcv_mss = len;
138 } else {
139
140
141
142
143
144 len += skb->data - skb_transport_header(skb);
145 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
146
147
148
149
150
151 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
152 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
153
154
155
156
157 len -= tcp_sk(sk)->tcp_header_len;
158 icsk->icsk_ack.last_seg_size = len;
159 if (len == lss) {
160 icsk->icsk_ack.rcv_mss = len;
161 return;
162 }
163 }
164 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
165 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
166 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
167 }
168}
169
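/* Bump the quick-ACK credit so that roughly one receive window's worth
 * of incoming segments (rcv_wnd / (2 * rcv_mss), at least 2) is ACKed
 * without delay.
 */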
170static void tcp_incr_quickack(struct sock *sk)
171{
172 struct inet_connection_sock *icsk = inet_csk(sk);
173 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
174
175 if (quickacks==0)
176 quickacks=2;
177 if (quickacks > icsk->icsk_ack.quick)
178 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
179}
180
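/* Enter quick-ACK mode: refill the quick-ACK credit, leave interactive
 * (pingpong) mode and shrink the ACK timeout to its minimum.
 */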
181void tcp_enter_quickack_mode(struct sock *sk)
182{
183 struct inet_connection_sock *icsk = inet_csk(sk);
184 tcp_incr_quickack(sk);
185 icsk->icsk_ack.pingpong = 0;
186 icsk->icsk_ack.ato = TCP_ATO_MIN;
187}

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */
193static inline int tcp_in_quickack_mode(const struct sock *sk)
194{
195 const struct inet_connection_sock *icsk = inet_csk(sk);
196 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
197}
198
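/* ECN helpers: maintain the negotiation and echo state in tp->ecn_flags.
 * A CE mark on incoming data demands that we keep echoing ECE until the
 * peer responds with CWR; conversely, when our own cwnd is reduced we
 * queue a CWR to be sent with the next data segment.
 */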
199static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
200{
201 if (tp->ecn_flags&TCP_ECN_OK)
202 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
203}
204
205static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
206{
207 if (tcp_hdr(skb)->cwr)
208 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
209}
210
211static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
212{
213 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
214}
215
216static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
217{
218 if (tp->ecn_flags&TCP_ECN_OK) {
219 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
220 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
221
222
223
224 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
225 tcp_enter_quickack_mode((struct sock *)tp);
226 }
227}
228
229static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
230{
231 if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr))
232 tp->ecn_flags &= ~TCP_ECN_OK;
233}
234
235static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
236{
237 if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr))
238 tp->ecn_flags &= ~TCP_ECN_OK;
239}
240
241static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
242{
243 if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK))
244 return 1;
245 return 0;
246}
247
/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */
253static void tcp_fixup_sndbuf(struct sock *sk)
254{
255 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
256 sizeof(struct sk_buff);
257
258 if (sk->sk_sndbuf < 3 * sndmem)
259 sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
260}
261
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * The advertised window is effectively "slow started": rcv_ssthresh is a
 * stricter limit than window_clamp and is raised only while incoming skbs
 * prove that the peer sends sanely sized frames, i.e. the payload in each
 * skb is not dwarfed by its true memory cost (skb->truesize).  This keeps
 * a misbehaving sender (many tiny segments in big buffers) from pushing
 * window growth past what receive memory can actually back.
 * __tcp_grow_window() checks that the skb's overhead stays acceptable
 * over the whole range up to tcp_rmem[2] before allowing an increase.
 */
288static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
289{
290 struct tcp_sock *tp = tcp_sk(sk);
291
292 int truesize = tcp_win_from_space(skb->truesize)/2;
293 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
294
295 while (tp->rcv_ssthresh <= window) {
296 if (truesize <= skb->len)
297 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
298
299 truesize >>= 1;
300 window >>= 1;
301 }
302 return 0;
303}
304
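/* Grow rcv_ssthresh (the current limit on the advertised window) towards
 * window_clamp when receive memory allows it and the skb just received
 * carries enough payload for its truesize; schedule a quick ACK so the
 * sender learns about the larger window promptly.
 */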
305static void tcp_grow_window(struct sock *sk,
306 struct sk_buff *skb)
307{
308 struct tcp_sock *tp = tcp_sk(sk);
309
310
311 if (tp->rcv_ssthresh < tp->window_clamp &&
312 (int)tp->rcv_ssthresh < tcp_space(sk) &&
313 !tcp_memory_pressure) {
314 int incr;
315
316
317
318
319 if (tcp_win_from_space(skb->truesize) <= skb->len)
320 incr = 2*tp->advmss;
321 else
322 incr = __tcp_grow_window(sk, skb);
323
324 if (incr) {
325 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
326 inet_csk(sk)->icsk_ack.quick |= 1;
327 }
328 }
329}
330
/* 3. Tuning rcvbuf, when connection enters established state. */
333static void tcp_fixup_rcvbuf(struct sock *sk)
334{
335 struct tcp_sock *tp = tcp_sk(sk);
336 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
337
338
339
340
341
342 while (tcp_win_from_space(rcvmem) < tp->advmss)
343 rcvmem += 128;
344 if (sk->sk_rcvbuf < 4 * rcvmem)
345 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
346}
347
/* 4. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */
351static void tcp_init_buffer_space(struct sock *sk)
352{
353 struct tcp_sock *tp = tcp_sk(sk);
354 int maxwin;
355
356 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
357 tcp_fixup_rcvbuf(sk);
358 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
359 tcp_fixup_sndbuf(sk);
360
361 tp->rcvq_space.space = tp->rcv_wnd;
362
363 maxwin = tcp_full_space(sk);
364
365 if (tp->window_clamp >= maxwin) {
366 tp->window_clamp = maxwin;
367
368 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
369 tp->window_clamp = max(maxwin -
370 (maxwin >> sysctl_tcp_app_win),
371 4 * tp->advmss);
372 }
373
374
375 if (sysctl_tcp_app_win &&
376 tp->window_clamp > 2 * tp->advmss &&
377 tp->window_clamp + tp->advmss > maxwin)
378 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
379
380 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
381 tp->snd_cwnd_stamp = tcp_time_stamp;
382}

/* 5. Recalculate window clamp after socket hit its memory bounds. */
385static void tcp_clamp_window(struct sock *sk)
386{
387 struct tcp_sock *tp = tcp_sk(sk);
388 struct inet_connection_sock *icsk = inet_csk(sk);
389
390 icsk->icsk_ack.quick = 0;
391
392 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
393 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
394 !tcp_memory_pressure &&
395 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
396 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
397 sysctl_tcp_rmem[2]);
398 }
399 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
400 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
401}
402
/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer; we have no direct
 * information about it.  It is better to underestimate than overestimate:
 * overestimating makes us ACK less frequently than needed, while
 * underestimates are easy to detect and fix in tcp_measure_rcv_mss().
 */
411void tcp_initialize_rcv_mss(struct sock *sk)
412{
413 struct tcp_sock *tp = tcp_sk(sk);
414 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
415
416 hint = min(hint, tp->rcv_wnd/2);
417 hint = min(hint, TCP_MIN_RCVMSS);
418 hint = max(hint, TCP_MIN_MSS);
419
420 inet_csk(sk)->icsk_ack.rcv_mss = hint;
421}
422
/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation without timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
 *
 * When timestamps are not available the RTT is sampled once per receive
 * window of data and only the minimum is tracked (win_dep set); with
 * timestamps a conventional 7/8 EWMA is used.
 */
434static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
435{
436 u32 new_sample = tp->rcv_rtt_est.rtt;
437 long m = sample;
438
439 if (m == 0)
440 m = 1;
441
442 if (new_sample != 0) {
443
444
445
446
447
448
449
450
451
452
453 if (!win_dep) {
454 m -= (new_sample >> 3);
455 new_sample += m;
456 } else if (m < new_sample)
457 new_sample = m << 3;
458 } else {
459
460 new_sample = m << 3;
461 }
462
463 if (tp->rcv_rtt_est.rtt != new_sample)
464 tp->rcv_rtt_est.rtt = new_sample;
465}
466
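/* RTT measurement without timestamps: time how long it takes to receive
 * one full receive window's worth of data (rcv_rtt_est.seq marks the end
 * of the current window-sized "flight").
 */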
467static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
468{
469 if (tp->rcv_rtt_est.time == 0)
470 goto new_measure;
471 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
472 return;
473 tcp_rcv_rtt_update(tp,
474 jiffies - tp->rcv_rtt_est.time,
475 1);
476
477new_measure:
478 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
479 tp->rcv_rtt_est.time = tcp_time_stamp;
480}
481
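/* RTT measurement from the timestamp echo reply, usable only when the
 * segment covers at least one rcv_mss (i.e. not a bare ACK or keepalive).
 */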
482static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb)
483{
484 struct tcp_sock *tp = tcp_sk(sk);
485 if (tp->rx_opt.rcv_tsecr &&
486 (TCP_SKB_CB(skb)->end_seq -
487 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
488 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
489}
490
491
492
493
494
495void tcp_rcv_space_adjust(struct sock *sk)
496{
497 struct tcp_sock *tp = tcp_sk(sk);
498 int time;
499 int space;
500
501 if (tp->rcvq_space.time == 0)
502 goto new_measure;
503
504 time = tcp_time_stamp - tp->rcvq_space.time;
505 if (time < (tp->rcv_rtt_est.rtt >> 3) ||
506 tp->rcv_rtt_est.rtt == 0)
507 return;
508
509 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
510
511 space = max(tp->rcvq_space.space, space);
512
513 if (tp->rcvq_space.space != space) {
514 int rcvmem;
515
516 tp->rcvq_space.space = space;
517
518 if (sysctl_tcp_moderate_rcvbuf &&
519 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
520 int new_clamp = space;
521
522
523
524
525
526 space /= tp->advmss;
527 if (!space)
528 space = 1;
529 rcvmem = (tp->advmss + MAX_TCP_HEADER +
530 16 + sizeof(struct sk_buff));
531 while (tcp_win_from_space(rcvmem) < tp->advmss)
532 rcvmem += 128;
533 space *= rcvmem;
534 space = min(space, sysctl_tcp_rmem[2]);
535 if (space > sk->sk_rcvbuf) {
536 sk->sk_rcvbuf = space;
537
538
539 tp->window_clamp = new_clamp;
540 }
541 }
542 }
543
544new_measure:
545 tp->rcvq_space.seq = tp->copied_seq;
546 tp->rcvq_space.time = tcp_time_stamp;
547}
548
/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCPs do slow start at the beginning of data
 * transmission.  This means that until we send the first few ACKs the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */
559static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
560{
561 struct tcp_sock *tp = tcp_sk(sk);
562 struct inet_connection_sock *icsk = inet_csk(sk);
563 u32 now;
564
565 inet_csk_schedule_ack(sk);
566
567 tcp_measure_rcv_mss(sk, skb);
568
569 tcp_rcv_rtt_measure(tp);
570
571 now = tcp_time_stamp;
572
573 if (!icsk->icsk_ack.ato) {
574
575
576
577 tcp_incr_quickack(sk);
578 icsk->icsk_ack.ato = TCP_ATO_MIN;
579 } else {
580 int m = now - icsk->icsk_ack.lrcvtime;
581
582 if (m <= TCP_ATO_MIN/2) {
583
584 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
585 } else if (m < icsk->icsk_ack.ato) {
586 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
587 if (icsk->icsk_ack.ato > icsk->icsk_rto)
588 icsk->icsk_ack.ato = icsk->icsk_rto;
589 } else if (m > icsk->icsk_rto) {
590
591
592
593 tcp_incr_quickack(sk);
594 sk_stream_mem_reclaim(sk);
595 }
596 }
597 icsk->icsk_ack.lrcvtime = now;
598
599 TCP_ECN_check_ce(tp, skb);
600
601 if (skb->len >= 128)
602 tcp_grow_window(sk, skb);
603}
604
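/* Per-destination minimum RTO: use the route metric RTAX_RTO_MIN when it
 * is locked on the dst entry, otherwise fall back to TCP_RTO_MIN.
 */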
605static u32 tcp_rto_min(struct sock *sk)
606{
607 struct dst_entry *dst = __sk_dst_get(sk);
608 u32 rto_min = TCP_RTO_MIN;
609
610 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
611 rto_min = dst->metrics[RTAX_RTO_MIN-1];
612 return rto_min;
613}

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 */
624static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
625{
626 struct tcp_sock *tp = tcp_sk(sk);
627 long m = mrtt;
628
	/* Jacobson's SIGCOMM '88 estimator.  srtt and mdev are kept scaled
	 * (by 8 and 4 respectively) so the EWMA updates below need only
	 * shifts and adds; m is the new measurement.  The RTO is later
	 * derived as (srtt >> 3) + rttvar, i.e. SRTT + 4*MDEV unscaled.
	 */
645 if (m == 0)
646 m = 1;
647 if (tp->srtt != 0) {
648 m -= (tp->srtt >> 3);
649 tp->srtt += m;
650 if (m < 0) {
651 m = -m;
652 m -= (tp->mdev >> 2);
653
654
655
656
657
658
659
660
661 if (m > 0)
662 m >>= 3;
663 } else {
664 m -= (tp->mdev >> 2);
665 }
666 tp->mdev += m;
667 if (tp->mdev > tp->mdev_max) {
668 tp->mdev_max = tp->mdev;
669 if (tp->mdev_max > tp->rttvar)
670 tp->rttvar = tp->mdev_max;
671 }
672 if (after(tp->snd_una, tp->rtt_seq)) {
673 if (tp->mdev_max < tp->rttvar)
674 tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
675 tp->rtt_seq = tp->snd_nxt;
676 tp->mdev_max = tcp_rto_min(sk);
677 }
678 } else {
679
680 tp->srtt = m<<3;
681 tp->mdev = m<<1;
682 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
683 tp->rtt_seq = tp->snd_nxt;
684 }
685}
686
/* Calculate rto without backoff.  This is the second half of Van
 * Jacobson's routine referred to above.
 */
690static inline void tcp_set_rto(struct sock *sk)
691{
692 const struct tcp_sock *tp = tcp_sk(sk);
693
694
695
696
697
698
699
700
701
702
703 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
704
705
706
707
708
709
710}
711
712
713
714
715static inline void tcp_bound_rto(struct sock *sk)
716{
717 if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
718 inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
719}
720
/* Save metrics learned by this TCP session.  This function is called
 * only when TCP finishes successfully, i.e. when it enters TIME-WAIT
 * or goes from LAST-ACK to CLOSE.
 */
725void tcp_update_metrics(struct sock *sk)
726{
727 struct tcp_sock *tp = tcp_sk(sk);
728 struct dst_entry *dst = __sk_dst_get(sk);
729
730 if (sysctl_tcp_nometrics_save)
731 return;
732
733 dst_confirm(dst);
734
735 if (dst && (dst->flags&DST_HOST)) {
736 const struct inet_connection_sock *icsk = inet_csk(sk);
737 int m;
738
739 if (icsk->icsk_backoff || !tp->srtt) {
740
741
742
743
744 if (!(dst_metric_locked(dst, RTAX_RTT)))
745 dst->metrics[RTAX_RTT-1] = 0;
746 return;
747 }
748
749 m = dst_metric(dst, RTAX_RTT) - tp->srtt;
750
751
752
753
754
755 if (!(dst_metric_locked(dst, RTAX_RTT))) {
756 if (m <= 0)
757 dst->metrics[RTAX_RTT-1] = tp->srtt;
758 else
759 dst->metrics[RTAX_RTT-1] -= (m>>3);
760 }
761
762 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
763 if (m < 0)
764 m = -m;
765
766
767 m >>= 1;
768 if (m < tp->mdev)
769 m = tp->mdev;
770
771 if (m >= dst_metric(dst, RTAX_RTTVAR))
772 dst->metrics[RTAX_RTTVAR-1] = m;
773 else
774 dst->metrics[RTAX_RTTVAR-1] -=
775 (dst->metrics[RTAX_RTTVAR-1] - m)>>2;
776 }
777
778 if (tp->snd_ssthresh >= 0xFFFF) {
779
780 if (dst_metric(dst, RTAX_SSTHRESH) &&
781 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
782 (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
783 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
784 if (!dst_metric_locked(dst, RTAX_CWND) &&
785 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
786 dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
787 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
788 icsk->icsk_ca_state == TCP_CA_Open) {
789
790 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
791 dst->metrics[RTAX_SSTHRESH-1] =
792 max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
793 if (!dst_metric_locked(dst, RTAX_CWND))
794 dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1;
795 } else {
796
797
798
799 if (!dst_metric_locked(dst, RTAX_CWND))
800 dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1;
801 if (dst->metrics[RTAX_SSTHRESH-1] &&
802 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
803 tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1])
804 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
805 }
806
807 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
808 if (dst->metrics[RTAX_REORDERING-1] < tp->reordering &&
809 tp->reordering != sysctl_tcp_reordering)
810 dst->metrics[RTAX_REORDERING-1] = tp->reordering;
811 }
812 }
813}
814
/* Initial congestion window, following RFC 3390: 4 segments for small
 * MSS, 3 for MSS up to 1460, 2 above that, unless the route caches an
 * explicit initcwnd metric.  Always clamped to snd_cwnd_clamp.
 */
824__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
825{
826 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
827
828 if (!cwnd) {
829 if (tp->mss_cache > 1460)
830 cwnd = 2;
831 else
832 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
833 }
834 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
835}
836
837
838void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
839{
840 struct tcp_sock *tp = tcp_sk(sk);
841 const struct inet_connection_sock *icsk = inet_csk(sk);
842
843 tp->prior_ssthresh = 0;
844 tp->bytes_acked = 0;
845 if (icsk->icsk_ca_state < TCP_CA_CWR) {
846 tp->undo_marker = 0;
847 if (set_ssthresh)
848 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
849 tp->snd_cwnd = min(tp->snd_cwnd,
850 tcp_packets_in_flight(tp) + 1U);
851 tp->snd_cwnd_cnt = 0;
852 tp->high_seq = tp->snd_nxt;
853 tp->snd_cwnd_stamp = tcp_time_stamp;
854 TCP_ECN_queue_cwr(tp);
855
856 tcp_set_ca_state(sk, TCP_CA_CWR);
857 }
858}
859
860
861
862
863
864static void tcp_disable_fack(struct tcp_sock *tp)
865{
866 tp->rx_opt.sack_ok &= ~2;
867}
868
869
870static void tcp_dsack_seen(struct tcp_sock *tp)
871{
872 tp->rx_opt.sack_ok |= 4;
873}
874
875
876
877static void tcp_init_metrics(struct sock *sk)
878{
879 struct tcp_sock *tp = tcp_sk(sk);
880 struct dst_entry *dst = __sk_dst_get(sk);
881
882 if (dst == NULL)
883 goto reset;
884
885 dst_confirm(dst);
886
887 if (dst_metric_locked(dst, RTAX_CWND))
888 tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
889 if (dst_metric(dst, RTAX_SSTHRESH)) {
890 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
891 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
892 tp->snd_ssthresh = tp->snd_cwnd_clamp;
893 }
894 if (dst_metric(dst, RTAX_REORDERING) &&
895 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
896 tcp_disable_fack(tp);
897 tp->reordering = dst_metric(dst, RTAX_REORDERING);
898 }
899
900 if (dst_metric(dst, RTAX_RTT) == 0)
901 goto reset;
902
903 if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
904 goto reset;
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920 if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
921 tp->srtt = dst_metric(dst, RTAX_RTT);
922 tp->rtt_seq = tp->snd_nxt;
923 }
924 if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
925 tp->mdev = dst_metric(dst, RTAX_RTTVAR);
926 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
927 }
928 tcp_set_rto(sk);
929 tcp_bound_rto(sk);
930 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
931 goto reset;
932 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
933 tp->snd_cwnd_stamp = tcp_time_stamp;
934 return;
935
936reset:
937
938
939
940
941 if (!tp->rx_opt.saw_tstamp && tp->srtt) {
942 tp->srtt = 0;
943 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
944 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
945 }
946}
947
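/* Raise the reordering metric when a packet is found to have been
 * displaced by `metric' segments, and bump the matching
 * (timestamp/Reno/FACK/SACK) statistics counter.  FACK is disabled from
 * then on, since its counting assumes in-order delivery.
 */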
948static void tcp_update_reordering(struct sock *sk, const int metric,
949 const int ts)
950{
951 struct tcp_sock *tp = tcp_sk(sk);
952 if (metric > tp->reordering) {
953 tp->reordering = min(TCP_MAX_REORDERING, metric);
954
955
956 if (ts)
957 NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER);
958 else if (tcp_is_reno(tp))
959 NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER);
960 else if (tcp_is_fack(tp))
961 NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER);
962 else
963 NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
964#if FASTRETRANS_DEBUG > 1
965 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
966 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
967 tp->reordering,
968 tp->fackets_out,
969 tp->sacked_out,
970 tp->undo_marker ? tp->undo_retrans : 0);
971#endif
972 tcp_disable_fack(tp);
973 }
974}
975
/* This part of the code tags the retransmission queue when SACK blocks
 * arrive.
 *
 * Each skb carries three tag bits: SACKED (S), RETRANS (R) and LOST (L),
 * counted in sacked_out, retrans_out and lost_out respectively.  The
 * valid combinations describe how many copies of the segment are in
 * flight: plain (1), S (0), L (0), R (2), L|R (1) and S|R (1).  The tags
 * are updated on four kinds of events: a new (cumulative or selective)
 * ACK, a retransmission, a loss-detection decision by the scoreboard,
 * and new D-SACK information.
 *
 * Reordering is detected when a higher sequence number is SACKed while a
 * lower one is not; the reordering metric is the maximum observed
 * displacement.
 *
 * SACK block validation: an incoming SACK block is only trusted when it
 * lies between SND.UNA and SND.NXT.  D-SACK blocks are additionally
 * allowed to reach below SND.UNA, but not below the current undo_marker
 * (a block straddling undo_marker is accepted only if it is no wider
 * than one max_window), because older sequence numbers may already have
 * been reused for new data and an ancient, wrapped or corrupted block
 * must not corrupt the counters.
 */
1072static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1073 u32 start_seq, u32 end_seq)
1074{
1075
1076 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1077 return 0;
1078
1079
1080 if (!before(start_seq, tp->snd_nxt))
1081 return 0;
1082
1083
1084
1085
1086 if (after(start_seq, tp->snd_una))
1087 return 1;
1088
1089 if (!is_dsack || !tp->undo_marker)
1090 return 0;
1091
	/* ...Then it's D-SACK, and must reside below snd_una completely */
	if (after(end_seq, tp->snd_una))
1094 return 0;
1095
1096 if (!before(start_seq, tp->undo_marker))
1097 return 1;
1098
1099
1100 if (!after(end_seq, tp->undo_marker))
1101 return 0;
1102
1103
1104
1105
1106 return !before(start_seq, end_seq - tp->max_window);
1107}
1108
/* Check for lost retransmits ("event C" of the SACK state machine, an
 * idea borrowed from ratehalving): a retransmitted segment is declared
 * lost again when the receiver is known to have received data sent
 * sufficiently far after the retransmission (beyond its recorded
 * snd_nxt-at-retransmit, by a full reordering window unless FACK is in
 * use).  The lowest remaining retransmit marker is kept in
 * lost_retrans_low so later ACKs can resume the scan cheaply.
 */
1118static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
1119{
1120 struct tcp_sock *tp = tcp_sk(sk);
1121 struct sk_buff *skb;
1122 int flag = 0;
1123 int cnt = 0;
1124 u32 new_low_seq = tp->snd_nxt;
1125
1126 tcp_for_write_queue(skb, sk) {
1127 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1128
1129 if (skb == tcp_send_head(sk))
1130 break;
1131 if (cnt == tp->retrans_out)
1132 break;
1133 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1134 continue;
1135
1136 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1137 continue;
1138
1139 if (after(received_upto, ack_seq) &&
1140 (tcp_is_fack(tp) ||
1141 !before(received_upto,
1142 ack_seq + tp->reordering * tp->mss_cache))) {
1143 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1144 tp->retrans_out -= tcp_skb_pcount(skb);
1145
1146
1147 tp->retransmit_skb_hint = NULL;
1148
1149 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1150 tp->lost_out += tcp_skb_pcount(skb);
1151 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1152 flag |= FLAG_DATA_SACKED;
1153 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
1154 }
1155 } else {
1156 if (before(ack_seq, new_low_seq))
1157 new_low_seq = ack_seq;
1158 cnt += tcp_skb_pcount(skb);
1159 }
1160 }
1161
1162 if (tp->retrans_out)
1163 tp->lost_retrans_low = new_low_seq;
1164
1165 return flag;
1166}
1167
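/* Detect a D-SACK: the first SACK block either sits below the cumulative
 * ACK or is completely covered by the second block.  If the D-SACK refers
 * to a segment we retransmitted since the current undo marker, credit
 * undo_retrans so that a spurious-retransmission undo becomes possible.
 */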
1168static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
1169 struct tcp_sack_block_wire *sp, int num_sacks,
1170 u32 prior_snd_una)
1171{
1172 u32 start_seq_0 = ntohl(get_unaligned(&sp[0].start_seq));
1173 u32 end_seq_0 = ntohl(get_unaligned(&sp[0].end_seq));
1174 int dup_sack = 0;
1175
1176 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1177 dup_sack = 1;
1178 tcp_dsack_seen(tp);
1179 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
1180 } else if (num_sacks > 1) {
1181 u32 end_seq_1 = ntohl(get_unaligned(&sp[1].end_seq));
1182 u32 start_seq_1 = ntohl(get_unaligned(&sp[1].start_seq));
1183
1184 if (!after(end_seq_0, end_seq_1) &&
1185 !before(start_seq_0, start_seq_1)) {
1186 dup_sack = 1;
1187 tcp_dsack_seen(tp);
1188 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
1189 }
1190 }
1191
1192
1193 if (dup_sack &&
1194 !after(end_seq_0, prior_snd_una) &&
1195 after(end_seq_0, tp->undo_marker))
1196 tp->undo_retrans--;
1197
1198 return dup_sack;
1199}
1200
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
 * the incoming SACK may not exactly match but we can find a smaller MSS
 * aligned portion of it that matches. Therefore we might need to fragment
 * the skb, which may fail and create some hassle (the caller must handle
 * error returns).
 */
1207static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1208 u32 start_seq, u32 end_seq)
1209{
1210 int in_sack, err;
1211 unsigned int pkt_len;
1212
1213 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1214 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1215
1216 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1217 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1218
1219 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1220
1221 if (!in_sack)
1222 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1223 else
1224 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1225 err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size);
1226 if (err < 0)
1227 return err;
1228 }
1229
1230 return in_sack;
1231}
1232
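/* Walk the SACK blocks of an incoming ACK and tag skbs on the write queue
 * as SACKED/LOST accordingly, maintaining sacked_out, lost_out,
 * retrans_out, fackets_out, the fast-path hints and the reordering
 * estimate.  Returns FLAG_* bits describing what the SACK info told us.
 */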
1233static int
1234tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
1235{
1236 const struct inet_connection_sock *icsk = inet_csk(sk);
1237 struct tcp_sock *tp = tcp_sk(sk);
1238 unsigned char *ptr = (skb_transport_header(ack_skb) +
1239 TCP_SKB_CB(ack_skb)->sacked);
1240 struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
1241 struct sk_buff *cached_skb;
1242 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
1243 int reord = tp->packets_out;
1244 int prior_fackets;
1245 u32 highest_sack_end_seq = tp->lost_retrans_low;
1246 int flag = 0;
1247 int found_dup_sack = 0;
1248 int cached_fack_count;
1249 int i;
1250 int first_sack_index;
1251 int force_one_sack;
1252
1253 if (!tp->sacked_out) {
1254 if (WARN_ON(tp->fackets_out))
1255 tp->fackets_out = 0;
1256 tp->highest_sack = tp->snd_una;
1257 }
1258 prior_fackets = tp->fackets_out;
1259
1260 found_dup_sack = tcp_check_dsack(tp, ack_skb, sp,
1261 num_sacks, prior_snd_una);
1262 if (found_dup_sack)
1263 flag |= FLAG_DSACKING_ACK;
1264
1265
1266
1267
1268
1269 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1270 return 0;
1271
1272 if (!tp->packets_out)
1273 goto out;
1274
1275
1276
1277
1278
1279 force_one_sack = 1;
1280 for (i = 0; i < num_sacks; i++) {
1281 __be32 start_seq = sp[i].start_seq;
1282 __be32 end_seq = sp[i].end_seq;
1283
1284 if (i == 0) {
1285 if (tp->recv_sack_cache[i].start_seq != start_seq)
1286 force_one_sack = 0;
1287 } else {
1288 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
1289 (tp->recv_sack_cache[i].end_seq != end_seq))
1290 force_one_sack = 0;
1291 }
1292 tp->recv_sack_cache[i].start_seq = start_seq;
1293 tp->recv_sack_cache[i].end_seq = end_seq;
1294 }
1295
1296 for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
1297 tp->recv_sack_cache[i].start_seq = 0;
1298 tp->recv_sack_cache[i].end_seq = 0;
1299 }
1300
1301 first_sack_index = 0;
1302 if (force_one_sack)
1303 num_sacks = 1;
1304 else {
1305 int j;
1306 tp->fastpath_skb_hint = NULL;
1307
1308
1309 for (i = num_sacks-1; i > 0; i--) {
1310 for (j = 0; j < i; j++){
1311 if (after(ntohl(sp[j].start_seq),
1312 ntohl(sp[j+1].start_seq))){
1313 struct tcp_sack_block_wire tmp;
1314
1315 tmp = sp[j];
1316 sp[j] = sp[j+1];
1317 sp[j+1] = tmp;
1318
1319
1320 if (j == first_sack_index)
1321 first_sack_index = j+1;
1322 }
1323
1324 }
1325 }
1326 }
1327
1328
1329 cached_skb = tp->fastpath_skb_hint;
1330 cached_fack_count = tp->fastpath_cnt_hint;
1331 if (!cached_skb) {
1332 cached_skb = tcp_write_queue_head(sk);
1333 cached_fack_count = 0;
1334 }
1335
1336 for (i = 0; i < num_sacks; i++) {
1337 struct sk_buff *skb;
1338 __u32 start_seq = ntohl(sp->start_seq);
1339 __u32 end_seq = ntohl(sp->end_seq);
1340 int fack_count;
1341 int dup_sack = (found_dup_sack && (i == first_sack_index));
1342 int next_dup = (found_dup_sack && (i+1 == first_sack_index));
1343
1344 sp++;
1345
1346 if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) {
1347 if (dup_sack) {
1348 if (!tp->undo_marker)
1349 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
1350 else
1351 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
1352 } else {
1353
1354 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1355 !after(end_seq, tp->snd_una))
1356 continue;
1357 NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
1358 }
1359 continue;
1360 }
1361
1362 skb = cached_skb;
1363 fack_count = cached_fack_count;
1364
1365
1366 if (after(end_seq, tp->high_seq))
1367 flag |= FLAG_DATA_LOST;
1368
1369 tcp_for_write_queue_from(skb, sk) {
1370 int in_sack = 0;
1371 u8 sacked;
1372
1373 if (skb == tcp_send_head(sk))
1374 break;
1375
1376 cached_skb = skb;
1377 cached_fack_count = fack_count;
1378 if (i == first_sack_index) {
1379 tp->fastpath_skb_hint = skb;
1380 tp->fastpath_cnt_hint = fack_count;
1381 }
1382
1383
1384
1385
1386 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1387 break;
1388
1389 dup_sack = (found_dup_sack && (i == first_sack_index));
1390
1391
1392 if (next_dup) {
1393 u32 dup_start = ntohl(sp->start_seq);
1394 u32 dup_end = ntohl(sp->end_seq);
1395
1396 if (before(TCP_SKB_CB(skb)->seq, dup_end)) {
1397 in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end);
1398 if (in_sack > 0)
1399 dup_sack = 1;
1400 }
1401 }
1402
1403
1404 if (in_sack <= 0)
1405 in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
1406 if (unlikely(in_sack < 0))
1407 break;
1408
1409 sacked = TCP_SKB_CB(skb)->sacked;
1410
1411
1412 if ((dup_sack && in_sack) &&
1413 (sacked & TCPCB_RETRANS) &&
1414 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1415 tp->undo_retrans--;
1416
1417
1418 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
1419 if (sacked&TCPCB_RETRANS) {
1420 if ((dup_sack && in_sack) &&
1421 (sacked&TCPCB_SACKED_ACKED))
1422 reord = min(fack_count, reord);
1423 }
1424
1425
1426 fack_count += tcp_skb_pcount(skb);
1427 continue;
1428 }
1429
1430 if (!in_sack) {
1431 fack_count += tcp_skb_pcount(skb);
1432 continue;
1433 }
1434
1435 if (!(sacked&TCPCB_SACKED_ACKED)) {
1436 if (sacked & TCPCB_SACKED_RETRANS) {
1437
1438
1439
1440
1441 if (sacked & TCPCB_LOST) {
1442 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1443 tp->lost_out -= tcp_skb_pcount(skb);
1444 tp->retrans_out -= tcp_skb_pcount(skb);
1445
1446
1447 tp->retransmit_skb_hint = NULL;
1448 }
1449 } else {
1450 if (!(sacked & TCPCB_RETRANS)) {
1451
1452
1453
1454 if (fack_count < prior_fackets)
1455 reord = min(fack_count, reord);
1456
1457
1458 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1459 flag |= FLAG_ONLY_ORIG_SACKED;
1460 }
1461
1462 if (sacked & TCPCB_LOST) {
1463 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1464 tp->lost_out -= tcp_skb_pcount(skb);
1465
1466
1467 tp->retransmit_skb_hint = NULL;
1468 }
1469 }
1470
1471 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
1472 flag |= FLAG_DATA_SACKED;
1473 tp->sacked_out += tcp_skb_pcount(skb);
1474
1475 fack_count += tcp_skb_pcount(skb);
1476 if (fack_count > tp->fackets_out)
1477 tp->fackets_out = fack_count;
1478
1479 if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
1480 tp->highest_sack = TCP_SKB_CB(skb)->seq;
1481 highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
1482 }
1483 } else {
1484 if (dup_sack && (sacked&TCPCB_RETRANS))
1485 reord = min(fack_count, reord);
1486
1487 fack_count += tcp_skb_pcount(skb);
1488 }
1489
1490
1491
1492
1493
1494
1495 if (dup_sack &&
1496 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
1497 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1498 tp->retrans_out -= tcp_skb_pcount(skb);
1499 tp->retransmit_skb_hint = NULL;
1500 }
1501 }
1502
1503
1504
1505
1506 if (after(end_seq, tp->frto_highmark))
1507 flag &= ~FLAG_ONLY_ORIG_SACKED;
1508 }
1509
1510 if (tp->retrans_out &&
1511 after(highest_sack_end_seq, tp->lost_retrans_low) &&
1512 icsk->icsk_ca_state == TCP_CA_Recovery)
1513 flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
1514
1515 tcp_verify_left_out(tp);
1516
1517 if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
1518 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1519 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
1520
1521out:
1522
1523#if FASTRETRANS_DEBUG > 0
1524 BUG_TRAP((int)tp->sacked_out >= 0);
1525 BUG_TRAP((int)tp->lost_out >= 0);
1526 BUG_TRAP((int)tp->retrans_out >= 0);
1527 BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
1528#endif
1529 return flag;
1530}
1531
1532
1533
1534
1535
1536static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1537{
1538 struct tcp_sock *tp = tcp_sk(sk);
1539 u32 holes;
1540
1541 holes = max(tp->lost_out, 1U);
1542 holes = min(holes, tp->packets_out);
1543
1544 if ((tp->sacked_out + holes) > tp->packets_out) {
1545 tp->sacked_out = tp->packets_out - holes;
1546 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1547 }
1548}
1549
1550
1551
1552static void tcp_add_reno_sack(struct sock *sk)
1553{
1554 struct tcp_sock *tp = tcp_sk(sk);
1555 tp->sacked_out++;
1556 tcp_check_reno_reordering(sk, 0);
1557 tcp_verify_left_out(tp);
1558}
1559
1560
1561
1562static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1563{
1564 struct tcp_sock *tp = tcp_sk(sk);
1565
1566 if (acked > 0) {
1567
1568 if (acked-1 >= tp->sacked_out)
1569 tp->sacked_out = 0;
1570 else
1571 tp->sacked_out -= acked-1;
1572 }
1573 tcp_check_reno_reordering(sk, acked);
1574 tcp_verify_left_out(tp);
1575}
1576
1577static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1578{
1579 tp->sacked_out = 0;
1580}
1581
1582
1583
1584
1585int tcp_use_frto(struct sock *sk)
1586{
1587 const struct tcp_sock *tp = tcp_sk(sk);
1588 struct sk_buff *skb;
1589
1590 if (!sysctl_tcp_frto)
1591 return 0;
1592
1593 if (IsSackFrto())
1594 return 1;
1595
1596
1597 if (tp->retrans_out > 1)
1598 return 0;
1599
1600 skb = tcp_write_queue_head(sk);
1601 skb = tcp_write_queue_next(sk, skb);
1602 tcp_for_write_queue_from(skb, sk) {
1603 if (skb == tcp_send_head(sk))
1604 break;
1605 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
1606 return 0;
1607
1608 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
1609 break;
1610 }
1611 return 1;
1612}
1613
/* RTO occurred, but do not yet enter Loss state.  Instead defer the RTO
 * recovery a bit and let tcp_process_frto() decide from the next ACKs
 * whether the timeout was spurious (F-RTO, RFC 4138).  When this is the
 * start of a new recovery episode, ssthresh is reduced as it would be on
 * a real timeout; the head's SACKED_RETRANS bit is cleared so the F-RTO
 * heuristics see it as an original transmission, and frto_highmark
 * records the point up to which a later fallback to conventional Loss
 * recovery must mark segments lost.
 */
1626void tcp_enter_frto(struct sock *sk)
1627{
1628 const struct inet_connection_sock *icsk = inet_csk(sk);
1629 struct tcp_sock *tp = tcp_sk(sk);
1630 struct sk_buff *skb;
1631
1632 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
1633 tp->snd_una == tp->high_seq ||
1634 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
1635 !icsk->icsk_retransmits)) {
1636 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646 if (tp->frto_counter) {
1647 u32 stored_cwnd;
1648 stored_cwnd = tp->snd_cwnd;
1649 tp->snd_cwnd = 2;
1650 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1651 tp->snd_cwnd = stored_cwnd;
1652 } else {
1653 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1654 }
1655
1656
1657
1658
1659
1660
1661
1662 tcp_ca_event(sk, CA_EVENT_FRTO);
1663 }
1664
1665 tp->undo_marker = tp->snd_una;
1666 tp->undo_retrans = 0;
1667
1668 skb = tcp_write_queue_head(sk);
1669 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1670 tp->undo_marker = 0;
1671 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1672 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1673 tp->retrans_out -= tcp_skb_pcount(skb);
1674 }
1675 tcp_verify_left_out(tp);
1676
1677
1678 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
1679
1680
1681
1682
1683 if (IsSackFrto() && (tp->frto_counter ||
1684 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
1685 after(tp->high_seq, tp->snd_una)) {
1686 tp->frto_highmark = tp->high_seq;
1687 } else {
1688 tp->frto_highmark = tp->snd_nxt;
1689 }
1690 tcp_set_ca_state(sk, TCP_CA_Disorder);
1691 tp->high_seq = tp->snd_nxt;
1692 tp->frto_counter = 1;
1693}
1694
/* Enter Loss state after F-RTO was applied: the heuristics concluded the
 * RTO was genuine, so follow the traditional RTO recovery, i.e. mark
 * segments up to frto_highmark lost and do go-back-N retransmission.
 */
1699static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1700{
1701 struct tcp_sock *tp = tcp_sk(sk);
1702 struct sk_buff *skb;
1703
1704 tp->lost_out = 0;
1705 tp->retrans_out = 0;
1706 if (tcp_is_reno(tp))
1707 tcp_reset_reno_sack(tp);
1708
1709 tcp_for_write_queue(skb, sk) {
1710 if (skb == tcp_send_head(sk))
1711 break;
1712
1713 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1714
1715
1716
1717
1718 if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
1719
1720 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1721 tp->retrans_out += tcp_skb_pcount(skb);
1722
1723 flag |= FLAG_DATA_ACKED;
1724 } else {
1725 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1726 tp->undo_marker = 0;
1727 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1728 }
1729
1730
1731 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) &&
1732 !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
1733 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1734 tp->lost_out += tcp_skb_pcount(skb);
1735 }
1736 }
1737 tcp_verify_left_out(tp);
1738
1739 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
1740 tp->snd_cwnd_cnt = 0;
1741 tp->snd_cwnd_stamp = tcp_time_stamp;
1742 tp->frto_counter = 0;
1743 tp->bytes_acked = 0;
1744
1745 tp->reordering = min_t(unsigned int, tp->reordering,
1746 sysctl_tcp_reordering);
1747 tcp_set_ca_state(sk, TCP_CA_Loss);
1748 tp->high_seq = tp->frto_highmark;
1749 TCP_ECN_queue_cwr(tp);
1750
1751 tcp_clear_retrans_hints_partial(tp);
1752}
1753
1754static void tcp_clear_retrans_partial(struct tcp_sock *tp)
1755{
1756 tp->retrans_out = 0;
1757 tp->lost_out = 0;
1758
1759 tp->undo_marker = 0;
1760 tp->undo_retrans = 0;
1761}
1762
1763void tcp_clear_retrans(struct tcp_sock *tp)
1764{
1765 tcp_clear_retrans_partial(tp);
1766
1767 tp->fackets_out = 0;
1768 tp->sacked_out = 0;
1769}
1770
/* Enter Loss state. If "how" is not zero, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */
1775void tcp_enter_loss(struct sock *sk, int how)
1776{
1777 const struct inet_connection_sock *icsk = inet_csk(sk);
1778 struct tcp_sock *tp = tcp_sk(sk);
1779 struct sk_buff *skb;
1780
1781
1782 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
1783 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1784 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1785 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1786 tcp_ca_event(sk, CA_EVENT_LOSS);
1787 }
1788 tp->snd_cwnd = 1;
1789 tp->snd_cwnd_cnt = 0;
1790 tp->snd_cwnd_stamp = tcp_time_stamp;
1791
1792 tp->bytes_acked = 0;
1793 tcp_clear_retrans_partial(tp);
1794
1795 if (tcp_is_reno(tp))
1796 tcp_reset_reno_sack(tp);
1797
1798 if (!how) {
1799
1800
1801 tp->undo_marker = tp->snd_una;
1802 tcp_clear_retrans_hints_partial(tp);
1803 } else {
1804 tp->sacked_out = 0;
1805 tp->fackets_out = 0;
1806 tcp_clear_all_retrans_hints(tp);
1807 }
1808
1809 tcp_for_write_queue(skb, sk) {
1810 if (skb == tcp_send_head(sk))
1811 break;
1812
1813 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
1814 tp->undo_marker = 0;
1815 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1816 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
1817 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1818 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1819 tp->lost_out += tcp_skb_pcount(skb);
1820 }
1821 }
1822 tcp_verify_left_out(tp);
1823
1824 tp->reordering = min_t(unsigned int, tp->reordering,
1825 sysctl_tcp_reordering);
1826 tcp_set_ca_state(sk, TCP_CA_Loss);
1827 tp->high_seq = tp->snd_nxt;
1828 TCP_ECN_queue_cwr(tp);
1829
1830 tp->frto_counter = 0;
1831}
1832
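/* The receiver "reneged": the head of the write queue is still marked
 * SACKed while the cumulative ACK did not cover it, meaning the receiver
 * discarded data it had previously SACKed.  Treat this like an RTO:
 * enter Loss, retransmit the head and restart the retransmit timer.
 */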
1833static int tcp_check_sack_reneging(struct sock *sk)
1834{
1835 struct sk_buff *skb;
1836
1837
1838
1839
1840
1841
1842
1843 if ((skb = tcp_write_queue_head(sk)) != NULL &&
1844 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1845 struct inet_connection_sock *icsk = inet_csk(sk);
1846 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
1847
1848 tcp_enter_loss(sk, 1);
1849 icsk->icsk_retransmits++;
1850 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
1851 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1852 icsk->icsk_rto, TCP_RTO_MAX);
1853 return 1;
1854 }
1855 return 0;
1856}
1857
1858static inline int tcp_fackets_out(struct tcp_sock *tp)
1859{
1860 return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
1861}
1862
1863static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
1864{
1865 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
1866}
1867
1868static inline int tcp_head_timedout(struct sock *sk)
1869{
1870 struct tcp_sock *tp = tcp_sk(sk);
1871
1872 return tp->packets_out &&
1873 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
1874}
1875
/* Linux NewReno/SACK/FACK/ECN state machine.
 * --------------------------------------
 *
 * "Open"	Normal state, no dubious events, fast path.
 * "Disorder"	In all respects it is "Open", but it requires a bit more
 *		attention.  It is entered when we see some SACKs or
 *		dupacks; it is split from "Open" mainly to move some
 *		processing from the fast path to the slow path.
 * "CWR"	cwnd was reduced due to some congestion notification
 *		event: ECN, ICMP source quench, local device congestion.
 * "Recovery"	cwnd was reduced, we are fast-retransmitting.
 * "Loss"	cwnd was reduced due to an RTO timeout or SACK reneging.
 *
 * The essential part of the recovery logic is deciding when a packet may
 * be considered lost.  With SACK the scoreboard tells us directly; with
 * FACK any segment more than tp->reordering places below the highest
 * SACKed sequence is assumed lost; plain NewReno has to rely on dupack
 * counting and only the head of the queue can ever be marked lost.  A
 * timed-out head (tcp_head_timedout) is treated as lost in all variants.
 * tcp_time_to_recover() below answers the main question: may we continue
 * forward transmission with the current cwnd, or must we enter Recovery
 * and reduce it?
 */
1969static int tcp_time_to_recover(struct sock *sk)
1970{
1971 struct tcp_sock *tp = tcp_sk(sk);
1972 __u32 packets_out;
1973
1974
1975 if (tp->frto_counter)
1976 return 0;
1977
1978
1979 if (tp->lost_out)
1980 return 1;
1981
1982
1983 if (tcp_fackets_out(tp) > tp->reordering)
1984 return 1;
1985
1986
1987
1988
1989 if (tcp_head_timedout(sk))
1990 return 1;
1991
1992
1993
1994
1995 packets_out = tp->packets_out;
1996 if (packets_out <= tp->reordering &&
1997 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
1998 !tcp_may_send_now(sk)) {
1999
2000
2001
2002 return 1;
2003 }
2004
2005 return 0;
2006}
2007
2008
2009
2010
2011
2012
2013static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
2014 struct sk_buff *skb)
2015{
2016 if ((tp->retransmit_skb_hint != NULL) &&
2017 before(TCP_SKB_CB(skb)->seq,
2018 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
2019 tp->retransmit_skb_hint = NULL;
2020}
2021
2022
2023static void tcp_mark_head_lost(struct sock *sk, int packets)
2024{
2025 struct tcp_sock *tp = tcp_sk(sk);
2026 struct sk_buff *skb;
2027 int cnt;
2028
2029 BUG_TRAP(packets <= tp->packets_out);
2030 if (tp->lost_skb_hint) {
2031 skb = tp->lost_skb_hint;
2032 cnt = tp->lost_cnt_hint;
2033 } else {
2034 skb = tcp_write_queue_head(sk);
2035 cnt = 0;
2036 }
2037
2038 tcp_for_write_queue_from(skb, sk) {
2039 if (skb == tcp_send_head(sk))
2040 break;
2041
2042
2043 tp->lost_skb_hint = skb;
2044 tp->lost_cnt_hint = cnt;
2045 cnt += tcp_skb_pcount(skb);
2046 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2047 break;
2048 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
2049 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2050 tp->lost_out += tcp_skb_pcount(skb);
2051 tcp_verify_retransmit_hint(tp, skb);
2052 }
2053 }
2054 tcp_verify_left_out(tp);
2055}
2056
2057
2058
2059static void tcp_update_scoreboard(struct sock *sk)
2060{
2061 struct tcp_sock *tp = tcp_sk(sk);
2062
2063 if (tcp_is_fack(tp)) {
2064 int lost = tp->fackets_out - tp->reordering;
2065 if (lost <= 0)
2066 lost = 1;
2067 tcp_mark_head_lost(sk, lost);
2068 } else {
2069 tcp_mark_head_lost(sk, 1);
2070 }
2071
2072
2073
2074
2075
2076
2077 if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) {
2078 struct sk_buff *skb;
2079
2080 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
2081 : tcp_write_queue_head(sk);
2082
2083 tcp_for_write_queue_from(skb, sk) {
2084 if (skb == tcp_send_head(sk))
2085 break;
2086 if (!tcp_skb_timedout(sk, skb))
2087 break;
2088
2089 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
2090 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2091 tp->lost_out += tcp_skb_pcount(skb);
2092 tcp_verify_retransmit_hint(tp, skb);
2093 }
2094 }
2095
2096 tp->scoreboard_skb_hint = skb;
2097
2098 tcp_verify_left_out(tp);
2099 }
2100}
2101
2102
2103
2104
2105static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2106{
2107 tp->snd_cwnd = min(tp->snd_cwnd,
2108 tcp_packets_in_flight(tp)+tcp_max_burst(tp));
2109 tp->snd_cwnd_stamp = tcp_time_stamp;
2110}
2111
2112
2113
2114
2115static inline u32 tcp_cwnd_min(const struct sock *sk)
2116{
2117 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
2118
2119 return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
2120}
2121
2122
2123static void tcp_cwnd_down(struct sock *sk, int flag)
2124{
2125 struct tcp_sock *tp = tcp_sk(sk);
2126 int decr = tp->snd_cwnd_cnt + 1;
2127
2128 if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) ||
2129 (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) {
2130 tp->snd_cwnd_cnt = decr&1;
2131 decr >>= 1;
2132
2133 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2134 tp->snd_cwnd -= decr;
2135
2136 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
2137 tp->snd_cwnd_stamp = tcp_time_stamp;
2138 }
2139}
2140
2141
2142
2143
2144static inline int tcp_packet_delayed(struct tcp_sock *tp)
2145{
2146 return !tp->retrans_stamp ||
2147 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2148 (__s32)(tp->rx_opt.rcv_tsecr - tp->retrans_stamp) < 0);
2149}
2150
2151
2152
2153#if FASTRETRANS_DEBUG > 1
2154static void DBGUNDO(struct sock *sk, const char *msg)
2155{
2156 struct tcp_sock *tp = tcp_sk(sk);
2157 struct inet_sock *inet = inet_sk(sk);
2158
2159 printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
2160 msg,
2161 NIPQUAD(inet->daddr), ntohs(inet->dport),
2162 tp->snd_cwnd, tcp_left_out(tp),
2163 tp->snd_ssthresh, tp->prior_ssthresh,
2164 tp->packets_out);
2165}
2166#else
2167#define DBGUNDO(x...) do { } while (0)
2168#endif
2169
2170static void tcp_undo_cwr(struct sock *sk, const int undo)
2171{
2172 struct tcp_sock *tp = tcp_sk(sk);
2173
2174 if (tp->prior_ssthresh) {
2175 const struct inet_connection_sock *icsk = inet_csk(sk);
2176
2177 if (icsk->icsk_ca_ops->undo_cwnd)
2178 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2179 else
2180 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
2181
2182 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
2183 tp->snd_ssthresh = tp->prior_ssthresh;
2184 TCP_ECN_withdraw_cwr(tp);
2185 }
2186 } else {
2187 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2188 }
2189 tcp_moderate_cwnd(tp);
2190 tp->snd_cwnd_stamp = tcp_time_stamp;
2191
2192
2193
2194 tcp_clear_all_retrans_hints(tp);
2195}
2196
2197static inline int tcp_may_undo(struct tcp_sock *tp)
2198{
2199 return tp->undo_marker &&
2200 (!tp->undo_retrans || tcp_packet_delayed(tp));
2201}
2202
2203
2204static int tcp_try_undo_recovery(struct sock *sk)
2205{
2206 struct tcp_sock *tp = tcp_sk(sk);
2207
2208 if (tcp_may_undo(tp)) {
2209
2210
2211
2212 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2213 tcp_undo_cwr(sk, 1);
2214 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2215 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
2216 else
2217 NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
2218 tp->undo_marker = 0;
2219 }
2220 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2221
2222
2223
2224 tcp_moderate_cwnd(tp);
2225 return 1;
2226 }
2227 tcp_set_ca_state(sk, TCP_CA_Open);
2228 return 0;
2229}
2230
2231
2232static void tcp_try_undo_dsack(struct sock *sk)
2233{
2234 struct tcp_sock *tp = tcp_sk(sk);
2235
2236 if (tp->undo_marker && !tp->undo_retrans) {
2237 DBGUNDO(sk, "D-SACK");
2238 tcp_undo_cwr(sk, 1);
2239 tp->undo_marker = 0;
2240 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
2241 }
2242}
2243
2244
2245
2246static int tcp_try_undo_partial(struct sock *sk, int acked)
2247{
2248 struct tcp_sock *tp = tcp_sk(sk);
2249
2250 int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering;
2251
2252 if (tcp_may_undo(tp)) {
2253
2254
2255
2256 if (tp->retrans_out == 0)
2257 tp->retrans_stamp = 0;
2258
2259 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2260
2261 DBGUNDO(sk, "Hoe");
2262 tcp_undo_cwr(sk, 0);
2263 NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
2264
2265
2266
2267
2268
2269 failed = 0;
2270 }
2271 return failed;
2272}
2273
2274
2275static int tcp_try_undo_loss(struct sock *sk)
2276{
2277 struct tcp_sock *tp = tcp_sk(sk);
2278
2279 if (tcp_may_undo(tp)) {
2280 struct sk_buff *skb;
2281 tcp_for_write_queue(skb, sk) {
2282 if (skb == tcp_send_head(sk))
2283 break;
2284 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2285 }
2286
2287 tcp_clear_all_retrans_hints(tp);
2288
2289 DBGUNDO(sk, "partial loss");
2290 tp->lost_out = 0;
2291 tcp_undo_cwr(sk, 1);
2292 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
2293 inet_csk(sk)->icsk_retransmits = 0;
2294 tp->undo_marker = 0;
2295 if (tcp_is_sack(tp))
2296 tcp_set_ca_state(sk, TCP_CA_Open);
2297 return 1;
2298 }
2299 return 0;
2300}
2301
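/* CWR is complete: bring cwnd down to ssthresh and let the congestion
 * control module know the reduction phase has finished.
 */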
2302static inline void tcp_complete_cwr(struct sock *sk)
2303{
2304 struct tcp_sock *tp = tcp_sk(sk);
2305 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2306 tp->snd_cwnd_stamp = tcp_time_stamp;
2307 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2308}
2309
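/* Called when no further recovery action is needed right now: fall back
 * to Open or Disorder with a moderated cwnd, or keep walking cwnd down
 * while in CWR.  An ECN echo forces entry into CWR first.
 */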
2310static void tcp_try_to_open(struct sock *sk, int flag)
2311{
2312 struct tcp_sock *tp = tcp_sk(sk);
2313
2314 tcp_verify_left_out(tp);
2315
2316 if (tp->retrans_out == 0)
2317 tp->retrans_stamp = 0;
2318
2319 if (flag&FLAG_ECE)
2320 tcp_enter_cwr(sk, 1);
2321
2322 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2323 int state = TCP_CA_Open;
2324
2325 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2326 state = TCP_CA_Disorder;
2327
2328 if (inet_csk(sk)->icsk_ca_state != state) {
2329 tcp_set_ca_state(sk, state);
2330 tp->high_seq = tp->snd_nxt;
2331 }
2332 tcp_moderate_cwnd(tp);
2333 } else {
2334 tcp_cwnd_down(sk, flag);
2335 }
2336}
2337
2338static void tcp_mtup_probe_failed(struct sock *sk)
2339{
2340 struct inet_connection_sock *icsk = inet_csk(sk);
2341
2342 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2343 icsk->icsk_mtup.probe_size = 0;
2344}
2345
2346static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
2347{
2348 struct tcp_sock *tp = tcp_sk(sk);
2349 struct inet_connection_sock *icsk = inet_csk(sk);
2350
2351
2352 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2353 tp->snd_cwnd = tp->snd_cwnd *
2354 tcp_mss_to_mtu(sk, tp->mss_cache) /
2355 icsk->icsk_mtup.probe_size;
2356 tp->snd_cwnd_cnt = 0;
2357 tp->snd_cwnd_stamp = tcp_time_stamp;
2358 tp->rcv_ssthresh = tcp_current_ssthresh(sk);
2359
2360 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2361 icsk->icsk_mtup.probe_size = 0;
2362 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2363}
2364
/* Process an event which can update packets-in-flight non-trivially.
 * The main goal of this function is to calculate a new estimate for
 * left_out, taking into account both packets sitting in the receiver's
 * buffer and packets lost by the network.
 *
 * Besides that, it reduces cwnd when packet loss is detected and changes
 * the state of the machine.
 *
 * It does _not_ decide what to send; that is done in
 * tcp_xmit_retransmit_queue().
 */
2377static void
2378tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2379{
2380 struct inet_connection_sock *icsk = inet_csk(sk);
2381 struct tcp_sock *tp = tcp_sk(sk);
2382 int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
2383 int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
2384 (tp->fackets_out > tp->reordering));
2385
2386
2387
2388 if (!tp->packets_out)
2389 tp->sacked_out = 0;
2390
2391 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2392 tp->fackets_out = 0;
2393
2394
2395
2396 if (flag&FLAG_ECE)
2397 tp->prior_ssthresh = 0;
2398
2399
2400 if (tp->sacked_out && tcp_check_sack_reneging(sk))
2401 return;
2402
2403
2404 if ((flag&FLAG_DATA_LOST) &&
2405 before(tp->snd_una, tp->high_seq) &&
2406 icsk->icsk_ca_state != TCP_CA_Open &&
2407 tp->fackets_out > tp->reordering) {
2408 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
2409 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
2410 }
2411
2412
2413 tcp_verify_left_out(tp);
2414
2415
2416
2417 if (icsk->icsk_ca_state == TCP_CA_Open) {
2418 BUG_TRAP(tp->retrans_out == 0);
2419 tp->retrans_stamp = 0;
2420 } else if (!before(tp->snd_una, tp->high_seq)) {
2421 switch (icsk->icsk_ca_state) {
2422 case TCP_CA_Loss:
2423 icsk->icsk_retransmits = 0;
2424 if (tcp_try_undo_recovery(sk))
2425 return;
2426 break;
2427
2428 case TCP_CA_CWR:
2429
2430
2431 if (tp->snd_una != tp->high_seq) {
2432 tcp_complete_cwr(sk);
2433 tcp_set_ca_state(sk, TCP_CA_Open);
2434 }
2435 break;
2436
2437 case TCP_CA_Disorder:
2438 tcp_try_undo_dsack(sk);
2439 if (!tp->undo_marker ||
2440
2441
2442 tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
2443 tp->undo_marker = 0;
2444 tcp_set_ca_state(sk, TCP_CA_Open);
2445 }
2446 break;
2447
2448 case TCP_CA_Recovery:
2449 if (tcp_is_reno(tp))
2450 tcp_reset_reno_sack(tp);
2451 if (tcp_try_undo_recovery(sk))
2452 return;
2453 tcp_complete_cwr(sk);
2454 break;
2455 }
2456 }
2457
2458
2459 switch (icsk->icsk_ca_state) {
2460 case TCP_CA_Recovery:
2461 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2462 if (tcp_is_reno(tp) && is_dupack)
2463 tcp_add_reno_sack(sk);
2464 } else
2465 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2466 break;
2467 case TCP_CA_Loss:
2468 if (flag&FLAG_DATA_ACKED)
2469 icsk->icsk_retransmits = 0;
2470 if (!tcp_try_undo_loss(sk)) {
2471 tcp_moderate_cwnd(tp);
2472 tcp_xmit_retransmit_queue(sk);
2473 return;
2474 }
2475 if (icsk->icsk_ca_state != TCP_CA_Open)
2476 return;
2477
2478 default:
2479 if (tcp_is_reno(tp)) {
2480 if (flag & FLAG_SND_UNA_ADVANCED)
2481 tcp_reset_reno_sack(tp);
2482 if (is_dupack)
2483 tcp_add_reno_sack(sk);
2484 }
2485
2486 if (icsk->icsk_ca_state == TCP_CA_Disorder)
2487 tcp_try_undo_dsack(sk);
2488
2489 if (!tcp_time_to_recover(sk)) {
2490 tcp_try_to_open(sk, flag);
2491 return;
2492 }
2493
2494
2495 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2496 icsk->icsk_mtup.probe_size &&
2497 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2498 tcp_mtup_probe_failed(sk);
2499
2500 tp->snd_cwnd++;
2501 tcp_simple_retransmit(sk);
2502 return;
2503 }
2504
2505
2506
2507 if (tcp_is_reno(tp))
2508 NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
2509 else
2510 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);
2511
2512 tp->high_seq = tp->snd_nxt;
2513 tp->prior_ssthresh = 0;
2514 tp->undo_marker = tp->snd_una;
2515 tp->undo_retrans = tp->retrans_out;
2516
2517 if (icsk->icsk_ca_state < TCP_CA_CWR) {
2518 if (!(flag&FLAG_ECE))
2519 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2520 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2521 TCP_ECN_queue_cwr(tp);
2522 }
2523
2524 tp->bytes_acked = 0;
2525 tp->snd_cwnd_cnt = 0;
2526 tcp_set_ca_state(sk, TCP_CA_Recovery);
2527 }
2528
2529 if (do_lost || tcp_head_timedout(sk))
2530 tcp_update_scoreboard(sk);
2531 tcp_cwnd_down(sk, flag);
2532 tcp_xmit_retransmit_queue(sk);
2533}
2534
2535
2536
2537
2538static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
2539{
	/* RTTM Rule: A TSecr value received in a segment is used to
	 * update the averaged RTT measurement only if the segment
	 * acknowledges some new data, i.e., only if it advances the
	 * left edge of the send window (see the RTTM discussion in
	 * RFC 1323).
	 */
2555 struct tcp_sock *tp = tcp_sk(sk);
2556 const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
2557 tcp_rtt_estimator(sk, seq_rtt);
2558 tcp_set_rto(sk);
2559 inet_csk(sk)->icsk_backoff = 0;
2560 tcp_bound_rto(sk);
2561}
2562
2563static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
2564{
	/* We don't have a timestamp. Can only use
	 * packets that are not retransmitted to determine
	 * rtt estimates. Also, we must not reset the
	 * backoff for rto until we get a non-retransmitted
	 * packet. This allows us to deal with a situation
	 * where the network delay has increased suddenly.
	 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
	 */
2574 if (flag & FLAG_RETRANS_DATA_ACKED)
2575 return;
2576
2577 tcp_rtt_estimator(sk, seq_rtt);
2578 tcp_set_rto(sk);
2579 inet_csk(sk)->icsk_backoff = 0;
2580 tcp_bound_rto(sk);
2581}
2582
2583static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2584 const s32 seq_rtt)
2585{
2586 const struct tcp_sock *tp = tcp_sk(sk);
2587
2588 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
2589 tcp_ack_saw_tstamp(sk, flag);
2590 else if (seq_rtt >= 0)
2591 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2592}
2593
2594static void tcp_cong_avoid(struct sock *sk, u32 ack,
2595 u32 in_flight, int good)
2596{
2597 const struct inet_connection_sock *icsk = inet_csk(sk);
2598 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good);
2599 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2600}
2601
2602
2603
2604
2605static void tcp_rearm_rto(struct sock *sk)
2606{
2607 struct tcp_sock *tp = tcp_sk(sk);
2608
2609 if (!tp->packets_out) {
2610 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2611 } else {
2612 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2613 }
2614}
2615
2616
2617static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
2618{
2619 struct tcp_sock *tp = tcp_sk(sk);
2620 u32 packets_acked;
2621
2622 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
2623
2624 packets_acked = tcp_skb_pcount(skb);
2625 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2626 return 0;
2627 packets_acked -= tcp_skb_pcount(skb);
2628
2629 if (packets_acked) {
2630 BUG_ON(tcp_skb_pcount(skb) == 0);
2631 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
2632 }
2633
2634 return packets_acked;
2635}
2636
/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 */
2641static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2642 int prior_fackets)
2643{
2644 struct tcp_sock *tp = tcp_sk(sk);
2645 const struct inet_connection_sock *icsk = inet_csk(sk);
2646 struct sk_buff *skb;
2647 u32 now = tcp_time_stamp;
2648 int fully_acked = 1;
2649 int flag = 0;
2650 int prior_packets = tp->packets_out;
2651 u32 cnt = 0;
2652 u32 reord = tp->packets_out;
2653 s32 seq_rtt = -1;
2654 s32 ca_seq_rtt = -1;
2655 ktime_t last_ackt = net_invalid_timestamp();
2656
2657 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
2658 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
2659 u32 end_seq;
2660 u32 packets_acked;
2661 u8 sacked = scb->sacked;
2662
2663
2664 if (after(scb->end_seq, tp->snd_una)) {
2665 if (tcp_skb_pcount(skb) == 1 ||
2666 !after(tp->snd_una, scb->seq))
2667 break;
2668
2669 packets_acked = tcp_tso_acked(sk, skb);
2670 if (!packets_acked)
2671 break;
2672
2673 fully_acked = 0;
2674 end_seq = tp->snd_una;
2675 } else {
2676 packets_acked = tcp_skb_pcount(skb);
2677 end_seq = scb->end_seq;
2678 }
2679
2680
2681 if (fully_acked && icsk->icsk_mtup.probe_size &&
2682 !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) {
2683 tcp_mtup_probe_success(sk, skb);
2684 }
2685
2686 if (sacked) {
2687 if (sacked & TCPCB_RETRANS) {
2688 if (sacked & TCPCB_SACKED_RETRANS)
2689 tp->retrans_out -= packets_acked;
2690 flag |= FLAG_RETRANS_DATA_ACKED;
2691 ca_seq_rtt = -1;
2692 seq_rtt = -1;
2693 if ((flag & FLAG_DATA_ACKED) ||
2694 (packets_acked > 1))
2695 flag |= FLAG_NONHEAD_RETRANS_ACKED;
2696 } else {
2697 ca_seq_rtt = now - scb->when;
2698 last_ackt = skb->tstamp;
2699 if (seq_rtt < 0) {
2700 seq_rtt = ca_seq_rtt;
2701 }
2702 if (!(sacked & TCPCB_SACKED_ACKED))
2703 reord = min(cnt, reord);
2704 }
2705
2706 if (sacked & TCPCB_SACKED_ACKED)
2707 tp->sacked_out -= packets_acked;
2708 if (sacked & TCPCB_LOST)
2709 tp->lost_out -= packets_acked;
2710
2711 if ((sacked & TCPCB_URG) && tp->urg_mode &&
2712 !before(end_seq, tp->snd_up))
2713 tp->urg_mode = 0;
2714 } else {
2715 ca_seq_rtt = now - scb->when;
2716 last_ackt = skb->tstamp;
2717 if (seq_rtt < 0) {
2718 seq_rtt = ca_seq_rtt;
2719 }
2720 reord = min(cnt, reord);
2721 }
2722 tp->packets_out -= packets_acked;
2723 cnt += packets_acked;
2724
/* The initial outgoing SYN sits on the write queue like anything
 * else we transmit, but it is not real data: reporting it as data
 * acked would make the caller leave slow start one packet too early.
 */
2732 if (!(scb->flags & TCPCB_FLAG_SYN)) {
2733 flag |= FLAG_DATA_ACKED;
2734 } else {
2735 flag |= FLAG_SYN_ACKED;
2736 tp->retrans_stamp = 0;
2737 }
2738
2739 if (!fully_acked)
2740 break;
2741
2742 tcp_unlink_write_queue(skb, sk);
2743 sk_stream_free_skb(sk, skb);
2744 tcp_clear_all_retrans_hints(tp);
2745 }
2746
2747 if (flag & FLAG_ACKED) {
2748 u32 pkts_acked = prior_packets - tp->packets_out;
2749 const struct tcp_congestion_ops *ca_ops
2750 = inet_csk(sk)->icsk_ca_ops;
2751
2752 tcp_ack_update_rtt(sk, flag, seq_rtt);
2753 tcp_rearm_rto(sk);
2754
2755 if (tcp_is_reno(tp)) {
2756 tcp_remove_reno_sacks(sk, pkts_acked);
2757 } else {
2758
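/* A non-retransmitted hole was filled: that is evidence of
 * reordering rather than loss.
 */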
2759 if (reord < prior_fackets)
2760 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
2761 }
2762
2763 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
2764
2765 tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
2766 tp->fastpath_cnt_hint);
2767 if (ca_ops->pkts_acked) {
2768 s32 rtt_us = -1;
2769
2770
2771 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
2772
2773 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
2774 !ktime_equal(last_ackt,
2775 net_invalid_timestamp()))
2776 rtt_us = ktime_us_delta(ktime_get_real(),
2777 last_ackt);
2778 else if (ca_seq_rtt > 0)
2779 rtt_us = jiffies_to_usecs(ca_seq_rtt);
2780 }
2781
2782 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
2783 }
2784 }
2785
2786#if FASTRETRANS_DEBUG > 0
2787 BUG_TRAP((int)tp->sacked_out >= 0);
2788 BUG_TRAP((int)tp->lost_out >= 0);
2789 BUG_TRAP((int)tp->retrans_out >= 0);
2790 if (!tp->packets_out && tcp_is_sack(tp)) {
2791 icsk = inet_csk(sk);
2792 if (tp->lost_out) {
2793 printk(KERN_DEBUG "Leak l=%u %d\n",
2794 tp->lost_out, icsk->icsk_ca_state);
2795 tp->lost_out = 0;
2796 }
2797 if (tp->sacked_out) {
2798 printk(KERN_DEBUG "Leak s=%u %d\n",
2799 tp->sacked_out, icsk->icsk_ca_state);
2800 tp->sacked_out = 0;
2801 }
2802 if (tp->retrans_out) {
2803 printk(KERN_DEBUG "Leak r=%u %d\n",
2804 tp->retrans_out, icsk->icsk_ca_state);
2805 tp->retrans_out = 0;
2806 }
2807 }
2808#endif
2809 *seq_rtt_p = seq_rtt;
2810 return flag;
2811}
2812
2813static void tcp_ack_probe(struct sock *sk)
2814{
2815 const struct tcp_sock *tp = tcp_sk(sk);
2816 struct inet_connection_sock *icsk = inet_csk(sk);
2817
2818
2819
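/* Did the window open enough for the pending segment?  Then clear
 * the zero-window probe timer; sending is left to the caller.
 */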
2820 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
2821 tp->snd_una + tp->snd_wnd)) {
2822 icsk->icsk_backoff = 0;
2823 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
2824
2825
2826
2827 } else {
2828 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2829 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
2830 TCP_RTO_MAX);
2831 }
2832}
2833
2834static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
2835{
2836 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
2837 inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
2838}
2839
2840static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
2841{
2842 const struct tcp_sock *tp = tcp_sk(sk);
2843 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
2844 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
2845}
2846
2847
2848
2849
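/* Check whether a window update is acceptable; assumes
 * snd_una <= ack <= snd_nxt (RFC 793 SND.WL1/SND.WL2 update rules).
 */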
2850static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
2851 const u32 ack_seq, const u32 nwin)
2852{
2853 return (after(ack, tp->snd_una) ||
2854 after(ack_seq, tp->snd_wl1) ||
2855 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
2856}
2857
2858
2859
2860
2861
2862
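/* Update the send window from an incoming segment, following the
 * RFC 793 / RFC 1122 window update algorithm.
 */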
2863static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
2864 u32 ack_seq)
2865{
2866 struct tcp_sock *tp = tcp_sk(sk);
2867 int flag = 0;
2868 u32 nwin = ntohs(tcp_hdr(skb)->window);
2869
2870 if (likely(!tcp_hdr(skb)->syn))
2871 nwin <<= tp->rx_opt.snd_wscale;
2872
2873 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
2874 flag |= FLAG_WIN_UPDATE;
2875 tcp_update_wl(tp, ack, ack_seq);
2876
2877 if (tp->snd_wnd != nwin) {
2878 tp->snd_wnd = nwin;
2879
2880
2881
2882
2883 tp->pred_flags = 0;
2884 tcp_fast_path_check(sk);
2885
2886 if (nwin > tp->max_window) {
2887 tp->max_window = nwin;
2888 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
2889 }
2890 }
2891 }
2892
2893 tp->snd_una = ack;
2894
2895 return flag;
2896}
2897
2898
2899
2900
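/* Conservative response to a spurious RTO: pull cwnd back to
 * ssthresh and continue in congestion avoidance.
 */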
2901static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
2902{
2903 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2904 tp->snd_cwnd_cnt = 0;
2905 tp->bytes_acked = 0;
2906 TCP_ECN_queue_cwr(tp);
2907 tcp_moderate_cwnd(tp);
2908}
2909
2910
2911
2912
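/* Rate-halving response to a spurious RTO: enter CWR as after an
 * ordinary congestion notification.
 */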
2913static void tcp_ratehalving_spur_to_response(struct sock *sk)
2914{
2915 tcp_enter_cwr(sk, 0);
2916}
2917
2918static void tcp_undo_spur_to_response(struct sock *sk, int flag)
2919{
2920 if (flag&FLAG_ECE)
2921 tcp_ratehalving_spur_to_response(sk);
2922 else
2923 tcp_undo_cwr(sk, 1);
2924}
2925
/* F-RTO spurious retransmission timeout detection (RFC 4138).
 *
 * State is tracked in frto_counter across the ACKs that follow an
 * RTO.  If the first ACK after the RTO advances the window but stays
 * below the highest sequence sent before the RTO, room is made to
 * send two new segments (frto_counter moves from 1 to 2).  If the
 * following ACK again advances the window, the RTO is declared
 * spurious and one of the response algorithms selected by
 * sysctl_tcp_frto_response is applied.  A duplicate ACK instead
 * suggests the RTO was caused by real loss, and we fall back to
 * conventional recovery via tcp_enter_frto_loss().
 *
 * With SACK-enhanced F-RTO (IsSackFrto()), the first step waits for
 * a cumulative ACK, and SACK information is then used to distinguish
 * a spurious timeout from genuine loss.
 */
2956static int tcp_process_frto(struct sock *sk, int flag)
2957{
2958 struct tcp_sock *tp = tcp_sk(sk);
2959
2960 tcp_verify_left_out(tp);
2961
2962
2963 if (flag&FLAG_DATA_ACKED)
2964 inet_csk(sk)->icsk_retransmits = 0;
2965
2966 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
2967 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
2968 tp->undo_marker = 0;
2969
2970 if (!before(tp->snd_una, tp->frto_highmark)) {
2971 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
2972 return 1;
2973 }
2974
2975 if (!IsSackFrto() || tcp_is_reno(tp)) {
2976
2977
2978
2979
2980 if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP))
2981 return 1;
2982
2983 if (!(flag&FLAG_DATA_ACKED)) {
2984 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
2985 flag);
2986 return 1;
2987 }
2988 } else {
2989 if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
2990
2991 tp->snd_cwnd = min(tp->snd_cwnd,
2992 tcp_packets_in_flight(tp));
2993 return 1;
2994 }
2995
2996 if ((tp->frto_counter >= 2) &&
2997 (!(flag&FLAG_FORWARD_PROGRESS) ||
2998 ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
2999
3000 if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
3001 return 1;
3002
3003 tcp_enter_frto_loss(sk, 3, flag);
3004 return 1;
3005 }
3006 }
3007
3008 if (tp->frto_counter == 1) {
3009
3010 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3011 tp->frto_counter = 2;
3012
3013 if (!tcp_may_send_now(sk))
3014 tcp_enter_frto_loss(sk, 2, flag);
3015
3016 return 1;
3017 } else {
3018 switch (sysctl_tcp_frto_response) {
3019 case 2:
3020 tcp_undo_spur_to_response(sk, flag);
3021 break;
3022 case 1:
3023 tcp_conservative_spur_to_response(tp);
3024 break;
3025 default:
3026 tcp_ratehalving_spur_to_response(sk);
3027 break;
3028 }
3029 tp->frto_counter = 0;
3030 tp->undo_marker = 0;
3031 NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS);
3032 }
3033 return 0;
3034}
3035
3036
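/* This routine deals with incoming ACKs, but not outgoing ones. */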
3037static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3038{
3039 struct inet_connection_sock *icsk = inet_csk(sk);
3040 struct tcp_sock *tp = tcp_sk(sk);
3041 u32 prior_snd_una = tp->snd_una;
3042 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3043 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3044 u32 prior_in_flight;
3045 u32 prior_fackets;
3046 s32 seq_rtt;
3047 int prior_packets;
3048 int frto_cwnd = 0;
3049
3050
3051
3052
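/* ACKs above snd_nxt are uninteresting; ACKs below snd_una only get
 * their SACK information processed.
 */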
3053 if (after(ack, tp->snd_nxt))
3054 goto uninteresting_ack;
3055
3056 if (before(ack, prior_snd_una))
3057 goto old_ack;
3058
3059 if (after(ack, prior_snd_una))
3060 flag |= FLAG_SND_UNA_ADVANCED;
3061
3062 if (sysctl_tcp_abc) {
3063 if (icsk->icsk_ca_state < TCP_CA_CWR)
3064 tp->bytes_acked += ack - prior_snd_una;
3065 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3066
3067 tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
3068 }
3069
3070 prior_fackets = tp->fackets_out;
3071 prior_in_flight = tcp_packets_in_flight(tp);
3072
3073 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3074
3075
3076
3077
3078 tcp_update_wl(tp, ack, ack_seq);
3079 tp->snd_una = ack;
3080 flag |= FLAG_WIN_UPDATE;
3081
3082 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3083
3084 NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
3085 } else {
3086 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3087 flag |= FLAG_DATA;
3088 else
3089 NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
3090
3091 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3092
3093 if (TCP_SKB_CB(skb)->sacked)
3094 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3095
3096 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3097 flag |= FLAG_ECE;
3098
3099 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3100 }
3101
3102
3103
3104
3105 sk->sk_err_soft = 0;
3106 tp->rcv_tstamp = tcp_time_stamp;
3107 prior_packets = tp->packets_out;
3108 if (!prior_packets)
3109 goto no_queue;
3110
3111
3112 flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets);
3113
3114 if (tp->frto_counter)
3115 frto_cwnd = tcp_process_frto(sk, flag);
3116
3117 if (before(tp->frto_highmark, tp->snd_una))
3118 tp->frto_highmark = 0;
3119
3120 if (tcp_ack_is_dubious(sk, flag)) {
3121
3122 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3123 tcp_may_raise_cwnd(sk, flag))
3124 tcp_cong_avoid(sk, ack, prior_in_flight, 0);
3125 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag);
3126 } else {
3127 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3128 tcp_cong_avoid(sk, ack, prior_in_flight, 1);
3129 }
3130
3131 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
3132 dst_confirm(sk->sk_dst_cache);
3133
3134 return 1;
3135
3136no_queue:
3137 icsk->icsk_probes_out = 0;
3138
3139
3140
3141
3142
3143 if (tcp_send_head(sk))
3144 tcp_ack_probe(sk);
3145 return 1;
3146
3147old_ack:
3148 if (TCP_SKB_CB(skb)->sacked)
3149 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3150
3151uninteresting_ack:
3152 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3153 return 0;
3154}
3155
3156
3157
3158
3159
3160
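/* Parse TCP options.  Normally only needed on SYN and SYN-ACK
 * segments, but also used on the slow path when the fast option
 * parser gives up.
 */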
3161void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
3162{
3163 unsigned char *ptr;
3164 struct tcphdr *th = tcp_hdr(skb);
 int length = (th->doff * 4) - sizeof(struct tcphdr);
3166
3167 ptr = (unsigned char *)(th + 1);
3168 opt_rx->saw_tstamp = 0;
3169
3170 while (length > 0) {
3171 int opcode=*ptr++;
3172 int opsize;
3173
3174 switch (opcode) {
3175 case TCPOPT_EOL:
3176 return;
3177 case TCPOPT_NOP:
3178 length--;
3179 continue;
3180 default:
3181 opsize=*ptr++;
3182 if (opsize < 2)
3183 return;
3184 if (opsize > length)
3185 return;
3186 switch (opcode) {
3187 case TCPOPT_MSS:
3188 if (opsize==TCPOLEN_MSS && th->syn && !estab) {
3189 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
3190 if (in_mss) {
3191 if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
3192 in_mss = opt_rx->user_mss;
3193 opt_rx->mss_clamp = in_mss;
3194 }
3195 }
3196 break;
3197 case TCPOPT_WINDOW:
3198 if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
3199 if (sysctl_tcp_window_scaling) {
3200 __u8 snd_wscale = *(__u8 *) ptr;
3201 opt_rx->wscale_ok = 1;
3202 if (snd_wscale > 14) {
3203 if (net_ratelimit())
3204 printk(KERN_INFO "tcp_parse_options: Illegal window "
3205 "scaling value %d >14 received.\n",
3206 snd_wscale);
3207 snd_wscale = 14;
3208 }
3209 opt_rx->snd_wscale = snd_wscale;
3210 }
3211 break;
3212 case TCPOPT_TIMESTAMP:
3213 if (opsize==TCPOLEN_TIMESTAMP) {
3214 if ((estab && opt_rx->tstamp_ok) ||
3215 (!estab && sysctl_tcp_timestamps)) {
3216 opt_rx->saw_tstamp = 1;
3217 opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
3218 opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
3219 }
3220 }
3221 break;
3222 case TCPOPT_SACK_PERM:
3223 if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
3224 if (sysctl_tcp_sack) {
3225 opt_rx->sack_ok = 1;
3226 tcp_sack_reset(opt_rx);
3227 }
3228 }
3229 break;
3230
3231 case TCPOPT_SACK:
3232 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3233 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3234 opt_rx->sack_ok) {
3235 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3236 }
3237 break;
3238#ifdef CONFIG_TCP_MD5SIG
3239 case TCPOPT_MD5SIG:
3240
3241
3242
3243
3244 break;
3245#endif
3246 }
3247
 ptr += opsize - 2;
 length -= opsize;
3250 }
3251 }
3252}
3253
3254
3255
3256
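/* Fast option parsing: handles the no-options case and the aligned
 * timestamp option; anything else falls back to tcp_parse_options().
 */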
3257static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3258 struct tcp_sock *tp)
3259{
3260 if (th->doff == sizeof(struct tcphdr)>>2) {
3261 tp->rx_opt.saw_tstamp = 0;
3262 return 0;
3263 } else if (tp->rx_opt.tstamp_ok &&
3264 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
3265 __be32 *ptr = (__be32 *)(th + 1);
3266 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3267 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3268 tp->rx_opt.saw_tstamp = 1;
3269 ++ptr;
3270 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3271 ++ptr;
3272 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
3273 return 1;
3274 }
3275 }
3276 tcp_parse_options(skb, &tp->rx_opt, 1);
3277 return 1;
3278}
3279
3280static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3281{
3282 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3283 tp->rx_opt.ts_recent_stamp = get_seconds();
3284}
3285
3286static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3287{
3288 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3289
3290
3291
3292
3293
3294
3295
3296 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
3297 get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
3298 tcp_store_ts_recent(tp);
3299 }
3300}
3301
/* PAWS as specified is awkward for pure ACKs: a segment that fails
 * the timestamp check may still be harmless if it changes no
 * critical state.  The predicate below accepts such a segment when
 * it is a pure duplicate ACK (correct sequence, ack == snd_una) that
 * does not update the window and whose timestamp lies within a small
 * replay window on the order of the RTO, so it can still be used for
 * congestion control and fast retransmit purposes.
 */
3325static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3326{
3327 struct tcp_sock *tp = tcp_sk(sk);
3328 struct tcphdr *th = tcp_hdr(skb);
3329 u32 seq = TCP_SKB_CB(skb)->seq;
3330 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3331
3332 return (
3333 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3334
3335
3336 ack == tp->snd_una &&
3337
3338
3339 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3340
3341
3342 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3343}
3344
3345static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb)
3346{
3347 const struct tcp_sock *tp = tcp_sk(sk);
3348 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
3349 get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
3350 !tcp_disordered_ack(sk, skb));
3351}
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
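/* Acceptability test from RFC 793: does the segment overlap the
 * receive window at all?
 */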
3366static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
3367{
3368 return !before(end_seq, tp->rcv_wup) &&
3369 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3370}
3371
3372
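/* An acceptable RST arrived: report the error appropriate to the
 * current state and tear the connection down.
 */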
3373static void tcp_reset(struct sock *sk)
3374{
3375
3376 switch (sk->sk_state) {
3377 case TCP_SYN_SENT:
3378 sk->sk_err = ECONNREFUSED;
3379 break;
3380 case TCP_CLOSE_WAIT:
3381 sk->sk_err = EPIPE;
3382 break;
3383 case TCP_CLOSE:
3384 return;
3385 default:
3386 sk->sk_err = ECONNRESET;
3387 }
3388
3389 if (!sock_flag(sk, SOCK_DEAD))
3390 sk->sk_error_report(sk);
3391
3392 tcp_done(sk);
3393}
3394
/* Process the FIN bit: the peer will send no more data.  State
 * transitions follow RFC 793: ESTABLISHED (or SYN_RECV) moves to
 * CLOSE_WAIT, FIN_WAIT1 to CLOSING on a simultaneous close, and
 * FIN_WAIT2 to TIME_WAIT.  Anything left in the out-of-order queue
 * is dropped and the SACK state is reset.
 */
3409static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3410{
3411 struct tcp_sock *tp = tcp_sk(sk);
3412
3413 inet_csk_schedule_ack(sk);
3414
3415 sk->sk_shutdown |= RCV_SHUTDOWN;
3416 sock_set_flag(sk, SOCK_DONE);
3417
3418 switch (sk->sk_state) {
3419 case TCP_SYN_RECV:
3420 case TCP_ESTABLISHED:
3421
3422 tcp_set_state(sk, TCP_CLOSE_WAIT);
3423 inet_csk(sk)->icsk_ack.pingpong = 1;
3424 break;
3425
3426 case TCP_CLOSE_WAIT:
3427 case TCP_CLOSING:
3428
3429
3430
3431 break;
3432 case TCP_LAST_ACK:
3433
3434 break;
3435
3436 case TCP_FIN_WAIT1:
3437
3438
3439
3440
3441 tcp_send_ack(sk);
3442 tcp_set_state(sk, TCP_CLOSING);
3443 break;
3444 case TCP_FIN_WAIT2:
3445
3446 tcp_send_ack(sk);
3447 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3448 break;
3449 default:
3450
3451
3452
3453 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
3454 __FUNCTION__, sk->sk_state);
3455 break;
3456 }
3457
3458
3459
3460
3461 __skb_queue_purge(&tp->out_of_order_queue);
3462 if (tcp_is_sack(tp))
3463 tcp_sack_reset(&tp->rx_opt);
3464 sk_stream_mem_reclaim(sk);
3465
3466 if (!sock_flag(sk, SOCK_DEAD)) {
3467 sk->sk_state_change(sk);
3468
3469
3470 if (sk->sk_shutdown == SHUTDOWN_MASK ||
3471 sk->sk_state == TCP_CLOSE)
3472 sk_wake_async(sk, 1, POLL_HUP);
3473 else
3474 sk_wake_async(sk, 1, POLL_IN);
3475 }
3476}
3477
3478static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
3479{
3480 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
3481 if (before(seq, sp->start_seq))
3482 sp->start_seq = seq;
3483 if (after(end_seq, sp->end_seq))
3484 sp->end_seq = end_seq;
3485 return 1;
3486 }
3487 return 0;
3488}
3489
3490static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
3491{
3492 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3493 if (before(seq, tp->rcv_nxt))
3494 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
3495 else
3496 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT);
3497
3498 tp->rx_opt.dsack = 1;
3499 tp->duplicate_sack[0].start_seq = seq;
3500 tp->duplicate_sack[0].end_seq = end_seq;
3501 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok);
3502 }
3503}
3504
3505static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
3506{
3507 if (!tp->rx_opt.dsack)
3508 tcp_dsack_set(tp, seq, end_seq);
3509 else
3510 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
3511}
3512
3513static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
3514{
3515 struct tcp_sock *tp = tcp_sk(sk);
3516
3517 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
3518 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
3519 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
3520 tcp_enter_quickack_mode(sk);
3521
3522 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3523 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
3524
3525 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
3526 end_seq = tp->rcv_nxt;
3527 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
3528 }
3529 }
3530
3531 tcp_send_ack(sk);
3532}
3533
3534
3535
3536
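/* Keep the advertised SACK blocks tidy as out-of-order data arrives
 * or in-order data closes up the sequence space.
 */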
3537static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3538{
3539 int this_sack;
3540 struct tcp_sack_block *sp = &tp->selective_acks[0];
3541 struct tcp_sack_block *swalk = sp+1;
3542
3543
3544
3545
3546 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) {
3547 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
3548 int i;
3549
3550
3551
3552
3553 tp->rx_opt.num_sacks--;
3554 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
3555 for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
3556 sp[i] = sp[i+1];
3557 continue;
3558 }
3559 this_sack++, swalk++;
3560 }
3561}
3562
3563static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
3564{
3565 __u32 tmp;
3566
3567 tmp = sack1->start_seq;
3568 sack1->start_seq = sack2->start_seq;
3569 sack2->start_seq = tmp;
3570
3571 tmp = sack1->end_seq;
3572 sack1->end_seq = sack2->end_seq;
3573 sack2->end_seq = tmp;
3574}
3575
3576static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3577{
3578 struct tcp_sock *tp = tcp_sk(sk);
3579 struct tcp_sack_block *sp = &tp->selective_acks[0];
3580 int cur_sacks = tp->rx_opt.num_sacks;
3581 int this_sack;
3582
3583 if (!cur_sacks)
3584 goto new_sack;
3585
3586 for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
3587 if (tcp_sack_extend(sp, seq, end_seq)) {
3588
3589 for (; this_sack>0; this_sack--, sp--)
3590 tcp_sack_swap(sp, sp-1);
3591 if (cur_sacks > 1)
3592 tcp_sack_maybe_coalesce(tp);
3593 return;
3594 }
3595 }
3596
3597
3598
3599
3600
3601
3602
3603 if (this_sack >= 4) {
3604 this_sack--;
3605 tp->rx_opt.num_sacks--;
3606 sp--;
3607 }
3608 for (; this_sack > 0; this_sack--, sp--)
3609 *sp = *(sp-1);
3610
3611new_sack:
3612
3613 sp->start_seq = seq;
3614 sp->end_seq = end_seq;
3615 tp->rx_opt.num_sacks++;
3616 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
3617}
3618
3619
3620
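/* RCV.NXT advanced: drop SACK blocks that the cumulative ACK now
 * fully covers.
 */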
3621static void tcp_sack_remove(struct tcp_sock *tp)
3622{
3623 struct tcp_sack_block *sp = &tp->selective_acks[0];
3624 int num_sacks = tp->rx_opt.num_sacks;
3625 int this_sack;
3626
3627
3628 if (skb_queue_empty(&tp->out_of_order_queue)) {
3629 tp->rx_opt.num_sacks = 0;
3630 tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
3631 return;
3632 }
3633
3634 for (this_sack = 0; this_sack < num_sacks; ) {
3635
3636 if (!before(tp->rcv_nxt, sp->start_seq)) {
3637 int i;
3638
3639
3640 BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq));
3641
3642
3643 for (i=this_sack+1; i < num_sacks; i++)
3644 tp->selective_acks[i-1] = tp->selective_acks[i];
3645 num_sacks--;
3646 continue;
3647 }
3648 this_sack++;
3649 sp++;
3650 }
3651 if (num_sacks != tp->rx_opt.num_sacks) {
3652 tp->rx_opt.num_sacks = num_sacks;
3653 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
3654 }
3655}
3656
3657
3658
3659
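/* Move segments that have become in-order from the out-of-order
 * queue to the receive queue.
 */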
3660static void tcp_ofo_queue(struct sock *sk)
3661{
3662 struct tcp_sock *tp = tcp_sk(sk);
3663 __u32 dsack_high = tp->rcv_nxt;
3664 struct sk_buff *skb;
3665
3666 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
3667 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
3668 break;
3669
3670 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
3671 __u32 dsack = dsack_high;
3672 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
3673 dsack_high = TCP_SKB_CB(skb)->end_seq;
3674 tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack);
3675 }
3676
3677 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 SOCK_DEBUG(sk, "ofo packet was already received\n");
3679 __skb_unlink(skb, &tp->out_of_order_queue);
3680 __kfree_skb(skb);
3681 continue;
3682 }
3683 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
3684 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
3685 TCP_SKB_CB(skb)->end_seq);
3686
3687 __skb_unlink(skb, &tp->out_of_order_queue);
3688 __skb_queue_tail(&sk->sk_receive_queue, skb);
3689 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3690 if (tcp_hdr(skb)->fin)
3691 tcp_fin(skb, sk, tcp_hdr(skb));
3692 }
3693}
3694
3695static int tcp_prune_queue(struct sock *sk);
3696
3697static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3698{
3699 struct tcphdr *th = tcp_hdr(skb);
3700 struct tcp_sock *tp = tcp_sk(sk);
3701 int eaten = -1;
3702
3703 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
3704 goto drop;
3705
3706 __skb_pull(skb, th->doff*4);
3707
3708 TCP_ECN_accept_cwr(tp, skb);
3709
3710 if (tp->rx_opt.dsack) {
3711 tp->rx_opt.dsack = 0;
3712 tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
3713 4 - tp->rx_opt.tstamp_ok);
3714 }
3715
3716
3717
3718
3719
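/* Queue data for delivery to the user: in-sequence segments go to
 * the receive queue, everything else to the out-of-order queue.
 */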
3720 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
3721 if (tcp_receive_window(tp) == 0)
3722 goto out_of_window;
3723
3724
3725 if (tp->ucopy.task == current &&
3726 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
3727 sock_owned_by_user(sk) && !tp->urg_data) {
3728 int chunk = min_t(unsigned int, skb->len,
3729 tp->ucopy.len);
3730
3731 __set_current_state(TASK_RUNNING);
3732
3733 local_bh_enable();
3734 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
3735 tp->ucopy.len -= chunk;
3736 tp->copied_seq += chunk;
3737 eaten = (chunk == skb->len && !th->fin);
3738 tcp_rcv_space_adjust(sk);
3739 }
3740 local_bh_disable();
3741 }
3742
3743 if (eaten <= 0) {
3744queue_and_out:
3745 if (eaten < 0 &&
3746 (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
3747 !sk_stream_rmem_schedule(sk, skb))) {
3748 if (tcp_prune_queue(sk) < 0 ||
3749 !sk_stream_rmem_schedule(sk, skb))
3750 goto drop;
3751 }
3752 sk_stream_set_owner_r(skb, sk);
3753 __skb_queue_tail(&sk->sk_receive_queue, skb);
3754 }
3755 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3756 if (skb->len)
3757 tcp_event_data_recv(sk, skb);
3758 if (th->fin)
3759 tcp_fin(skb, sk, th);
3760
3761 if (!skb_queue_empty(&tp->out_of_order_queue)) {
3762 tcp_ofo_queue(sk);
3763
3764
3765
3766
3767 if (skb_queue_empty(&tp->out_of_order_queue))
3768 inet_csk(sk)->icsk_ack.pingpong = 0;
3769 }
3770
3771 if (tp->rx_opt.num_sacks)
3772 tcp_sack_remove(tp);
3773
3774 tcp_fast_path_check(sk);
3775
3776 if (eaten > 0)
3777 __kfree_skb(skb);
3778 else if (!sock_flag(sk, SOCK_DEAD))
3779 sk->sk_data_ready(sk, 0);
3780 return;
3781 }
3782
3783 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
3784
3785 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
3786 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
3787
3788out_of_window:
3789 tcp_enter_quickack_mode(sk);
3790 inet_csk_schedule_ack(sk);
3791drop:
3792 __kfree_skb(skb);
3793 return;
3794 }
3795
3796
3797 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
3798 goto out_of_window;
3799
3800 tcp_enter_quickack_mode(sk);
3801
3802 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
3803
3804 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
3805 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
3806 TCP_SKB_CB(skb)->end_seq);
3807
3808 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
3809
3810
3811
3812
3813 if (!tcp_receive_window(tp))
3814 goto out_of_window;
3815 goto queue_and_out;
3816 }
3817
3818 TCP_ECN_check_ce(tp, skb);
3819
3820 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
3821 !sk_stream_rmem_schedule(sk, skb)) {
3822 if (tcp_prune_queue(sk) < 0 ||
3823 !sk_stream_rmem_schedule(sk, skb))
3824 goto drop;
3825 }
3826
3827
3828 tp->pred_flags = 0;
3829 inet_csk_schedule_ack(sk);
3830
3831 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
3832 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
3833
3834 sk_stream_set_owner_r(skb, sk);
3835
3836 if (!skb_peek(&tp->out_of_order_queue)) {
3837
3838 if (tcp_is_sack(tp)) {
3839 tp->rx_opt.num_sacks = 1;
3840 tp->rx_opt.dsack = 0;
3841 tp->rx_opt.eff_sacks = 1;
3842 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
3843 tp->selective_acks[0].end_seq =
3844 TCP_SKB_CB(skb)->end_seq;
3845 }
3846 __skb_queue_head(&tp->out_of_order_queue,skb);
3847 } else {
3848 struct sk_buff *skb1 = tp->out_of_order_queue.prev;
3849 u32 seq = TCP_SKB_CB(skb)->seq;
3850 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
3851
3852 if (seq == TCP_SKB_CB(skb1)->end_seq) {
3853 __skb_append(skb1, skb, &tp->out_of_order_queue);
3854
3855 if (!tp->rx_opt.num_sacks ||
3856 tp->selective_acks[0].end_seq != seq)
3857 goto add_sack;
3858
3859
3860 tp->selective_acks[0].end_seq = end_seq;
3861 return;
3862 }
3863
3864
3865 do {
3866 if (!after(TCP_SKB_CB(skb1)->seq, seq))
3867 break;
3868 } while ((skb1 = skb1->prev) !=
3869 (struct sk_buff*)&tp->out_of_order_queue);
3870
3871
3872 if (skb1 != (struct sk_buff*)&tp->out_of_order_queue &&
3873 before(seq, TCP_SKB_CB(skb1)->end_seq)) {
3874 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
3875
3876 __kfree_skb(skb);
3877 tcp_dsack_set(tp, seq, end_seq);
3878 goto add_sack;
3879 }
3880 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
3881
3882 tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
3883 } else {
3884 skb1 = skb1->prev;
3885 }
3886 }
3887 __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
3888
3889
3890 while ((skb1 = skb->next) !=
3891 (struct sk_buff*)&tp->out_of_order_queue &&
3892 after(end_seq, TCP_SKB_CB(skb1)->seq)) {
3893 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
3894 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
3895 break;
3896 }
3897 __skb_unlink(skb1, &tp->out_of_order_queue);
3898 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
3899 __kfree_skb(skb1);
3900 }
3901
3902add_sack:
3903 if (tcp_is_sack(tp))
3904 tcp_sack_new_ofo_skb(sk, seq, end_seq);
3905 }
3906}
3907
/* Collapse the contiguous sequence of skbs head..tail covering
 * sequence numbers start..end into fewer, densely packed skbs.
 * Segments carrying SYN or FIN are never collapsed.
 */
3913static void
3914tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3915 struct sk_buff *head, struct sk_buff *tail,
3916 u32 start, u32 end)
3917{
3918 struct sk_buff *skb;
3919
3920
3921
3922 for (skb = head; skb != tail; ) {
3923
3924 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
3925 struct sk_buff *next = skb->next;
3926 __skb_unlink(skb, list);
3927 __kfree_skb(skb);
3928 NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
3929 skb = next;
3930 continue;
3931 }
3932
3933
3934
3935
3936
3937
3938 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
3939 (tcp_win_from_space(skb->truesize) > skb->len ||
3940 before(TCP_SKB_CB(skb)->seq, start) ||
3941 (skb->next != tail &&
3942 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
3943 break;
3944
3945
3946 start = TCP_SKB_CB(skb)->end_seq;
3947 skb = skb->next;
3948 }
3949 if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
3950 return;
3951
3952 while (before(start, end)) {
3953 struct sk_buff *nskb;
3954 unsigned int header = skb_headroom(skb);
3955 int copy = SKB_MAX_ORDER(header, 0);
3956
3957
3958 if (copy < 0)
3959 return;
3960 if (end-start < copy)
3961 copy = end-start;
3962 nskb = alloc_skb(copy+header, GFP_ATOMIC);
3963 if (!nskb)
3964 return;
3965
3966 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
3967 skb_set_network_header(nskb, (skb_network_header(skb) -
3968 skb->head));
3969 skb_set_transport_header(nskb, (skb_transport_header(skb) -
3970 skb->head));
3971 skb_reserve(nskb, header);
3972 memcpy(nskb->head, skb->head, header);
3973 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
3974 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
3975 __skb_insert(nskb, skb->prev, skb, list);
3976 sk_stream_set_owner_r(nskb, sk);
3977
3978
3979 while (copy > 0) {
3980 int offset = start - TCP_SKB_CB(skb)->seq;
3981 int size = TCP_SKB_CB(skb)->end_seq - start;
3982
3983 BUG_ON(offset < 0);
3984 if (size > 0) {
3985 size = min(copy, size);
3986 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
3987 BUG();
3988 TCP_SKB_CB(nskb)->end_seq += size;
3989 copy -= size;
3990 start += size;
3991 }
3992 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
3993 struct sk_buff *next = skb->next;
3994 __skb_unlink(skb, list);
3995 __kfree_skb(skb);
3996 NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
3997 skb = next;
3998 if (skb == tail ||
3999 tcp_hdr(skb)->syn ||
4000 tcp_hdr(skb)->fin)
4001 return;
4002 }
4003 }
4004 }
4005}
4006
4007
4008
4009
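/* Collapse the out-of-order queue: pick contiguous runs of skbs and
 * hand them to tcp_collapse().
 */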
4010static void tcp_collapse_ofo_queue(struct sock *sk)
4011{
4012 struct tcp_sock *tp = tcp_sk(sk);
4013 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4014 struct sk_buff *head;
4015 u32 start, end;
4016
4017 if (skb == NULL)
4018 return;
4019
4020 start = TCP_SKB_CB(skb)->seq;
4021 end = TCP_SKB_CB(skb)->end_seq;
4022 head = skb;
4023
4024 for (;;) {
4025 skb = skb->next;
4026
4027
4028
4029 if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
4030 after(TCP_SKB_CB(skb)->seq, end) ||
4031 before(TCP_SKB_CB(skb)->end_seq, start)) {
4032 tcp_collapse(sk, &tp->out_of_order_queue,
4033 head, skb, start, end);
4034 head = skb;
4035 if (skb == (struct sk_buff *)&tp->out_of_order_queue)
4036 break;
4037
4038 start = TCP_SKB_CB(skb)->seq;
4039 end = TCP_SKB_CB(skb)->end_seq;
4040 } else {
4041 if (before(TCP_SKB_CB(skb)->seq, start))
4042 start = TCP_SKB_CB(skb)->seq;
4043 if (after(TCP_SKB_CB(skb)->end_seq, end))
4044 end = TCP_SKB_CB(skb)->end_seq;
4045 }
4046 }
4047}
4048
/* Reduce allocated receive memory to get the socket back within its
 * limits: clamp the window, collapse queues, and as a last resort
 * purge the out-of-order queue.  Returns a negative value if the
 * caller should start dropping incoming data until the owning
 * process has read some of it.
 */
4056static int tcp_prune_queue(struct sock *sk)
4057{
4058 struct tcp_sock *tp = tcp_sk(sk);
4059
4060 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4061
4062 NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
4063
4064 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4065 tcp_clamp_window(sk);
4066 else if (tcp_memory_pressure)
4067 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4068
4069 tcp_collapse_ofo_queue(sk);
4070 tcp_collapse(sk, &sk->sk_receive_queue,
4071 sk->sk_receive_queue.next,
4072 (struct sk_buff*)&sk->sk_receive_queue,
4073 tp->copied_seq, tp->rcv_nxt);
4074 sk_stream_mem_reclaim(sk);
4075
4076 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4077 return 0;
4078
4079
4080
4081
4082
4083 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4084 NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
4085 __skb_queue_purge(&tp->out_of_order_queue);
4086
4087
4088
4089
4090
4091
4092 if (tcp_is_sack(tp))
4093 tcp_sack_reset(&tp->rx_opt);
4094 sk_stream_mem_reclaim(sk);
4095 }
4096
4097 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4098 return 0;
4099
4100
4101
4102
4103
4104 NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);
4105
4106
4107 tp->pred_flags = 0;
4108 return -1;
4109}
4110
4111
4112
4113
4114
4115
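/* The connection was application- or receiver-limited for a while:
 * shrink cwnd toward what was actually used (RFC 2861 behaviour).
 */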
4116void tcp_cwnd_application_limited(struct sock *sk)
4117{
4118 struct tcp_sock *tp = tcp_sk(sk);
4119
4120 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4121 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4122
4123 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4124 u32 win_used = max(tp->snd_cwnd_used, init_win);
4125 if (win_used < tp->snd_cwnd) {
4126 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4127 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4128 }
4129 tp->snd_cwnd_used = 0;
4130 }
4131 tp->snd_cwnd_stamp = tcp_time_stamp;
4132}
4133
4134static int tcp_should_expand_sndbuf(struct sock *sk)
4135{
4136 struct tcp_sock *tp = tcp_sk(sk);
4137
4138
4139
4140
4141 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4142 return 0;
4143
4144
4145 if (tcp_memory_pressure)
4146 return 0;
4147
4148
4149 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4150 return 0;
4151
4152
4153 if (tp->packets_out >= tp->snd_cwnd)
4154 return 0;
4155
4156 return 1;
4157}
4158
/* An incoming ACK made room in the write queue: expand the send
 * buffer if we are allowed to, and wake the socket up for writing.
 */
4165static void tcp_new_space(struct sock *sk)
4166{
4167 struct tcp_sock *tp = tcp_sk(sk);
4168
4169 if (tcp_should_expand_sndbuf(sk)) {
4170 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
4171 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
4172 demanded = max_t(unsigned int, tp->snd_cwnd,
4173 tp->reordering + 1);
4174 sndmem *= 2*demanded;
4175 if (sndmem > sk->sk_sndbuf)
4176 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4177 tp->snd_cwnd_stamp = tcp_time_stamp;
4178 }
4179
4180 sk->sk_write_space(sk);
4181}
4182
4183static void tcp_check_space(struct sock *sk)
4184{
4185 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4186 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4187 if (sk->sk_socket &&
4188 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4189 tcp_new_space(sk);
4190 }
4191}
4192
4193static inline void tcp_data_snd_check(struct sock *sk)
4194{
4195 tcp_push_pending_frames(sk);
4196 tcp_check_space(sk);
4197}
4198
4199
4200
4201
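/* Decide whether to send an ACK right now or to rely on the delayed
 * ACK timer.
 */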
4202static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4203{
4204 struct tcp_sock *tp = tcp_sk(sk);
4205
4206
4207 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss
4208
4209
4210
4211 && __tcp_select_window(sk) >= tp->rcv_wnd) ||
4212
4213 tcp_in_quickack_mode(sk) ||
4214
4215 (ofo_possible &&
4216 skb_peek(&tp->out_of_order_queue))) {
4217
4218 tcp_send_ack(sk);
4219 } else {
4220
4221 tcp_send_delayed_ack(sk);
4222 }
4223}
4224
4225static inline void tcp_ack_snd_check(struct sock *sk)
4226{
4227 if (!inet_csk_ack_scheduled(sk)) {
4228
4229 return;
4230 }
4231 __tcp_ack_snd_check(sk, 1);
4232}
4233
/* Slow part of urgent-data handling, called only when the URG bit
 * is set.  The urgent pointer is interpreted the BSD way by default;
 * setting sysctl_tcp_stdurg switches to the host-requirements
 * (RFC 1122) interpretation.
 */
4244static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
4245{
4246 struct tcp_sock *tp = tcp_sk(sk);
4247 u32 ptr = ntohs(th->urg_ptr);
4248
4249 if (ptr && !sysctl_tcp_stdurg)
4250 ptr--;
4251 ptr += ntohl(th->seq);
4252
4253
4254 if (after(tp->copied_seq, ptr))
4255 return;
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267 if (before(ptr, tp->rcv_nxt))
4268 return;
4269
4270
4271 if (tp->urg_data && !after(ptr, tp->urg_seq))
4272 return;
4273
4274
4275 sk_send_sigurg(sk);
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4293 !sock_flag(sk, SOCK_URGINLINE) &&
4294 tp->copied_seq != tp->rcv_nxt) {
4295 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4296 tp->copied_seq++;
4297 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4298 __skb_unlink(skb, &sk->sk_receive_queue);
4299 __kfree_skb(skb);
4300 }
4301 }
4302
4303 tp->urg_data = TCP_URG_NOTYET;
4304 tp->urg_seq = ptr;
4305
4306
4307 tp->pred_flags = 0;
4308}
4309
4310
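/* Fast part of urgent handling: note newly signalled urgent data via
 * tcp_check_urg() and latch the urgent byte into tp->urg_data once
 * it arrives in this segment.
 */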
4311static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4312{
4313 struct tcp_sock *tp = tcp_sk(sk);
4314
4315
4316 if (th->urg)
4317 tcp_check_urg(sk,th);
4318
4319
4320 if (tp->urg_data == TCP_URG_NOTYET) {
4321 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
4322 th->syn;
4323
4324
4325 if (ptr < skb->len) {
4326 u8 tmp;
4327 if (skb_copy_bits(skb, ptr, &tmp, 1))
4328 BUG();
4329 tp->urg_data = TCP_URG_VALID | tmp;
4330 if (!sock_flag(sk, SOCK_DEAD))
4331 sk->sk_data_ready(sk, 0);
4332 }
4333 }
4334}
4335
4336static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4337{
4338 struct tcp_sock *tp = tcp_sk(sk);
4339 int chunk = skb->len - hlen;
4340 int err;
4341
4342 local_bh_enable();
4343 if (skb_csum_unnecessary(skb))
4344 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
4345 else
4346 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
4347 tp->ucopy.iov);
4348
4349 if (!err) {
4350 tp->ucopy.len -= chunk;
4351 tp->copied_seq += chunk;
4352 tcp_rcv_space_adjust(sk);
4353 }
4354
4355 local_bh_disable();
4356 return err;
4357}
4358
4359static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
4360{
4361 __sum16 result;
4362
4363 if (sock_owned_by_user(sk)) {
4364 local_bh_enable();
4365 result = __tcp_checksum_complete(skb);
4366 local_bh_disable();
4367 } else {
4368 result = __tcp_checksum_complete(skb);
4369 }
4370 return result;
4371}
4372
4373static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
4374{
4375 return !skb_csum_unnecessary(skb) &&
4376 __tcp_checksum_complete_user(sk, skb);
4377}
4378
4379#ifdef CONFIG_NET_DMA
4380static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
4381{
4382 struct tcp_sock *tp = tcp_sk(sk);
4383 int chunk = skb->len - hlen;
4384 int dma_cookie;
4385 int copied_early = 0;
4386
4387 if (tp->ucopy.wakeup)
4388 return 0;
4389
4390 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
4391 tp->ucopy.dma_chan = get_softnet_dma();
4392
4393 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
4394
4395 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
4396 skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
4397
4398 if (dma_cookie < 0)
4399 goto out;
4400
4401 tp->ucopy.dma_cookie = dma_cookie;
4402 copied_early = 1;
4403
4404 tp->ucopy.len -= chunk;
4405 tp->copied_seq += chunk;
4406 tcp_rcv_space_adjust(sk);
4407
4408 if ((tp->ucopy.len == 0) ||
4409 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
4410 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
4411 tp->ucopy.wakeup = 1;
4412 sk->sk_data_ready(sk, 0);
4413 }
4414 } else if (chunk > 0) {
4415 tp->ucopy.wakeup = 1;
4416 sk->sk_data_ready(sk, 0);
4417 }
4418out:
4419 return copied_early;
4420}
4421#endif
4422
/* TCP receive function for the ESTABLISHED state.
 *
 * It is split into a fast path and a slow path.  The fast path is
 * disabled when:
 * - we announced a zero window (zero-window probing is only handled
 *   properly on the slow path),
 * - out-of-order segments arrived,
 * - urgent data is expected,
 * - there is no buffer space left,
 * - an unexpected TCP flag/window value/header length is received
 *   (detected by checking the header against pred_flags),
 * - data is sent in both directions (the fast path supports only
 *   pure senders or pure receivers, so either the sequence number
 *   or the ACK value must stay constant),
 * - an unexpected TCP option shows up.
 *
 * When these conditions are not met, processing falls back to a
 * standard receive procedure patterned after RFC 793.  The first
 * three cases are guaranteed by proper pred_flags setting; the rest
 * are checked inline.  Fast processing is turned back on in
 * tcp_data_queue() when everything is OK.
 */
4446int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4447 struct tcphdr *th, unsigned len)
4448{
4449 struct tcp_sock *tp = tcp_sk(sk);
4450
/* Header prediction.  The approach follows Van Jacobson's famous
 * "30 instruction TCP receive": one comparison of the header's flag
 * word against pred_flags decides whether the common case applies
 * (expected header, in-order data, nothing unusual), in which case
 * the segment is processed on a minimal fast path.
 */

4466 tp->rx_opt.saw_tstamp = 0;
4467
/* pred_flags is 0xS?10 << 16 + snd_wnd, where 'S' is the header
 * length in 32-bit words and '?' is 0 on the fast path; pred_flags
 * is set to 0 to turn prediction off (for instance when there are
 * holes in the receive space).  The PSH flag is ignored.
 */
4477 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
4478 TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4479 int tcp_header_len = tp->tcp_header_len;
4480
4481
4482
4483
4484
4485
4486
4487 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
4488 __be32 *ptr = (__be32 *)(th + 1);
4489
4490
4491 if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
4492 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
4493 goto slow_path;
4494
4495 tp->rx_opt.saw_tstamp = 1;
4496 ++ptr;
4497 tp->rx_opt.rcv_tsval = ntohl(*ptr);
4498 ++ptr;
4499 tp->rx_opt.rcv_tsecr = ntohl(*ptr);
4500
4501
4502 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
4503 goto slow_path;
4504
4505
4506
4507
4508
4509
4510 }
4511
4512 if (len <= tcp_header_len) {
4513
4514 if (len == tcp_header_len) {
4515
4516
4517
4518
4519 if (tcp_header_len ==
4520 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
4521 tp->rcv_nxt == tp->rcv_wup)
4522 tcp_store_ts_recent(tp);
4523
4524
4525
4526
4527 tcp_ack(sk, skb, 0);
4528 __kfree_skb(skb);
4529 tcp_data_snd_check(sk);
4530 return 0;
4531 } else {
4532 TCP_INC_STATS_BH(TCP_MIB_INERRS);
4533 goto discard;
4534 }
4535 } else {
4536 int eaten = 0;
4537 int copied_early = 0;
4538
4539 if (tp->copied_seq == tp->rcv_nxt &&
4540 len - tcp_header_len <= tp->ucopy.len) {
4541#ifdef CONFIG_NET_DMA
4542 if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
4543 copied_early = 1;
4544 eaten = 1;
4545 }
4546#endif
4547 if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
4548 __set_current_state(TASK_RUNNING);
4549
4550 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
4551 eaten = 1;
4552 }
4553 if (eaten) {
4554
4555
4556
4557
4558 if (tcp_header_len ==
4559 (sizeof(struct tcphdr) +
4560 TCPOLEN_TSTAMP_ALIGNED) &&
4561 tp->rcv_nxt == tp->rcv_wup)
4562 tcp_store_ts_recent(tp);
4563
4564 tcp_rcv_rtt_measure_ts(sk, skb);
4565
4566 __skb_pull(skb, tcp_header_len);
4567 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4568 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
4569 }
4570 if (copied_early)
4571 tcp_cleanup_rbuf(sk, skb->len);
4572 }
4573 if (!eaten) {
4574 if (tcp_checksum_complete_user(sk, skb))
4575 goto csum_error;
4576
4577
4578
4579
4580
4581 if (tcp_header_len ==
4582 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
4583 tp->rcv_nxt == tp->rcv_wup)
4584 tcp_store_ts_recent(tp);
4585
4586 tcp_rcv_rtt_measure_ts(sk, skb);
4587
4588 if ((int)skb->truesize > sk->sk_forward_alloc)
4589 goto step5;
4590
4591 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
4592
4593
4594 __skb_pull(skb,tcp_header_len);
4595 __skb_queue_tail(&sk->sk_receive_queue, skb);
4596 sk_stream_set_owner_r(skb, sk);
4597 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4598 }
4599
4600 tcp_event_data_recv(sk, skb);
4601
4602 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
4603
4604 tcp_ack(sk, skb, FLAG_DATA);
4605 tcp_data_snd_check(sk);
4606 if (!inet_csk_ack_scheduled(sk))
4607 goto no_ack;
4608 }
4609
4610 __tcp_ack_snd_check(sk, 0);
4611no_ack:
4612#ifdef CONFIG_NET_DMA
4613 if (copied_early)
4614 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
4615 else
4616#endif
4617 if (eaten)
4618 __kfree_skb(skb);
4619 else
4620 sk->sk_data_ready(sk, 0);
4621 return 0;
4622 }
4623 }
4624
4625slow_path:
4626 if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
4627 goto csum_error;
4628
4629
4630
4631
4632 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
4633 tcp_paws_discard(sk, skb)) {
4634 if (!th->rst) {
4635 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
4636 tcp_send_dupack(sk, skb);
4637 goto discard;
4638 }
4639
4640
4641
4642
4643
4644 }
4645
4646
4647
4648
4649
4650 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
4651
4652
4653
4654
4655
4656
4657 if (!th->rst)
4658 tcp_send_dupack(sk, skb);
4659 goto discard;
4660 }
4661
4662 if (th->rst) {
4663 tcp_reset(sk);
4664 goto discard;
4665 }
4666
4667 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
4668
4669 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4670 TCP_INC_STATS_BH(TCP_MIB_INERRS);
4671 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
4672 tcp_reset(sk);
4673 return 1;
4674 }
4675
4676step5:
4677 if (th->ack)
4678 tcp_ack(sk, skb, FLAG_SLOWPATH);
4679
4680 tcp_rcv_rtt_measure_ts(sk, skb);
4681
4682
4683 tcp_urg(sk, skb, th);
4684
4685
4686 tcp_data_queue(sk, skb);
4687
4688 tcp_data_snd_check(sk);
4689 tcp_ack_snd_check(sk);
4690 return 0;
4691
4692csum_error:
4693 TCP_INC_STATS_BH(TCP_MIB_INERRS);
4694
4695discard:
4696 __kfree_skb(skb);
4697 return 0;
4698}
4699
4700static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4701 struct tcphdr *th, unsigned len)
4702{
4703 struct tcp_sock *tp = tcp_sk(sk);
4704 struct inet_connection_sock *icsk = inet_csk(sk);
4705 int saved_clamp = tp->rx_opt.mss_clamp;
4706
4707 tcp_parse_options(skb, &tp->rx_opt, 0);
4708
4709 if (th->ack) {
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
4722 goto reset_and_undo;
4723
4724 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
4725 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
4726 tcp_time_stamp)) {
4727 NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
4728 goto reset_and_undo;
4729 }
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739 if (th->rst) {
4740 tcp_reset(sk);
4741 goto discard;
4742 }
4743
4744
4745
4746
4747
4748
4749
4750
4751 if (!th->syn)
4752 goto discard_and_undo;
4753
4754
4755
4756
4757
4758
4759
4760
4761 TCP_ECN_rcv_synack(tp, th);
4762
4763 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
4764 tcp_ack(sk, skb, FLAG_SLOWPATH);
4765
4766
4767
4768
4769 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
4770 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
4771
4772
4773
4774
4775 tp->snd_wnd = ntohs(th->window);
4776 tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
4777
4778 if (!tp->rx_opt.wscale_ok) {
4779 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
4780 tp->window_clamp = min(tp->window_clamp, 65535U);
4781 }
4782
4783 if (tp->rx_opt.saw_tstamp) {
4784 tp->rx_opt.tstamp_ok = 1;
4785 tp->tcp_header_len =
4786 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
4787 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
4788 tcp_store_ts_recent(tp);
4789 } else {
4790 tp->tcp_header_len = sizeof(struct tcphdr);
4791 }
4792
4793 if (tcp_is_sack(tp) && sysctl_tcp_fack)
4794 tcp_enable_fack(tp);
4795
4796 tcp_mtup_init(sk);
4797 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4798 tcp_initialize_rcv_mss(sk);
4799
4800
4801
4802
4803 tp->copied_seq = tp->rcv_nxt;
4804 smp_mb();
4805 tcp_set_state(sk, TCP_ESTABLISHED);
4806
4807 security_inet_conn_established(sk, skb);
4808
4809
4810 icsk->icsk_af_ops->rebuild_header(sk);
4811
4812 tcp_init_metrics(sk);
4813
4814 tcp_init_congestion_control(sk);
4815
4816
4817
4818
4819 tp->lsndtime = tcp_time_stamp;
4820
4821 tcp_init_buffer_space(sk);
4822
4823 if (sock_flag(sk, SOCK_KEEPOPEN))
4824 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
4825
4826 if (!tp->rx_opt.snd_wscale)
4827 __tcp_fast_path_on(tp, tp->snd_wnd);
4828 else
4829 tp->pred_flags = 0;
4830
4831 if (!sock_flag(sk, SOCK_DEAD)) {
4832 sk->sk_state_change(sk);
4833 sk_wake_async(sk, 0, POLL_OUT);
4834 }
4835
4836 if (sk->sk_write_pending ||
4837 icsk->icsk_accept_queue.rskq_defer_accept ||
4838 icsk->icsk_ack.pingpong) {
4839
4840
4841
4842
4843
4844
4845
4846 inet_csk_schedule_ack(sk);
4847 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
4848 icsk->icsk_ack.ato = TCP_ATO_MIN;
4849 tcp_incr_quickack(sk);
4850 tcp_enter_quickack_mode(sk);
4851 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
4852 TCP_DELACK_MAX, TCP_RTO_MAX);
4853
4854discard:
4855 __kfree_skb(skb);
4856 return 0;
4857 } else {
4858 tcp_send_ack(sk);
4859 }
4860 return -1;
4861 }
4862
4863
4864
4865 if (th->rst) {
4866
4867
4868
4869
4870
4871
4872 goto discard_and_undo;
4873 }
4874
4875
4876 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
4877 goto discard_and_undo;
4878
4879 if (th->syn) {
4880
4881
4882
4883
4884 tcp_set_state(sk, TCP_SYN_RECV);
4885
4886 if (tp->rx_opt.saw_tstamp) {
4887 tp->rx_opt.tstamp_ok = 1;
4888 tcp_store_ts_recent(tp);
4889 tp->tcp_header_len =
4890 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
4891 } else {
4892 tp->tcp_header_len = sizeof(struct tcphdr);
4893 }
4894
4895 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
4896 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
4897
4898
4899
4900
4901 tp->snd_wnd = ntohs(th->window);
4902 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
4903 tp->max_window = tp->snd_wnd;
4904
4905 TCP_ECN_rcv_syn(tp, th);
4906
4907 tcp_mtup_init(sk);
4908 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4909 tcp_initialize_rcv_mss(sk);
4910
4911
4912 tcp_send_synack(sk);
4913#if 0
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923 return -1;
4924#else
4925 goto discard;
4926#endif
4927 }
4928
4929
4930
4931
4932discard_and_undo:
4933 tcp_clear_options(&tp->rx_opt);
4934 tp->rx_opt.mss_clamp = saved_clamp;
4935 goto discard;
4936
4937reset_and_undo:
4938 tcp_clear_options(&tp->rx_opt);
4939 tp->rx_opt.mss_clamp = saved_clamp;
4940 return 1;
4941}
4942
4943
4944
4945
4946
4947
4948
4949
4950
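/* Receive state machine for all states except ESTABLISHED and
 * TIME_WAIT, patterned after RFC 793.
 */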
4951int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4952 struct tcphdr *th, unsigned len)
4953{
4954 struct tcp_sock *tp = tcp_sk(sk);
4955 struct inet_connection_sock *icsk = inet_csk(sk);
4956 int queued = 0;
4957
4958 tp->rx_opt.saw_tstamp = 0;
4959
4960 switch (sk->sk_state) {
4961 case TCP_CLOSE:
4962 goto discard;
4963
4964 case TCP_LISTEN:
4965 if (th->ack)
4966 return 1;
4967
4968 if (th->rst)
4969 goto discard;
4970
4971 if (th->syn) {
4972 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
4973 return 1;
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992 kfree_skb(skb);
4993 return 0;
4994 }
4995 goto discard;
4996
4997 case TCP_SYN_SENT:
4998 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
4999 if (queued >= 0)
5000 return queued;
5001
5002
5003 tcp_urg(sk, skb, th);
5004 __kfree_skb(skb);
5005 tcp_data_snd_check(sk);
5006 return 0;
5007 }
5008
5009 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5010 tcp_paws_discard(sk, skb)) {
5011 if (!th->rst) {
5012 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
5013 tcp_send_dupack(sk, skb);
5014 goto discard;
5015 }
5016
5017 }
5018
5019
5020 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5021 if (!th->rst)
5022 tcp_send_dupack(sk, skb);
5023 goto discard;
5024 }
5025
5026
5027 if (th->rst) {
5028 tcp_reset(sk);
5029 goto discard;
5030 }
5031
5032 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5033
5034
5035
5036
5037
5038
5039
5040 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5041 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
5042 tcp_reset(sk);
5043 return 1;
5044 }
5045
5046
5047 if (th->ack) {
5048 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
5049
5050 switch (sk->sk_state) {
5051 case TCP_SYN_RECV:
5052 if (acceptable) {
5053 tp->copied_seq = tp->rcv_nxt;
5054 smp_mb();
5055 tcp_set_state(sk, TCP_ESTABLISHED);
5056 sk->sk_state_change(sk);
5057
5058
5059
5060
5061
5062
5063 if (sk->sk_socket) {
5064 sk_wake_async(sk,0,POLL_OUT);
5065 }
5066
5067 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5068 tp->snd_wnd = ntohs(th->window) <<
5069 tp->rx_opt.snd_wscale;
5070 tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
5071 TCP_SKB_CB(skb)->seq);
5072
5073
5074
5075
5076
5077 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5078 !tp->srtt)
5079 tcp_ack_saw_tstamp(sk, 0);
5080
5081 if (tp->rx_opt.tstamp_ok)
5082 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5083
5084
5085
5086
5087 icsk->icsk_af_ops->rebuild_header(sk);
5088
5089 tcp_init_metrics(sk);
5090
5091 tcp_init_congestion_control(sk);
5092
5093
5094
5095
5096 tp->lsndtime = tcp_time_stamp;
5097
5098 tcp_mtup_init(sk);
5099 tcp_initialize_rcv_mss(sk);
5100 tcp_init_buffer_space(sk);
5101 tcp_fast_path_on(tp);
5102 } else {
5103 return 1;
5104 }
5105 break;
5106
5107 case TCP_FIN_WAIT1:
5108 if (tp->snd_una == tp->write_seq) {
5109 tcp_set_state(sk, TCP_FIN_WAIT2);
5110 sk->sk_shutdown |= SEND_SHUTDOWN;
5111 dst_confirm(sk->sk_dst_cache);
5112
5113 if (!sock_flag(sk, SOCK_DEAD))
5114
5115 sk->sk_state_change(sk);
5116 else {
5117 int tmo;
5118
5119 if (tp->linger2 < 0 ||
5120 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5121 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5122 tcp_done(sk);
5123 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
5124 return 1;
5125 }
5126
5127 tmo = tcp_fin_time(sk);
5128 if (tmo > TCP_TIMEWAIT_LEN) {
5129 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5130 } else if (th->fin || sock_owned_by_user(sk)) {
5131
5132
5133
5134
5135
5136
5137 inet_csk_reset_keepalive_timer(sk, tmo);
5138 } else {
5139 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5140 goto discard;
5141 }
5142 }
5143 }
5144 break;
5145
5146 case TCP_CLOSING:
5147 if (tp->snd_una == tp->write_seq) {
5148 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
5149 goto discard;
5150 }
5151 break;
5152
5153 case TCP_LAST_ACK:
5154 if (tp->snd_una == tp->write_seq) {
5155 tcp_update_metrics(sk);
5156 tcp_done(sk);
5157 goto discard;
5158 }
5159 break;
5160 }
5161 } else
5162 goto discard;
5163
5164
5165 tcp_urg(sk, skb, th);
5166
5167
5168 switch (sk->sk_state) {
5169 case TCP_CLOSE_WAIT:
5170 case TCP_CLOSING:
5171 case TCP_LAST_ACK:
5172 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
5173 break;
5174 case TCP_FIN_WAIT1:
5175 case TCP_FIN_WAIT2:
5176
5177
5178
5179
5180 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5181 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5182 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5183 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
5184 tcp_reset(sk);
5185 return 1;
5186 }
5187 }
5188
5189 case TCP_ESTABLISHED:
5190 tcp_data_queue(sk, skb);
5191 queued = 1;
5192 break;
5193 }
5194
5195
5196 if (sk->sk_state != TCP_CLOSE) {
5197 tcp_data_snd_check(sk);
5198 tcp_ack_snd_check(sk);
5199 }
5200
5201 if (!queued) {
5202discard:
5203 __kfree_skb(skb);
5204 }
5205 return 0;
5206}
5207
5208EXPORT_SYMBOL(sysctl_tcp_ecn);
5209EXPORT_SYMBOL(sysctl_tcp_reordering);
5210EXPORT_SYMBOL(tcp_parse_options);
5211EXPORT_SYMBOL(tcp_rcv_established);
5212EXPORT_SYMBOL(tcp_rcv_state_process);
5213EXPORT_SYMBOL(tcp_initialize_rcv_mss);
5214