/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <net/netdma.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

/* rfc5961 challenge ack rate limiting */
int sysctl_tcp_challenge_ack_limit = 100;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

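/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */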
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = len;
	} else {
		/* Otherwise, we make more careful check taking into
		 * account, that SACKs block is variable.
		 *
		 * "len" is invariant segment length, including TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, packet should be
		     * full sized, provided peer TCP is not badly broken.
		     * This observation (if it is correct 8)) allows
		     * to handle super-low mtu links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract also invariant (if peer is RFC compliant),
			 * tcp header plus fixed timestamp option length.
			 * Resulting "len" is MSS free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

static void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	tcp_incr_quickack(sk);
	icsk->icsk_ack.pingpong = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

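/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */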
static inline bool tcp_in_quickack_mode(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}

static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr)
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (!(tp->ecn_flags & TCP_ECN_OK))
		return;

	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* Funny extension: if ECT is not set on a segment,
		 * and we already received a previous ECN-capable segment,
		 * it is probably a retransmit.
		 */
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode((struct sock *)tp);
		break;
	case INET_ECN_CE:
		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
			/* Better not delay ACK, sender can have a very low cwnd */
			tcp_enter_quickack_mode((struct sock *)tp);
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		}
		/* fall through */
	default:
		tp->ecn_flags |= TCP_ECN_SEEN;
	}
}

static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return true;
	return false;
}

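/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */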
static void tcp_fixup_sndbuf(struct sock *sk)
{
	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);

	sndmem *= TCP_INIT_CWND;
	if (sk->sk_sndbuf < sndmem)
		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}

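/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * rcv_ssthresh is the current effective bound on the advertised window;
 * it is raised toward window_clamp only while incoming skbs carry payload
 * efficiently relative to their truesize, so that senders shipping tiny
 * segments in large buffers cannot force the receiver to commit large
 * amounts of memory for little useful data (receiver-side silly window
 * avoidance).
 */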
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Check #1 */
	if (tp->rcv_ssthresh < tp->window_clamp &&
	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
	    !sk_under_memory_pressure(sk)) {
		int incr;

		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			incr = max_t(int, incr, 2 * skb->len);
			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
					       tp->window_clamp);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

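/* 3. Tuning rcvbuf, when connection enters established state. */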
static void tcp_fixup_rcvbuf(struct sock *sk)
{
	u32 mss = tcp_sk(sk)->advmss;
	u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
	int rcvmem;

	/* Limit to 10 segments if mss <= 1460,
	 * or 14600/mss segments, with a minimum of two segments.
	 */
	if (mss > 1460)
		icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);

	rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
	while (tcp_win_from_space(rcvmem) < mss)
		rcvmem += 128;

	rcvmem *= icwnd;

	if (sk->sk_rcvbuf < rcvmem)
		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
}

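/* 4. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */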
void tcp_init_buffer_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
		tcp_fixup_rcvbuf(sk);
	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_fixup_sndbuf(sk);

	tp->rcvq_space.space = tp->rcv_wnd;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> sysctl_tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (sysctl_tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

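/* 5. Recalculate window clamp after socket hit its memory bounds. */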
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !sk_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
				    sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

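/* Initialize RCV_MSS value.
 * RCV_MSS is an our guess about MSS used by the peer.
 * We haven't any direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate.
 * Overestimations make us ACKing less frequently than needed.
 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 */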
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);

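/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation w/o timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
 * <http://public.lanl.gov/radiant/pubs.html#DRS>
 */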
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt;
	long m = sample;

	if (m == 0)
		m = 1;

	if (new_sample != 0) {
		/* If we sample in larger samples in the non-timestamp
		 * case, we could grossly overestimate the RTT especially
		 * with chatty applications or bulk transfer apps which
		 * are stalled on filesystem I/O.
		 *
		 * Also, since we are only going for a minimum in the
		 * non-timestamp case, we do not smooth things out
		 * else with timestamps disabled convergence takes too
		 * long.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else {
			m <<= 3;
			if (m < new_sample)
				new_sample = m;
		}
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	if (tp->rcv_rtt_est.rtt != new_sample)
		tp->rcv_rtt_est.rtt = new_sample;
}

static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tcp_time_stamp;
}

static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
					  const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	if (tp->rx_opt.rcv_tsecr &&
	    (TCP_SKB_CB(skb)->end_seq -
	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
}

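/*
 * This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */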
void tcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int time;
	int space;

	if (tp->rcvq_space.time == 0)
		goto new_measure;

	time = tcp_time_stamp - tp->rcvq_space.time;
	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
		return;

	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);

	space = max(tp->rcvq_space.space, space);

	if (tp->rcvq_space.space != space) {
		int rcvmem;

		tp->rcvq_space.space = space;

		if (sysctl_tcp_moderate_rcvbuf &&
		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
			int new_clamp = space;

			/* Receive space grows, normalize in order to
			 * take into account packet headers and sk_buff
			 * structure overhead.
			 */
			space /= tp->advmss;
			if (!space)
				space = 1;
			rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
			while (tcp_win_from_space(rcvmem) < tp->advmss)
				rcvmem += 128;
			space *= rcvmem;
			space = min(space, sysctl_tcp_rmem[2]);
			if (space > sk->sk_rcvbuf) {
				sk->sk_rcvbuf = space;

				/* Make the window clamp follow along. */
				tp->window_clamp = new_clamp;
			}
		}
	}

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tcp_time_stamp;
}

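/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCP's do slow start at the beginning of data
 * transmission.  The means that until we send the first few ACK's the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */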
static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 now;

	inet_csk_schedule_ack(sk);

	tcp_measure_rcv_mss(sk, skb);

	tcp_rcv_rtt_measure(tp);

	now = tcp_time_stamp;

	if (!icsk->icsk_ack.ato) {
		/* The _first_ data packet received, initialize
		 * delayed ACK engine.
		 */
		tcp_incr_quickack(sk);
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		int m = now - icsk->icsk_ack.lrcvtime;

		if (m <= TCP_ATO_MIN / 2) {
			/* The fastest case is the first. */
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
		} else if (m < icsk->icsk_ack.ato) {
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
			if (icsk->icsk_ack.ato > icsk->icsk_rto)
				icsk->icsk_ack.ato = icsk->icsk_rto;
		} else if (m > icsk->icsk_rto) {
			/* Too long gap. Apparently sender failed to
			 * restart window, so that we send ACKs quickly.
			 */
			tcp_incr_quickack(sk);
			sk_mem_reclaim(sk);
		}
	}
	icsk->icsk_ack.lrcvtime = now;

	TCP_ECN_check_ce(tp, skb);

	if (skb->len >= 128)
		tcp_grow_window(sk, skb);
}

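/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 */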
static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt; /* RTT */

	/* The following amusing code comes from Jacobson's
	 * article in SIGCOMM '88.  Note that rtt and mdev
	 * are scaled versions of rtt and mean deviation.
	 * This is designed to be as fast as possible
	 * m stands for "measurement".
	 *
	 * On a 1990 paper the rto value is changed to:
	 * RTO = rtt + 4 * mdev
	 *
	 * Funny. This algorithm seems to be very broken.
	 * These formulae increase RTO, when it should be decreased, increase
	 * too slowly, when it should be increased quickly, decrease too quickly
	 * etc. I guess in BSD RTO takes ONE second, and is never less
	 * than 1/2 of rtt, because of this fun shamanism. -- ANK (980729)
	 */
	if (m == 0)
		m = 1;
	if (tp->srtt != 0) {
		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (tp->mdev >> 2);	/* similar update on mdev */
			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
			 * for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of rto,
			 * but also it limits too fast rto decreases,
			 * happening in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev >> 2);	/* similar update on mdev */
		}
		tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */
		if (tp->mdev > tp->mdev_max) {
			tp->mdev_max = tp->mdev;
			if (tp->mdev_max > tp->rttvar)
				tp->rttvar = tp->mdev_max;
		}
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max < tp->rttvar)
				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max = tcp_rto_min(sk);
		}
	} else {
		/* no previous measure. */
		tp->srtt = m << 3;	/* take the measured time to be rtt */
		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
		tp->rtt_seq = tp->snd_nxt;
	}
}

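/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */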
void tcp_set_rto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	/* Old crap is replaced with new one. 8)
	 *
	 * More seriously:
	 * 1. If rtt variance happened to be less 50msec, it is hallucination.
	 *    It cannot be less due to utterly erratic ACK generation made
	 *    at least by solaris and freebsd. "Erratic ACKs" has _undoubtedly_
	 *    to do with delayed acks, because at cwnd>2 true delack timeout
	 *    is invisible. Actually, Linux-2.4 also generates erratic
	 *    ACKs in some circumstances.
	 */
	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);

	/* 2. Fixups made earlier cannot be right.
	 *    If we do not estimate RTO correctly without them,
	 *    all the algo is pure shit and should be replaced
	 *    with correct one. It is exactly, which we pretend to do.
	 */

	/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
	 * guarantees that rto is higher.
	 */
	tcp_bound_rto(sk);
}

__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
{
	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

	if (!cwnd)
		cwnd = TCP_INIT_CWND;
	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}

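/*
 * Packet counting of FACK is based on in-order assumptions, therefore TCP
 * disables it when reordering is detected
 */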
void tcp_disable_fack(struct tcp_sock *tp)
{
	/* RFC3517 uses different metric in lost marker => reset on change */
	if (tcp_is_fack(tp))
		tp->lost_skb_hint = NULL;
	tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
}

/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
}

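/* Record a higher-than-expected degree of packet reordering and, since
 * FACK assumes in-order delivery, turn FACK off once reordering is seen.
 */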
static void tcp_update_reordering(struct sock *sk, const int metric,
				  const int ts)
{
	struct tcp_sock *tp = tcp_sk(sk);
	if (metric > tp->reordering) {
		int mib_idx;

		tp->reordering = min(TCP_MAX_REORDERING, metric);

		/* This exciting event is worth to be remembered. 8) */
		if (ts)
			mib_idx = LINUX_MIB_TCPTSREORDER;
		else if (tcp_is_reno(tp))
			mib_idx = LINUX_MIB_TCPRENOREORDER;
		else if (tcp_is_fack(tp))
			mib_idx = LINUX_MIB_TCPFACKREORDER;
		else
			mib_idx = LINUX_MIB_TCPSACKREORDER;

		NET_INC_STATS_BH(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
			 tp->reordering,
			 tp->fackets_out,
			 tp->sacked_out,
			 tp->undo_marker ? tp->undo_retrans : 0);
#endif
		tcp_disable_fack(tp);
	}

	if (metric > 0)
		tcp_disable_early_retrans(tp);
}

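/* This must be called before lost_out is incremented */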
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
	if ((tp->retransmit_skb_hint == NULL) ||
	    before(TCP_SKB_CB(skb)->seq,
		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
		tp->retransmit_skb_hint = skb;

	if (!tp->lost_out ||
	    after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
		tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
}

static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
		tcp_verify_retransmit_hint(tp, skb);

		tp->lost_out += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
	}
}

static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
					    struct sk_buff *skb)
{
	tcp_verify_retransmit_hint(tp, skb);

	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
		tp->lost_out += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
	}
}

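/* SACK block range validation: a block must fit the expected sequence
 * limits, i.e., lie between SND.UNA and SND.NXT.  D-SACK blocks (RFC2883)
 * may instead cover already-ACKed data, so they are checked against
 * undo_marker and bounded by max_window to catch wrapped sequences.
 */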
static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
				   u32 start_seq, u32 end_seq)
{
	/* Too far in future, or reversed (interpretation is ambiguous) */
	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
		return false;

	/* Nasty start_seq wrap-around check. */
	if (!before(start_seq, tp->snd_nxt))
		return false;

	/* Is the block within the still-outstanding window?
	 * (Everything at or below snd_una is D-SACK territory,
	 * handled below.)
	 */
	if (after(start_seq, tp->snd_una))
		return true;

	if (!is_dsack || !tp->undo_marker)
		return false;

	/* ...then it's D-SACK, and must reside below snd_una completely */
	if (after(end_seq, tp->snd_una))
		return false;

	if (!before(start_seq, tp->undo_marker))
		return true;

	/* Too old */
	if (!after(end_seq, tp->undo_marker))
		return false;

	/* Undo_marker boundary crossing (overestimates a lot). Known already:
	 * start_seq < undo_marker and end_seq >= undo_marker.
	 */
	return !before(start_seq, end_seq - tp->max_window);
}

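/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
 * Event "B". Later note: FACK people cheated me again 8), we have to account
 * for reordering! Ugly, but should help.
 *
 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
 * less than what is now known to be received by the other end (derived from
 * highest SACK block). Also calculate the lowest snd_nxt among the remaining
 * retransmitted skbs to avoid some costly processing per ACKs.
 */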
static void tcp_mark_lost_retrans(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt = 0;
	u32 new_low_seq = tp->snd_nxt;
	u32 received_upto = tcp_highest_sack_seq(tp);

	if (!tcp_is_fack(tp) || !tp->retrans_out ||
	    !after(received_upto, tp->lost_retrans_low) ||
	    icsk->icsk_ca_state != TCP_CA_Recovery)
		return;

	tcp_for_write_queue(skb, sk) {
		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;

		if (skb == tcp_send_head(sk))
			break;
		if (cnt == tp->retrans_out)
			break;
		if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
			continue;

		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
			continue;

		/* TODO: We would like to get rid of tcp_is_fack(tp) only
		 * constraint here (see above) but figuring out that at
		 * least tp->reordering SACK blocks reside between ack_seq
		 * and received_upto is not easy task to do cheaply with
		 * the available datastructures.
		 *
		 * Whether FACK should check here for tp->reordering segs
		 * in-between one could argue for either way (it would be
		 * rather simple to implement as we could count fack_count
		 * during the walk and do tp->fackets_out - fack_count).
		 */
		if (after(received_upto, ack_seq)) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
			tp->retrans_out -= tcp_skb_pcount(skb);

			tcp_skb_mark_lost_uncond_verify(tp, skb);
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
		} else {
			if (before(ack_seq, new_low_seq))
				new_low_seq = ack_seq;
			cnt += tcp_skb_pcount(skb);
		}
	}

	if (tp->retrans_out)
		tp->lost_retrans_low = new_low_seq;
}

static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
			    struct tcp_sack_block_wire *sp, int num_sacks,
			    u32 prior_snd_una)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
	bool dup_sack = false;

	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
		dup_sack = true;
		tcp_dsack_seen(tp);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
	} else if (num_sacks > 1) {
		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);

		if (!after(end_seq_0, end_seq_1) &&
		    !before(start_seq_0, start_seq_1)) {
			dup_sack = true;
			tcp_dsack_seen(tp);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPDSACKOFORECV);
		}
	}

	/* D-SACK for already forgotten data... Do dumb counting. */
	if (dup_sack && tp->undo_marker && tp->undo_retrans &&
	    !after(end_seq_0, prior_snd_una) &&
	    after(end_seq_0, tp->undo_marker))
		tp->undo_retrans--;

	return dup_sack;
}

struct tcp_sacktag_state {
	int reord;
	int fack_count;
	int flag;
};

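/* Check if skb is fully within the SACK block. In presence of GSO skbs,
 * the incoming SACK may not exactly match but we can find smaller MSS
 * aligned portion of it that matches. Therefore we might need to fragment
 * which may fail and creates some hassle (caller must handle error case
 * returns).
 */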
static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
				 u32 start_seq, u32 end_seq)
{
	int err;
	bool in_sack;
	unsigned int pkt_len;
	unsigned int mss;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (tcp_skb_pcount(skb) > 1 && !in_sack &&
	    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
		mss = tcp_skb_mss(skb);
		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);

		if (!in_sack) {
			pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				pkt_len = mss;
		} else {
			pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				return -EINVAL;
		}

		/* Round if necessary so that SACKs cover only full MSSes
		 * and/or the remaining small portion (if present)
		 */
		if (pkt_len > mss) {
			unsigned int new_len = (pkt_len / mss) * mss;
			if (!in_sack && new_len < pkt_len) {
				new_len += mss;
				if (new_len > skb->len)
					return 0;
			}
			pkt_len = new_len;
		}
		err = tcp_fragment(sk, skb, pkt_len, mss);
		if (err < 0)
			return err;
	}

	return in_sack;
}

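/* Mark the given newly-SACKed range as such, adjusting counters and hints. */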
static u8 tcp_sacktag_one(struct sock *sk,
			  struct tcp_sacktag_state *state, u8 sacked,
			  u32 start_seq, u32 end_seq,
			  bool dup_sack, int pcount)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int fack_count = state->fack_count;

	/* Account D-SACK for retransmitted packet. */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (tp->undo_marker && tp->undo_retrans &&
		    after(end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if (sacked & TCPCB_SACKED_ACKED)
			state->reord = min(fack_count, state->reord);
	}

	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
	if (!after(end_seq, tp->snd_una))
		return sacked;

	if (!(sacked & TCPCB_SACKED_ACKED)) {
		if (sacked & TCPCB_SACKED_RETRANS) {
			/* If the segment is not tagged as lost,
			 * we do not clear RETRANS, believing
			 * that retransmission is still in flight.
			 */
			if (sacked & TCPCB_LOST) {
				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
				tp->lost_out -= pcount;
				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
				/* New sack for not retransmitted frame,
				 * which was in hole. It is reordering.
				 */
				if (before(start_seq,
					   tcp_highest_sack_seq(tp)))
					state->reord = min(fack_count,
							   state->reord);
				if (!after(end_seq, tp->high_seq))
					state->flag |= FLAG_ORIG_SACK_ACKED;
			}

			if (sacked & TCPCB_LOST) {
				sacked &= ~TCPCB_LOST;
				tp->lost_out -= pcount;
			}
		}

		sacked |= TCPCB_SACKED_ACKED;
		state->flag |= FLAG_DATA_SACKED;
		tp->sacked_out += pcount;

		fack_count += pcount;

		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
		if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
			tp->lost_cnt_hint += pcount;

		if (fack_count > tp->fackets_out)
			tp->fackets_out = fack_count;
	}

	/* D-SACK. We can detect redundant retransmission in S|R and plain R
	 * frames and clear it. undo_retrans is decreased above, L|R frames
	 * are accounted above as well.
	 */
	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
		sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= pcount;
	}

	return sacked;
}

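/* Shift newly-SACKed bytes from this skb to the immediately previous
 * already-SACKed skb (represented by pcount).
 */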
static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
			    struct tcp_sacktag_state *state,
			    unsigned int pcount, int shifted, int mss,
			    bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */

	BUG_ON(!pcount);

	/* Adjust counters and hints for the newly sacked sequence
	 * range but discard the return value since prev is already
	 * marked. We must tag the range first because the seq
	 * advancement below implicitly advances
	 * tcp_highest_sack_seq() when skb is highest_sack.
	 */
	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
			start_seq, end_seq, dup_sack, pcount);

	if (skb == tp->lost_skb_hint)
		tp->lost_cnt_hint += pcount;

	TCP_SKB_CB(prev)->end_seq += shifted;
	TCP_SKB_CB(skb)->seq += shifted;

	skb_shinfo(prev)->gso_segs += pcount;
	BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
	skb_shinfo(skb)->gso_segs -= pcount;

	/* When we're adding to gso_segs == 1, gso_size will be zero,
	 * in theory this shouldn't be necessary but as long as DSACK
	 * code can come after this skb later on it's better to keep
	 * setting gso_size to something.
	 */
	if (!skb_shinfo(prev)->gso_size) {
		skb_shinfo(prev)->gso_size = mss;
		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
	}

	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
	if (skb_shinfo(skb)->gso_segs <= 1) {
		skb_shinfo(skb)->gso_size = 0;
		skb_shinfo(skb)->gso_type = 0;
	}

	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);

	if (skb->len > 0) {
		BUG_ON(!tcp_skb_pcount(skb));
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
		return false;
	}

	/* Whole SKB was eaten :-) */

	if (skb == tp->retransmit_skb_hint)
		tp->retransmit_skb_hint = prev;
	if (skb == tp->scoreboard_skb_hint)
		tp->scoreboard_skb_hint = prev;
	if (skb == tp->lost_skb_hint) {
		tp->lost_skb_hint = prev;
		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
	}

	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
	if (skb == tcp_highest_sack(sk))
		tcp_advance_highest_sack(sk, skb);

	tcp_unlink_write_queue(skb, sk);
	sk_wmem_free_skb(sk, skb);

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);

	return true;
}

/* I wish gso_size would have a bit more sane initialization than
 * something-or-zero which complicates things
 */
static int tcp_skb_seglen(const struct sk_buff *skb)
{
	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
}

/* Shifting pages past head area doesn't work */
static int skb_can_shift(const struct sk_buff *skb)
{
	return !skb_headlen(skb) && skb_is_nonlinear(skb);
}

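/* Try collapsing SACK blocks spanning across multiple skbs to a single
 * skb.
 */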
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
					  struct tcp_sacktag_state *state,
					  u32 start_seq, u32 end_seq,
					  bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev;
	int mss;
	int pcount = 0;
	int len;
	int in_sack;

	if (!sk_can_gso(sk))
		goto fallback;

	/* Normally R but no L won't result in plain S */
	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
		goto fallback;
	if (!skb_can_shift(skb))
		goto fallback;
	/* This frame is about to be dropped (was ACKed). */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		goto fallback;

	/* Can only happen with delayed DSACK + discard craziness */
	if (unlikely(skb == tcp_write_queue_head(sk)))
		goto fallback;
	prev = tcp_write_queue_prev(sk, skb);

	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
		goto fallback;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (in_sack) {
		len = skb->len;
		pcount = tcp_skb_pcount(skb);
		mss = tcp_skb_seglen(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed rather than
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;
	} else {
		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
			goto noop;
		/* CHECKME: This is non-MSS split case only?, this will
		 * cause skipped skbs due to advancing loop btw, original
		 * has that feature too
		 */
		if (tcp_skb_pcount(skb) <= 1)
			goto noop;

		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
		if (!in_sack) {
			/* TODO: head merge to next could be attempted here
			 * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
			 * though it might not be worth of the additional hassle
			 *
			 * ...we can probably just fallback to what was done
			 * previously. We could try merging non-SACKed ones
			 * as well but it probably isn't going to buy off
			 * because later SACKs might again split them, and
			 * it would make skb timestamp tracking considerably
			 * harder problem.
			 */
			goto fallback;
		}

		len = end_seq - TCP_SKB_CB(skb)->seq;
		BUG_ON(len < 0);
		BUG_ON(len > skb->len);

		/* MSS boundaries should be honoured or else pcount will
		 * severely break even though it makes things bit trickier.
		 * Optimize common case to avoid most of the divides
		 */
		mss = tcp_skb_mss(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed rather than
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;

		if (len == mss) {
			pcount = 1;
		} else if (len < mss) {
			goto noop;
		} else {
			pcount = len / mss;
			len = pcount * mss;
		}
	}

	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
		goto fallback;

	if (!skb_shift(prev, skb, len))
		goto fallback;
	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
		goto out;

	/* Hole filled allows collapsing with the next as well, this is very
	 * useful when hole on every nth skb pattern happens
	 */
	if (prev == tcp_write_queue_tail(sk))
		goto out;
	skb = tcp_write_queue_next(sk, prev);

	if (!skb_can_shift(skb) ||
	    (skb == tcp_send_head(sk)) ||
	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
	    (mss != tcp_skb_seglen(skb)))
		goto out;

	len = skb->len;
	if (skb_shift(prev, skb, len)) {
		pcount += tcp_skb_pcount(skb);
		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
	}

out:
	state->fack_count += pcount;
	return prev;

noop:
	return skb;

fallback:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
	return NULL;
}

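/* Walk the write queue from skb, tagging everything the SACK block
 * covers, and shifting/merging newly-SACKed data into the previous
 * skb where possible.
 */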
static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					bool dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	tcp_for_write_queue_from(skb, sk) {
		int in_sack = 0;
		bool dup_sack = dup_sack_in;

		if (skb == tcp_send_head(sk))
			break;

		/* queue is in-order => we can short-circuit the walk early */
		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		if ((next_dup != NULL) &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = true;
		}

		/* skb reference here is a bit tricky to get right, since
		 * shifting can eat and free both this skb and the next,
		 * so not even _safe variant of the loop is enough.
		 */
		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp != NULL) {
				if (tmp != skb) {
					skb = tmp;
					continue;
				}

				in_sack = 0;
			} else {
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked =
				tcp_sacktag_one(sk,
						state,
						TCP_SKB_CB(skb)->sacked,
						TCP_SKB_CB(skb)->seq,
						TCP_SKB_CB(skb)->end_seq,
						dup_sack,
						tcp_skb_pcount(skb));

			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}

		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
}

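/* Avoid all extra work that is being done by sacktag while walking in
 * a normal way
 */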
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
					struct tcp_sacktag_state *state,
					u32 skip_to_seq)
{
	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
			break;

		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
}

static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
						struct sock *sk,
						struct tcp_sack_block *next_dup,
						struct tcp_sacktag_state *state,
						u32 skip_to_seq)
{
	if (next_dup == NULL)
		return skb;

	if (before(next_dup->start_seq, skip_to_seq)) {
		skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
		skb = tcp_sacktag_walk(skb, sk, NULL, state,
				       next_dup->start_seq, next_dup->end_seq,
				       1);
	}

	return skb;
}

static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
{
	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
}

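/* Process the SACK blocks carried by an incoming ACK: validate and sort
 * them, then tag the scoreboard, using recv_sack_cache to skip sequence
 * ranges already processed by previous ACKs.
 */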
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			u32 prior_snd_una)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const unsigned char *ptr = (skb_transport_header(ack_skb) +
				    TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct tcp_sacktag_state state;
	struct sk_buff *skb;
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
	bool found_dup_sack = false;
	int i, j;
	int first_sack_index;

	state.flag = 0;
	state.reord = tp->packets_out;

	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
			tp->fackets_out = 0;
		tcp_highest_sack_reset(sk);
	}

	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una);
	if (found_dup_sack)
		state.flag |= FLAG_DSACKING_ACK;

	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones, they can
	 * contain valid SACK info.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	used_sacks = 0;
	first_sack_index = 0;
	for (i = 0; i < num_sacks; i++) {
		bool dup_sack = !i && found_dup_sack;

		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

		if (!tcp_is_sackblock_valid(tp, dup_sack,
					    sp[used_sacks].start_seq,
					    sp[used_sacks].end_seq)) {
			int mib_idx;

			if (dup_sack) {
				if (!tp->undo_marker)
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
				else
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
			} else {
				/* Don't count olds caused by ACK reordering */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(sp[used_sacks].end_seq, tp->snd_una))
					continue;
				mib_idx = LINUX_MIB_TCPSACKDISCARD;
			}

			NET_INC_STATS_BH(sock_net(sk), mib_idx);
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		/* Ignore very old stuff early */
		if (!after(sp[used_sacks].end_seq, prior_snd_una))
			continue;

		used_sacks++;
	}

	/* order SACK blocks to allow in order walk of the retrans queue */
	for (i = used_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
				swap(sp[j], sp[j + 1]);

				/* Track where the first SACK block goes to */
				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}

	skb = tcp_write_queue_head(sk);
	state.fack_count = 0;
	i = 0;

	if (!tp->sacked_out) {
		/* It's already past, so skip checking against it */
		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
	} else {
		cache = tp->recv_sack_cache;
		/* Skip empty blocks in at head of the cache */
		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
		       !cache->end_seq)
			cache++;
	}

	while (i < used_sacks) {
		u32 start_seq = sp[i].start_seq;
		u32 end_seq = sp[i].end_seq;
		bool dup_sack = (found_dup_sack && (i == first_sack_index));
		struct tcp_sack_block *next_dup = NULL;

		if (found_dup_sack && ((i + 1) == first_sack_index))
			next_dup = &sp[i + 1];

		/* Skip too early cached blocks */
		while (tcp_sack_cache_ok(tp, cache) &&
		       !before(start_seq, cache->end_seq))
			cache++;

		/* Can skip some work by looking recv_sack_cache? */
		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
		    after(end_seq, cache->start_seq)) {

			/* Head todo? */
			if (before(start_seq, cache->start_seq)) {
				skb = tcp_sacktag_skip(skb, sk, &state,
						       start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
						       &state,
						       start_seq,
						       cache->start_seq,
						       dup_sack);
			}

			/* Rest of the block already fully processed? */
			if (!after(end_seq, cache->end_seq))
				goto advance_sp;

			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
						       &state,
						       cache->end_seq);

			/* ...tail remains todo... */
			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
				/* ...but better entrypoint exists! */
				skb = tcp_highest_sack(sk);
				if (skb == NULL)
					break;
				state.fack_count = tp->fackets_out;
				cache++;
				goto walk;
			}

			skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
			/* Check overlap against next cached too */
			cache++;
			continue;
		}

		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
			skb = tcp_highest_sack(sk);
			if (skb == NULL)
				break;
			state.fack_count = tp->fackets_out;
		}
		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);

walk:
		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
				       start_seq, end_seq, dup_sack);

advance_sp:
		i++;
	}

	/* Clear the head of the cache sack blocks so we can skip it next time */
	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}
	for (j = 0; j < used_sacks; j++)
		tp->recv_sack_cache[i++] = sp[j];

	tcp_mark_lost_retrans(sk);

	tcp_verify_left_out(tp);

	if ((state.reord < tp->fackets_out) &&
	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);

out:

#if FASTRETRANS_DEBUG > 0
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
	return state.flag;
}

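/* Limits sacked_out so that sum with lost_out isn't ever larger than
 * packets_out. Returns false if sacked_out adjustement wasn't necessary.
 */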
static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
{
	u32 holes;

	holes = max(tp->lost_out, 1U);
	holes = min(holes, tp->packets_out);

	if ((tp->sacked_out + holes) > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		return true;
	}
	return false;
}

/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only another reason could be bug in receiver TCP.
 */
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
	struct tcp_sock *tp = tcp_sk(sk);
	if (tcp_limit_reno_sacked(tp))
		tcp_update_reordering(sk, tp->packets_out + addend, 0);
}

/* Emulate SACKs for SACKless connection: account for a new dupack. */

static void tcp_add_reno_sack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	tp->sacked_out++;
	tcp_check_reno_reordering(sk, 0);
	tcp_verify_left_out(tp);
}

/* Account for ACK, ACKing some data in Reno Recovery phase. */

static void tcp_remove_reno_sacks(struct sock *sk, int acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (acked > 0) {
		/* One ACK acked hole. The rest eat duplicate ACKs. */
		if (acked - 1 >= tp->sacked_out)
			tp->sacked_out = 0;
		else
			tp->sacked_out -= acked - 1;
	}
	tcp_check_reno_reordering(sk, acked);
	tcp_verify_left_out(tp);
}

static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}

static void tcp_clear_retrans_partial(struct tcp_sock *tp)
{
	tp->retrans_out = 0;
	tp->lost_out = 0;

	tp->undo_marker = 0;
	tp->undo_retrans = 0;
}

void tcp_clear_retrans(struct tcp_sock *tp)
{
	tcp_clear_retrans_partial(tp);

	tp->fackets_out = 0;
	tp->sacked_out = 0;
}

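/* Enter Loss state. If "how" is not zero, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */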
void tcp_enter_loss(struct sock *sk, int how)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	bool new_recovery = false;

	/* Reduce ssthresh if it has not yet been made inside this window. */
	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
	    !after(tp->high_seq, tp->snd_una) ||
	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
		new_recovery = true;
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tcp_ca_event(sk, CA_EVENT_LOSS);
	}
	tp->snd_cwnd = 1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;

	tcp_clear_retrans_partial(tp);

	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	tp->undo_marker = tp->snd_una;
	if (how) {
		tp->sacked_out = 0;
		tp->fackets_out = 0;
	}
	tcp_clear_all_retrans_hints(tp);

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;

		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
			tp->undo_marker = 0;
		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
		}
	}
	tcp_verify_left_out(tp);

	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);

	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
	 * loss recovery is underway except recurring timeout(s) on
	 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
	 */
	tp->frto = sysctl_tcp_frto &&
		   (new_recovery || icsk->icsk_retransmits) &&
		   !inet_csk(sk)->icsk_mtup.probe_size;
}

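/* If ACK arrived pointing to a remembered SACK, it means that our
 * remembered SACKs do not reflect real state of receiver i.e.
 * receiver _host_ is heavily congested (or buggy).
 *
 * Do processing similar to RTO timeout.
 */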
static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
	if (flag & FLAG_SACK_RENEGING) {
		struct inet_connection_sock *icsk = inet_csk(sk);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);

		tcp_enter_loss(sk, 1);
		icsk->icsk_retransmits++;
		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  icsk->icsk_rto, TCP_RTO_MAX);
		return true;
	}
	return false;
}

static inline int tcp_fackets_out(const struct tcp_sock *tp)
{
	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
}

/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
 * counter when SACK is enabled (without SACK, sacked_out is used for
 * that purpose).
 *
 * With reordering, holes may still be in flight, so RFC3517 recovery
 * uses pure sacked_out (total number of SACKed segments) even though
 * it violates the RFC that uses duplicate ACKs, often these are equal
 * but when e.g. out-of-window ACKs or packet duplication occurs,
 * they differ. Since neither occurs due to loss, TCP should really
 * ignore them.
 */
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
}

1946
1947static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
1948{
1949 struct tcp_sock *tp = tcp_sk(sk);
1950 unsigned long delay;
1951
1952
1953
1954
1955
1956 if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
1957 (flag & FLAG_ECE) || !tp->srtt)
1958 return false;
1959
1960 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
1961 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
1962 return false;
1963
1964 inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
1965 TCP_RTO_MAX);
1966 return true;
1967}
1968
1969static inline int tcp_skb_timedout(const struct sock *sk,
1970 const struct sk_buff *skb)
1971{
1972 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
1973}
1974
1975static inline int tcp_head_timedout(const struct sock *sk)
1976{
1977 const struct tcp_sock *tp = tcp_sk(sk);
1978
1979 return tp->packets_out &&
1980 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
1981}
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
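/* Overview of the Linux NewReno/SACK/FACK/ECN state machine:
 * the sender runs in one of five states: Open (normal), Disorder
 * (dubious events seen, be conservative), CWR (cwnd was reduced in
 * response to ECN or local congestion), Recovery (holes are being
 * plugged by retransmission after enough duplicate ACKs/SACKs), and
 * Loss (all alarms ring: RTO or SACK reneging; cwnd restarts from 1).
 *
 * tcp_time_to_recover() decides when to move from the "disordered"
 * condition to Recovery; essentially, it answers the question "when
 * should the count of duplicate/SACKed segments be treated as proof
 * of loss?".
 */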
static bool tcp_time_to_recover(struct sock *sk, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 packets_out;

	/* Trick#1: The loss is proven. */
	if (tp->lost_out)
		return true;

	/* Not-A-Trick#2 : Classic rule... */
	if (tcp_dupack_heuristics(tp) > tp->reordering)
		return true;

	/* Trick#3 : when we use RFC2988 timer restart, fast
	 * retransmit can be triggered by timeout of queue head.
	 */
	if (tcp_is_fack(tp) && tcp_head_timedout(sk))
		return true;

	/* Trick#4: if the whole remaining flight is already SACKed and
	 * nothing new can be sent, further dupacks will never arrive.
	 */
	packets_out = tp->packets_out;
	if (packets_out <= tp->reordering &&
	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
	    !tcp_may_send_now(sk)) {
		/* We have nothing to send. This connection is limited
		 * either by receiver window or by application.
		 */
		return true;
	}

	/* If a thin stream is detected, retransmit after first
	 * received dupack. Employ only if SACK is supported in order
	 * to avoid possible corner-case series of spurious retransmissions
	 * Use only if there are no unsent data.
	 */
	if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
	    tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
	    tcp_is_sack(tp) && !tcp_send_head(sk))
		return true;

	/* Trick#6: TCP early retransmit, per RFC5827.  To avoid spurious
	 * retransmissions due to small network reorderings, we implement
	 * Mitigation A.3 in the RFC and delay the retransmission for a
	 * short interval if appropriate.
	 */
	if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
	    (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
	    !tcp_may_send_now(sk))
		return !tcp_pause_early_retransmit(sk, flag);

	return false;
}

/* New heuristics: it is possible only after we switched to restart timer
 * each time when something is ACKed. Hence, we can detect timed out packets
 * during fast retransmit without falling to slow start.
 *
 * Usefulness of this as is very questionable, since we should know which of
 * the segments is the next to timeout which is relatively expensive to find
 * in general case unless we add some data structure just for that. The
 * current approach certainly won't find the right one too often and when it
 * finally does find _something_ it usually marks large part of the window
 * right away (because a retransmission with a larger timestamp blocks the
 * loop from advancing). -ij
 */
static void tcp_timeout_skbs(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
		return;

	skb = tp->scoreboard_skb_hint;
	if (tp->scoreboard_skb_hint == NULL)
		skb = tcp_write_queue_head(sk);

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (!tcp_skb_timedout(sk, skb))
			break;

		tcp_skb_mark_lost(tp, skb);
	}

	tp->scoreboard_skb_hint = skb;

	tcp_verify_left_out(tp);
}

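/* Detect loss by marking head of queue up as lost.
 * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
 * are considered lost. For RFC3517 SACK, a segment is considered lost if it
 * has at least tp->reordering SACKed seqments above it; "packets" refers to
 * the max sacked_out - reordering segments in that case.
 */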
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt, oldcnt;
	int err;
	unsigned int mss;
	/* Use SACK to deduce losses of new sequences sent during recovery */
	const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;

	WARN_ON(packets > tp->packets_out);
	if (tp->lost_skb_hint) {
		skb = tp->lost_skb_hint;
		cnt = tp->lost_cnt_hint;
		/* Head already handled? */
		if (mark_head && skb != tcp_write_queue_head(sk))
			return;
	} else {
		skb = tcp_write_queue_head(sk);
		cnt = 0;
	}

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		/* TODO: do this better */
		/* this is not the most efficient way to do this... */
		tp->lost_skb_hint = skb;
		tp->lost_cnt_hint = cnt;

		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
			break;

		oldcnt = cnt;
		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
			cnt += tcp_skb_pcount(skb);

		if (cnt > packets) {
			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
			    (oldcnt >= packets))
				break;

			mss = skb_shinfo(skb)->gso_size;
			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
			if (err < 0)
				break;
			cnt = packets;
		}

		tcp_skb_mark_lost(tp, skb);

		if (mark_head)
			break;
	}
	tcp_verify_left_out(tp);
}

/* Account newly detected lost packet(s) */

static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_is_reno(tp)) {
		tcp_mark_head_lost(sk, 1, 1);
	} else if (tcp_is_fack(tp)) {
		int lost = tp->fackets_out - tp->reordering;
		if (lost <= 0)
			lost = 1;
		tcp_mark_head_lost(sk, lost, 0);
	} else {
		int sacked_upto = tp->sacked_out - tp->reordering;
		if (sacked_upto >= 0)
			tcp_mark_head_lost(sk, sacked_upto, 0);
		else if (fast_rexmit)
			tcp_mark_head_lost(sk, 1, 1);
	}

	tcp_timeout_skbs(sk);
}

/* CWND moderation, preventing bursts due to too big ACKs
 * in dubious situations.
 */
static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
{
	tp->snd_cwnd = min(tp->snd_cwnd,
			   tcp_packets_in_flight(tp) + tcp_max_burst(tp));
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* Nothing was retransmitted or returned timestamp is less
 * than timestamp of the first retransmission.
 */
static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
	return !tp->retrans_stamp ||
	       (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
}

/* Undo procedures. */

#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_family == AF_INET) {
		pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &inet->inet_daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);
		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &np->daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#endif
}
#else
#define DBGUNDO(x...) do { } while (0)
#endif

static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->prior_ssthresh) {
		const struct inet_connection_sock *icsk = inet_csk(sk);

		if (icsk->icsk_ca_ops->undo_cwnd)
			tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
		else
			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);

		if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
			tp->snd_ssthresh = tp->prior_ssthresh;
			TCP_ECN_withdraw_cwr(tp);
		}
	} else {
		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
	}
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

static inline bool tcp_may_undo(const struct tcp_sock *tp)
{
	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}

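/* Try to fully undo the cwnd reduction when the whole window has been
 * acknowledged (on leaving Loss or Recovery).
 */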
static bool tcp_try_undo_recovery(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_may_undo(tp)) {
		int mib_idx;

		/* Happy end! We did not retransmit anything
		 * or our original transmission succeeded.
		 */
		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
		tcp_undo_cwr(sk, true);
		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
			mib_idx = LINUX_MIB_TCPLOSSUNDO;
		else
			mib_idx = LINUX_MIB_TCPFULLUNDO;

		NET_INC_STATS_BH(sock_net(sk), mib_idx);
		tp->undo_marker = 0;
	}
	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
		/* Hold old state until something *above* high_seq
		 * is ACKed. For Reno it is MUST to prevent false
		 * fast retransmits (RFC2582). SACK TCP is safe.
		 */
		tcp_moderate_cwnd(tp);
		return true;
	}
	tcp_set_ca_state(sk, TCP_CA_Open);
	return false;
}

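/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */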
static void tcp_try_undo_dsack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->undo_marker && !tp->undo_retrans) {
		DBGUNDO(sk, "D-SACK");
		tcp_undo_cwr(sk, true);
		tp->undo_marker = 0;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
	}
}

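/* Check whether anything in the window has (ever) been retransmitted.
 * tp->retrans_out alone is not enough: a failed retransmission does not
 * show up there but must still block clearing of retrans_stamp, or a
 * later undo could be falsely triggered.  Hence also inspect the head
 * skb for TCPCB_EVER_RETRANS.
 */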
static bool tcp_any_retrans_done(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (tp->retrans_out)
		return true;

	skb = tcp_write_queue_head(sk);
	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
		return true;

	return false;
}

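/* Undo during fast recovery after partial ACK. */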
static int tcp_try_undo_partial(struct sock *sk, int acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* Partial ACK arrived. Force Hoe's retransmit. */
	int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);

	if (tcp_may_undo(tp)) {
		/* Plain luck! Hole if filled with delayed
		 * packet, rather than with a retransmit.
		 */
		if (!tcp_any_retrans_done(sk))
			tp->retrans_stamp = 0;

		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);

		DBGUNDO(sk, "Hoe");
		tcp_undo_cwr(sk, false);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);

		/* So... Do not make Hoe's retransmit yet.
		 * If the first packet was delayed, the rest
		 * ones are most probably delayed as well.
		 */
		failed = 0;
	}
	return failed;
}

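/* Undo during loss recovery after partial ACK or using F-RTO. */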
static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (frto_undo || tcp_may_undo(tp)) {
		struct sk_buff *skb;
		tcp_for_write_queue(skb, sk) {
			if (skb == tcp_send_head(sk))
				break;
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		}

		tcp_clear_all_retrans_hints(tp);

		DBGUNDO(sk, "partial loss");
		tp->lost_out = 0;
		tcp_undo_cwr(sk, true);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
		if (frto_undo)
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPSPURIOUSRTOS);
		inet_csk(sk)->icsk_retransmits = 0;
		tp->undo_marker = 0;
		if (frto_undo || tcp_is_sack(tp))
			tcp_set_ca_state(sk, TCP_CA_Open);
		return true;
	}
	return false;
}

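/* The cwnd reduction in CWR and Recovery uses the PRR algorithm
 * (proportional rate reduction). It computes the number of packets to
 * send (sndcnt) based on packets newly delivered:
 *   1) If the packets in flight is larger than ssthresh, PRR spreads the
 *	cwnd reductions across a full RTT.
 *   2) If packets in flight is lower than ssthresh (such as due to excess
 *	losses and/or application stalls), do not perform any further cwnd
 *	reductions, but instead slow start up to ssthresh.
 */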
static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->high_seq = tp->snd_nxt;
	tp->tlp_high_seq = 0;
	tp->snd_cwnd_cnt = 0;
	tp->prior_cwnd = tp->snd_cwnd;
	tp->prr_delivered = 0;
	tp->prr_out = 0;
	if (set_ssthresh)
		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
	TCP_ECN_queue_cwr(tp);
}

static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
			       int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int sndcnt = 0;
	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);

	tp->prr_delivered += newly_acked_sacked;
	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
		/* Proportional rate reduction:
		 * sndcnt = CEIL(prr_delivered * ssthresh / prior_cwnd)
		 *          - prr_out
		 */
		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
			       tp->prior_cwnd - 1;
		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
	} else {
		/* Slow start back up to ssthresh. */
		sndcnt = min_t(int, delta,
			       max_t(int, tp->prr_delivered - tp->prr_out,
				     newly_acked_sacked) + 1);
	}

	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}

static inline void tcp_end_cwnd_reduction(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
		tp->snd_cwnd = tp->snd_ssthresh;
		tp->snd_cwnd_stamp = tcp_time_stamp;
	}
	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}

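/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */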
void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->prior_ssthresh = 0;
	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
		tp->undo_marker = 0;
		tcp_init_cwnd_reduction(sk, set_ssthresh);
		tcp_set_ca_state(sk, TCP_CA_CWR);
	}
}

static void tcp_try_keep_open(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int state = TCP_CA_Open;

	if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
		state = TCP_CA_Disorder;

	if (inet_csk(sk)->icsk_ca_state != state) {
		tcp_set_ca_state(sk, state);
		tp->high_seq = tp->snd_nxt;
	}
}

static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_verify_left_out(tp);

	if (!tcp_any_retrans_done(sk))
		tp->retrans_stamp = 0;

	if (flag & FLAG_ECE)
		tcp_enter_cwr(sk, 1);

	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
		tcp_try_keep_open(sk);
		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
			tcp_moderate_cwnd(tp);
	} else {
		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
	}
}

static void tcp_mtup_probe_failed(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
	icsk->icsk_mtup.probe_size = 0;
}

static void tcp_mtup_probe_success(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* FIXME: breaks with very large cwnd */
	tp->prior_ssthresh = tcp_current_ssthresh(sk);
	tp->snd_cwnd = tp->snd_cwnd *
		       tcp_mss_to_mtu(sk, tp->mss_cache) /
		       icsk->icsk_mtup.probe_size;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->snd_ssthresh = tcp_current_ssthresh(sk);

	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
	icsk->icsk_mtup.probe_size = 0;
	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}

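/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */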
void tcp_simple_retransmit(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int mss = tcp_current_mss(sk);
	u32 prior_lost = tp->lost_out;

	tcp_for_write_queue(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (tcp_skb_seglen(skb) > mss &&
		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out -= tcp_skb_pcount(skb);
			}
			tcp_skb_mark_lost_uncond_verify(tp, skb);
		}
	}

	tcp_clear_retrans_hints_partial(tp);

	if (prior_lost == tp->lost_out)
		return;

	if (tcp_is_reno(tp))
		tcp_limit_reno_sacked(tp);

	tcp_verify_left_out(tp);

	/* Don't muck with the congestion window here.
	 * Reason is that we do not increase amount of _data_
	 * in network, but units changed and effective
	 * cwnd/ssthresh really reduced now.
	 */
	if (icsk->icsk_ca_state != TCP_CA_Loss) {
		tp->high_seq = tp->snd_nxt;
		tp->snd_ssthresh = tcp_current_ssthresh(sk);
		tp->prior_ssthresh = 0;
		tp->undo_marker = 0;
		tcp_set_ca_state(sk, TCP_CA_Loss);
	}
	tcp_xmit_retransmit_queue(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);

static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int mib_idx;

	if (tcp_is_reno(tp))
		mib_idx = LINUX_MIB_TCPRENORECOVERY;
	else
		mib_idx = LINUX_MIB_TCPSACKRECOVERY;

	NET_INC_STATS_BH(sock_net(sk), mib_idx);

	tp->prior_ssthresh = 0;
	tp->undo_marker = tp->snd_una;
	tp->undo_retrans = tp->retrans_out;

	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
		if (!ece_ack)
			tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tcp_init_cwnd_reduction(sk, true);
	}
	tcp_set_ca_state(sk, TCP_CA_Recovery);
}

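/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
 * recovered or spurious. Otherwise retransmits more on partial ACKs.
 */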
2685static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2686{
2687 struct inet_connection_sock *icsk = inet_csk(sk);
2688 struct tcp_sock *tp = tcp_sk(sk);
2689 bool recovered = !before(tp->snd_una, tp->high_seq);
2690
2691 if (tp->frto) {
2692 if (flag & FLAG_ORIG_SACK_ACKED) {
2693
2694
2695
2696 tcp_try_undo_loss(sk, true);
2697 return;
2698 }
2699 if (after(tp->snd_nxt, tp->high_seq) &&
2700 (flag & FLAG_DATA_SACKED || is_dupack)) {
2701 tp->frto = 0;
2702 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2703 tp->high_seq = tp->snd_nxt;
2704 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
2705 TCP_NAGLE_OFF);
2706 if (after(tp->snd_nxt, tp->high_seq))
2707 return;
2708 tp->frto = 0;
2709 }
2710 }
2711
2712 if (recovered) {
2713 /* F-RTO step 2.a and first part of step 3.a */
2714 icsk->icsk_retransmits = 0;
2715 tcp_try_undo_recovery(sk);
2716 return;
2717 }
2718 if (flag & FLAG_DATA_ACKED)
2719 icsk->icsk_retransmits = 0;
2720 if (tcp_is_reno(tp)) {
2721 /* A Reno DUPACK means new data in F-RTO step 2.b above are
2722  * delivered. Lower inflight to clock out (re)transmissions.
2723  */
2724 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2725 tcp_add_reno_sack(sk);
2726 else if (flag & FLAG_SND_UNA_ADVANCED)
2727 tcp_reset_reno_sack(tp);
2728 }
2729 if (tcp_try_undo_loss(sk, false))
2730 return;
2731 tcp_xmit_retransmit_queue(sk);
2732}
2733
2734 /* Process an event which can update packets-in-flight not trivially.
2735  * The main goal of this function is to calculate a new estimate for
2736  * left_out, taking into account both packets sitting in the
2737  * receiver's buffer and packets lost by the network.
2738  *
2739  * Besides that, it performs CWND reduction when packet loss is
2740  * detected and changes the state of the machine.
2741  *
2742  * It does _not_ decide what to send; that is done in
2743  * tcp_xmit_retransmit_queue().
2744  */
2745static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2746 int prior_sacked, int prior_packets,
2747 bool is_dupack, int flag)
2748{
2749 struct inet_connection_sock *icsk = inet_csk(sk);
2750 struct tcp_sock *tp = tcp_sk(sk);
2751 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2752 (tcp_fackets_out(tp) > tp->reordering));
2753 int newly_acked_sacked = 0;
2754 int fast_rexmit = 0;
2755
2756 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2757 tp->sacked_out = 0;
2758 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2759 tp->fackets_out = 0;
2760
2761 /* Now the state machine starts:
2762  * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
2763 if (flag & FLAG_ECE)
2764 tp->prior_ssthresh = 0;
2765
2766 /* B. In all the states check for reneging SACKs. */
2767 if (tcp_check_sack_reneging(sk, flag))
2768 return;
2769
2770 /* C. Check consistency of the current state. */
2771 tcp_verify_left_out(tp);
2772
2773 /* D. Check state exit conditions. State can be terminated
2774  * when high_seq is ACKed. */
2775 if (icsk->icsk_ca_state == TCP_CA_Open) {
2776 WARN_ON(tp->retrans_out != 0);
2777 tp->retrans_stamp = 0;
2778 } else if (!before(tp->snd_una, tp->high_seq)) {
2779 switch (icsk->icsk_ca_state) {
2780 case TCP_CA_CWR:
2781 /* CWR is to be held until something *above* high_seq
2782  * is ACKed, so the CWR bit can reach the receiver. */
2783 if (tp->snd_una != tp->high_seq) {
2784 tcp_end_cwnd_reduction(sk);
2785 tcp_set_ca_state(sk, TCP_CA_Open);
2786 }
2787 break;
2788
2789 case TCP_CA_Recovery:
2790 if (tcp_is_reno(tp))
2791 tcp_reset_reno_sack(tp);
2792 if (tcp_try_undo_recovery(sk))
2793 return;
2794 tcp_end_cwnd_reduction(sk);
2795 break;
2796 }
2797 }
2798
2799 /* E. Process state. */
2800 switch (icsk->icsk_ca_state) {
2801 case TCP_CA_Recovery:
2802 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2803 if (tcp_is_reno(tp) && is_dupack)
2804 tcp_add_reno_sack(sk);
2805 } else
2806 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2807 newly_acked_sacked = prior_packets - tp->packets_out +
2808 tp->sacked_out - prior_sacked;
2809 break;
2810 case TCP_CA_Loss:
2811 tcp_process_loss(sk, flag, is_dupack);
2812 if (icsk->icsk_ca_state != TCP_CA_Open)
2813 return;
2814 /* Fall through to processing in Open state. */
2815 default:
2816 if (tcp_is_reno(tp)) {
2817 if (flag & FLAG_SND_UNA_ADVANCED)
2818 tcp_reset_reno_sack(tp);
2819 if (is_dupack)
2820 tcp_add_reno_sack(sk);
2821 }
2822 newly_acked_sacked = prior_packets - tp->packets_out +
2823 tp->sacked_out - prior_sacked;
2824
2825 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2826 tcp_try_undo_dsack(sk);
2827
2828 if (!tcp_time_to_recover(sk, flag)) {
2829 tcp_try_to_open(sk, flag, newly_acked_sacked);
2830 return;
2831 }
2832
2833 /* MTU probe failure: don't reduce cwnd */
2834 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2835 icsk->icsk_mtup.probe_size &&
2836 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2837 tcp_mtup_probe_failed(sk);
2838 /* Restores the reduction we did in tcp_mtup_probe() */
2839 tp->snd_cwnd++;
2840 tcp_simple_retransmit(sk);
2841 return;
2842 }
2843
2844 /* Otherwise enter Recovery state */
2845 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2846 fast_rexmit = 1;
2847 }
2848
2849 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
2850 tcp_update_scoreboard(sk, fast_rexmit);
2851 tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
2852 tcp_xmit_retransmit_queue(sk);
2853}
2854
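/* Feed a valid RTT sample into the estimator, recompute the RTO and
 * clear any exponential timer backoff.
 */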
2855void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
2856{
2857 tcp_rtt_estimator(sk, seq_rtt);
2858 tcp_set_rto(sk);
2859 inet_csk(sk)->icsk_backoff = 0;
2860}
2861EXPORT_SYMBOL(tcp_valid_rtt_meas);
2862
2863 /* Read draft-ietf-tcplw-high-performance before mucking
2864  * with this code. (Supersedes RFC1323)
2865  */
2866static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
2867{
2868 /* RTTM Rule: A TSecr value received in a segment is used to
2869  * update the averaged RTT measurement only if the segment
2870  * acknowledges some new data, i.e., only if it advances the
2871  * left edge of the send window.
2872  *
2873  * See draft-ietf-tcplw-high-performance-00, section 3.3.
2874  *
2875  * Also reset the backoff as soon as we see the first valid
2876  * sample. If we do not, the RTO remains strongly overestimated:
2877  * with timestamps, samples are accepted even from very old
2878  * segments. E.g. when an RTT of 1 suddenly grows to 8, we
2879  * retransmit 5 times, and only 8 seconds later the delayed
2880  * retransmits are ACKed; without the reset the backed-off RTO
2881  * would persist until the timestamp samples worked it out.
2882  */
2883 struct tcp_sock *tp = tcp_sk(sk);
2884
2885 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
2886}
2887
2888static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
2889{
2890 /* We don't have a timestamp. Can only use
2891  * packets that are not retransmitted to determine
2892  * rtt estimates. Also, we must not reset the
2893  * backoff for rto until we get a non-retransmitted
2894  * packet. This allows us to deal with a situation
2895  * where the network delay has increased suddenly.
2896  * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2897  */
2898
2899 if (flag & FLAG_RETRANS_DATA_ACKED)
2900 return;
2901
2902 tcp_valid_rtt_meas(sk, seq_rtt);
2903}
2904
2905static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2906 const s32 seq_rtt)
2907{
2908 const struct tcp_sock *tp = tcp_sk(sk);
2909
2910 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
2911 tcp_ack_saw_tstamp(sk, flag);
2912 else if (seq_rtt >= 0)
2913 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2914}
2915
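/* Let the congestion-control module grow cwnd, then timestamp the
 * change.
 */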
2916static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
2917{
2918 const struct inet_connection_sock *icsk = inet_csk(sk);
2919 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
2920 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2921}
2922
2923 /* Restart timer after forward progress on connection.
2924  * RFC2988 recommends to restart timer to now+rto.
2925  */
2926void tcp_rearm_rto(struct sock *sk)
2927{
2928 const struct inet_connection_sock *icsk = inet_csk(sk);
2929 struct tcp_sock *tp = tcp_sk(sk);
2930
2931 /* If the retrans timer is currently being used by Fast Open
2932  * for SYN-ACK retrans purposes, stay put.
2933  */
2934 if (tp->fastopen_rsk)
2935 return;
2936
2937 if (!tp->packets_out) {
2938 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2939 } else {
2940 u32 rto = inet_csk(sk)->icsk_rto;
2941
2942 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2943 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2944 struct sk_buff *skb = tcp_write_queue_head(sk);
2945 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
2946 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
2947
2948 /* delta may not be positive if the socket is locked
2949  * when the retrans timer fires and is rescheduled. */
2950 if (delta > 0)
2951 rto = delta;
2952 }
2953 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
2954 TCP_RTO_MAX);
2955 }
2956}
2957
2958 /* This function is called when the delayed ER timer fires. TCP enters
2959  * fast recovery and performs fast-retransmit.
2960  */
2961void tcp_resume_early_retransmit(struct sock *sk)
2962{
2963 struct tcp_sock *tp = tcp_sk(sk);
2964
2965 tcp_rearm_rto(sk);
2966
2967 /* Stop if ER is disabled after the delayed ER timer is scheduled */
2968 if (!tp->do_early_retrans)
2969 return;
2970
2971 tcp_enter_recovery(sk, false);
2972 tcp_update_scoreboard(sk, 1);
2973 tcp_xmit_retransmit_queue(sk);
2974}
2975
2976 /* If we get here, the whole TSO packet has not been acked. */
2977static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
2978{
2979 struct tcp_sock *tp = tcp_sk(sk);
2980 u32 packets_acked;
2981
2982 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
2983
2984 packets_acked = tcp_skb_pcount(skb);
2985 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2986 return 0;
2987 packets_acked -= tcp_skb_pcount(skb);
2988
2989 if (packets_acked) {
2990 BUG_ON(tcp_skb_pcount(skb) == 0);
2991 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
2992 }
2993
2994 return packets_acked;
2995}
2996
2997 /* Remove acknowledged frames from the retransmission queue. If our
2998  * packet is before the ack sequence, we can discard it since it has
2999  * been confirmed to have arrived at the other end.
3000  */
3001static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3002 u32 prior_snd_una)
3003{
3004 struct tcp_sock *tp = tcp_sk(sk);
3005 const struct inet_connection_sock *icsk = inet_csk(sk);
3006 struct sk_buff *skb;
3007 u32 now = tcp_time_stamp;
3008 int fully_acked = true;
3009 int flag = 0;
3010 u32 pkts_acked = 0;
3011 u32 reord = tp->packets_out;
3012 u32 prior_sacked = tp->sacked_out;
3013 s32 seq_rtt = -1;
3014 s32 ca_seq_rtt = -1;
3015 ktime_t last_ackt = net_invalid_timestamp();
3016
3017 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3018 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3019 u32 acked_pcount;
3020 u8 sacked = scb->sacked;
3021
3022 /* Determine how many packets and what bytes were acked, tso and else */
3023 if (after(scb->end_seq, tp->snd_una)) {
3024 if (tcp_skb_pcount(skb) == 1 ||
3025 !after(tp->snd_una, scb->seq))
3026 break;
3027
3028 acked_pcount = tcp_tso_acked(sk, skb);
3029 if (!acked_pcount)
3030 break;
3031
3032 fully_acked = false;
3033 } else {
3034 acked_pcount = tcp_skb_pcount(skb);
3035 }
3036
3037 if (sacked & TCPCB_RETRANS) {
3038 if (sacked & TCPCB_SACKED_RETRANS)
3039 tp->retrans_out -= acked_pcount;
3040 flag |= FLAG_RETRANS_DATA_ACKED;
3041 ca_seq_rtt = -1;
3042 seq_rtt = -1;
3043 } else {
3044 ca_seq_rtt = now - scb->when;
3045 last_ackt = skb->tstamp;
3046 if (seq_rtt < 0) {
3047 seq_rtt = ca_seq_rtt;
3048 }
3049 if (!(sacked & TCPCB_SACKED_ACKED))
3050 reord = min(pkts_acked, reord);
3051 if (!after(scb->end_seq, tp->high_seq))
3052 flag |= FLAG_ORIG_SACK_ACKED;
3053 }
3054
3055 if (sacked & TCPCB_SACKED_ACKED)
3056 tp->sacked_out -= acked_pcount;
3057 if (sacked & TCPCB_LOST)
3058 tp->lost_out -= acked_pcount;
3059
3060 tp->packets_out -= acked_pcount;
3061 pkts_acked += acked_pcount;
3062
3063 /* Initial outgoing SYN's get put onto the write_queue
3064  * just like anything else we transmit. It is not
3065  * true data, and if we misinform our callers that
3066  * this ACK acks real data, we will erroneously exit
3067  * connection startup slow start one packet too
3068  * quickly. This is severely frowned upon behavior.
3069  */
3070 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3071 flag |= FLAG_DATA_ACKED;
3072 } else {
3073 flag |= FLAG_SYN_ACKED;
3074 tp->retrans_stamp = 0;
3075 }
3076
3077 if (!fully_acked)
3078 break;
3079
3080 tcp_unlink_write_queue(skb, sk);
3081 sk_wmem_free_skb(sk, skb);
3082 tp->scoreboard_skb_hint = NULL;
3083 if (skb == tp->retransmit_skb_hint)
3084 tp->retransmit_skb_hint = NULL;
3085 if (skb == tp->lost_skb_hint)
3086 tp->lost_skb_hint = NULL;
3087 }
3088
3089 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3090 tp->snd_up = tp->snd_una;
3091
3092 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3093 flag |= FLAG_SACK_RENEGING;
3094
3095 if (flag & FLAG_ACKED) {
3096 const struct tcp_congestion_ops *ca_ops
3097 = inet_csk(sk)->icsk_ca_ops;
3098
3099 if (unlikely(icsk->icsk_mtup.probe_size &&
3100 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3101 tcp_mtup_probe_success(sk);
3102 }
3103
3104 tcp_ack_update_rtt(sk, flag, seq_rtt);
3105 tcp_rearm_rto(sk);
3106
3107 if (tcp_is_reno(tp)) {
3108 tcp_remove_reno_sacks(sk, pkts_acked);
3109 } else {
3110 int delta;
3111
3112 /* Non-retransmitted hole got filled? That's reordering */
3113 if (reord < prior_fackets)
3114 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3115
3116 delta = tcp_is_fack(tp) ? pkts_acked :
3117 prior_sacked - tp->sacked_out;
3118 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3119 }
3120
3121 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3122
3123 if (ca_ops->pkts_acked) {
3124 s32 rtt_us = -1;
3125
3126 /* Is the ACK triggering packet unambiguous? */
3127 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3128 /* High resolution needed and available? */
3129 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3130 !ktime_equal(last_ackt,
3131 net_invalid_timestamp()))
3132 rtt_us = ktime_us_delta(ktime_get_real(),
3133 last_ackt);
3134 else if (ca_seq_rtt >= 0)
3135 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3136 }
3137
3138 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3139 }
3140 }
3141
3142#if FASTRETRANS_DEBUG > 0
3143 WARN_ON((int)tp->sacked_out < 0);
3144 WARN_ON((int)tp->lost_out < 0);
3145 WARN_ON((int)tp->retrans_out < 0);
3146 if (!tp->packets_out && tcp_is_sack(tp)) {
3147 icsk = inet_csk(sk);
3148 if (tp->lost_out) {
3149 pr_debug("Leak l=%u %d\n",
3150 tp->lost_out, icsk->icsk_ca_state);
3151 tp->lost_out = 0;
3152 }
3153 if (tp->sacked_out) {
3154 pr_debug("Leak s=%u %d\n",
3155 tp->sacked_out, icsk->icsk_ca_state);
3156 tp->sacked_out = 0;
3157 }
3158 if (tp->retrans_out) {
3159 pr_debug("Leak r=%u %d\n",
3160 tp->retrans_out, icsk->icsk_ca_state);
3161 tp->retrans_out = 0;
3162 }
3163 }
3164#endif
3165 return flag;
3166}
3167
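/* A window-probe ACK arrived: stop probing if the receive window now
 * has room for the head of the write queue, otherwise reschedule the
 * zero-window probe timer with exponential backoff.
 */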
3168static void tcp_ack_probe(struct sock *sk)
3169{
3170 const struct tcp_sock *tp = tcp_sk(sk);
3171 struct inet_connection_sock *icsk = inet_csk(sk);
3172
3173 /* Was it a usable window open? */
3174
3175 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3176 icsk->icsk_backoff = 0;
3177 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3178 /* Socket must be waked up by subsequent tcp_data_snd_check().
3179  * This function is not for random using!
3180  */
3181 } else {
3182 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3183 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3184 TCP_RTO_MAX);
3185 }
3186}
3187
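/* An ACK is "dubious" if it is a possible duplicate, carries a
 * congestion-avoidance alert, or arrives outside the Open state.
 */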
3188static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3189{
3190 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3191 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3192}
3193
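/* cwnd may be raised only outside of cwnd reduction; when the ACK
 * carries an ECN echo, only while still below ssthresh.
 */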
3194static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3195{
3196 const struct tcp_sock *tp = tcp_sk(sk);
3197 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3198 !tcp_in_cwnd_reduction(sk);
3199}
3200
3201 /* Check that window update is acceptable.
3202  * The function assumes that snd_una<=ack<=snd_nxt.
3203  */
3204static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3205 const u32 ack, const u32 ack_seq,
3206 const u32 nwin)
3207{
3208 return after(ack, tp->snd_una) ||
3209 after(ack_seq, tp->snd_wl1) ||
3210 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3211}
3212
3213 /* Update our send window.
3214  *
3215  * Window update algorithm, described in RFC793/RFC1122 (used in
3216  * linux-2.2 and in FreeBSD; NetBSD's one is even worse) is wrong.
3217  */
3218static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3219 u32 ack_seq)
3220{
3221 struct tcp_sock *tp = tcp_sk(sk);
3222 int flag = 0;
3223 u32 nwin = ntohs(tcp_hdr(skb)->window);
3224
3225 if (likely(!tcp_hdr(skb)->syn))
3226 nwin <<= tp->rx_opt.snd_wscale;
3227
3228 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3229 flag |= FLAG_WIN_UPDATE;
3230 tcp_update_wl(tp, ack_seq);
3231
3232 if (tp->snd_wnd != nwin) {
3233 tp->snd_wnd = nwin;
3234
3235 /* Note, it is the only place, where
3236  * fast path is recovered for sending TCP.
3237  */
3238 tp->pred_flags = 0;
3239 tcp_fast_path_check(sk);
3240
3241 if (nwin > tp->max_window) {
3242 tp->max_window = nwin;
3243 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3244 }
3245 }
3246 }
3247
3248 tp->snd_una = ack;
3249
3250 return flag;
3251}
3252
3253 /* RFC 5961 7 [ACK Throttling] */
3254static void tcp_send_challenge_ack(struct sock *sk)
3255{
3256 /* unprotected vars, we don't care about overwrites */
3257 static u32 challenge_timestamp;
3258 static unsigned int challenge_count;
3259 u32 now = jiffies / HZ;
3260
3261 if (now != challenge_timestamp) {
3262 challenge_timestamp = now;
3263 challenge_count = 0;
3264 }
3265 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3266 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3267 tcp_send_ack(sk);
3268 }
3269}
3270
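/* Remember the most recently seen peer timestamp for PAWS/RTTM. */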
3271static void tcp_store_ts_recent(struct tcp_sock *tp)
3272{
3273 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3274 tp->rx_opt.ts_recent_stamp = get_seconds();
3275}
3276
3277static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3278{
3279 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3280 /* PAWS bug workaround wrt. ACK frames, the PAWS discard
3281  * extra check below makes sure this can only happen
3282  * for pure ACK frames. -DaveM
3283  *
3284  * Not only that, it also occurs for expired timestamps.
3285  */
3286
3287 if (tcp_paws_check(&tp->rx_opt, 0))
3288 tcp_store_ts_recent(tp);
3289 }
3290}
3291
3292 /* This routine deals with acks during a TLP episode.
3293  * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
3294  */
3295static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3296{
3297 struct tcp_sock *tp = tcp_sk(sk);
3298 bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
3299 !(flag & (FLAG_SND_UNA_ADVANCED |
3300 FLAG_NOT_DUP | FLAG_DATA_SACKED));
3301
3302 /* Mark the end of the TLP episode on receiving a TLP dupack or
3303  * when the ack is after tlp_high_seq.
3304  */
3305 if (is_tlp_dupack) {
3306 tp->tlp_high_seq = 0;
3307 return;
3308 }
3309
3310 if (after(ack, tp->tlp_high_seq)) {
3311 tp->tlp_high_seq = 0;
3312 /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
3313 if (!(flag & FLAG_DSACKING_ACK)) {
3314 tcp_init_cwnd_reduction(sk, true);
3315 tcp_set_ca_state(sk, TCP_CA_CWR);
3316 tcp_end_cwnd_reduction(sk);
3317 tcp_set_ca_state(sk, TCP_CA_Open);
3318 NET_INC_STATS_BH(sock_net(sk),
3319 LINUX_MIB_TCPLOSSPROBERECOVERY);
3320 }
3321 }
3322}
3323
3324 /* This routine deals with incoming acks, but not outgoing ones. */
3325static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3326{
3327 struct inet_connection_sock *icsk = inet_csk(sk);
3328 struct tcp_sock *tp = tcp_sk(sk);
3329 u32 prior_snd_una = tp->snd_una;
3330 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3331 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3332 bool is_dupack = false;
3333 u32 prior_in_flight;
3334 u32 prior_fackets;
3335 int prior_packets = tp->packets_out;
3336 int prior_sacked = tp->sacked_out;
3337 int pkts_acked = 0;
3338 int previous_packets_out = 0;
3339
3340 /* If the ack is older than previous acks
3341  * then we can probably ignore it.
3342  */
3343 if (before(ack, prior_snd_una)) {
3344 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3345 if (before(ack, prior_snd_una - tp->max_window)) {
3346 tcp_send_challenge_ack(sk);
3347 return -1;
3348 }
3349 goto old_ack;
3350 }
3351
3352 /* If the ack includes data we haven't sent yet, discard
3353  * this segment (RFC793 Section 3.9).
3354  */
3355 if (after(ack, tp->snd_nxt))
3356 goto invalid_ack;
3357
3358 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3359 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3360 tcp_rearm_rto(sk);
3361
3362 if (after(ack, prior_snd_una))
3363 flag |= FLAG_SND_UNA_ADVANCED;
3364
3365 prior_fackets = tp->fackets_out;
3366 prior_in_flight = tcp_packets_in_flight(tp);
3367
3368 /* ts_recent update must be made after we are sure that the packet
3369  * is in window.
3370  */
3371 if (flag & FLAG_UPDATE_TS_RECENT)
3372 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3373
3374 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3375 /* Window is constant, pure forward advance.
3376  * No more checks are required.
3377  * Note, we use the fact that SND.UNA>=SND.WL2.
3378  */
3379 tcp_update_wl(tp, ack_seq);
3380 tp->snd_una = ack;
3381 flag |= FLAG_WIN_UPDATE;
3382
3383 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3384
3385 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3386 } else {
3387 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3388 flag |= FLAG_DATA;
3389 else
3390 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3391
3392 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3393
3394 if (TCP_SKB_CB(skb)->sacked)
3395 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3396
3397 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3398 flag |= FLAG_ECE;
3399
3400 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3401 }
3402
3403 /* We passed data and got it acked, remove any soft error
3404  * log. Something worked...
3405  */
3406 sk->sk_err_soft = 0;
3407 icsk->icsk_probes_out = 0;
3408 tp->rcv_tstamp = tcp_time_stamp;
3409 if (!prior_packets)
3410 goto no_queue;
3411
3412 /* See if we can take anything off of the retransmit queue. */
3413 previous_packets_out = tp->packets_out;
3414 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3415
3416 pkts_acked = previous_packets_out - tp->packets_out;
3417
3418 if (tcp_ack_is_dubious(sk, flag)) {
3419 /* Advance CWND, if state allows this. */
3420 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
3421 tcp_cong_avoid(sk, ack, prior_in_flight);
3422 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3423 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3424 prior_packets, is_dupack, flag);
3425 } else {
3426 if (flag & FLAG_DATA_ACKED)
3427 tcp_cong_avoid(sk, ack, prior_in_flight);
3428 }
3429
3430 if (tp->tlp_high_seq)
3431 tcp_process_tlp_ack(sk, ack, flag);
3432
3433 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3434 struct dst_entry *dst = __sk_dst_get(sk);
3435 if (dst)
3436 dst_confirm(dst);
3437 }
3438
3439 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3440 tcp_schedule_loss_probe(sk);
3441 return 1;
3442
3443no_queue:
3444 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3445 if (flag & FLAG_DSACKING_ACK)
3446 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3447 prior_packets, is_dupack, flag);
3448
3449 /* If this ack opens up a zero window, clear backoff. It was
3450  * being used to time the probes, and is probably far higher than
3451  * it needs to be for normal retransmission. */
3452 if (tcp_send_head(sk))
3453 tcp_ack_probe(sk);
3454
3455 if (tp->tlp_high_seq)
3456 tcp_process_tlp_ack(sk, ack, flag);
3457 return 1;
3458
3459invalid_ack:
3460 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3461 return -1;
3462
3463old_ack:
3464 /* If data was SACKed, tag it and see if we should send more data.
3465  * If data was DSACKed, see if we can undo a cwnd reduction.
3466  */
3467 if (TCP_SKB_CB(skb)->sacked) {
3468 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3469 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3470 prior_packets, is_dupack, flag);
3471 }
3472
3473 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3474 return 0;
3475}
3476
3477 /* Look for tcp options. Normally only called on SYN and SYNACK
3478  * packets. But, this can also be called on packets in the
3479  * established flow when the fast version below fails.
3480  */
3481void tcp_parse_options(const struct sk_buff *skb,
3482 struct tcp_options_received *opt_rx, int estab,
3483 struct tcp_fastopen_cookie *foc)
3484{
3485 const unsigned char *ptr;
3486 const struct tcphdr *th = tcp_hdr(skb);
3487 int length = (th->doff * 4) - sizeof(struct tcphdr);
3488
3489 ptr = (const unsigned char *)(th + 1);
3490 opt_rx->saw_tstamp = 0;
3491
3492 while (length > 0) {
3493 int opcode = *ptr++;
3494 int opsize;
3495
3496 switch (opcode) {
3497 case TCPOPT_EOL:
3498 return;
3499 case TCPOPT_NOP:
3500 length--;
3501 continue;
3502 default:
3503 opsize = *ptr++;
3504 if (opsize < 2)
3505 return;
3506 if (opsize > length)
3507 return;
3508 switch (opcode) {
3509 case TCPOPT_MSS:
3510 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3511 u16 in_mss = get_unaligned_be16(ptr);
3512 if (in_mss) {
3513 if (opt_rx->user_mss &&
3514 opt_rx->user_mss < in_mss)
3515 in_mss = opt_rx->user_mss;
3516 opt_rx->mss_clamp = in_mss;
3517 }
3518 }
3519 break;
3520 case TCPOPT_WINDOW:
3521 if (opsize == TCPOLEN_WINDOW && th->syn &&
3522 !estab && sysctl_tcp_window_scaling) {
3523 __u8 snd_wscale = *(__u8 *)ptr;
3524 opt_rx->wscale_ok = 1;
3525 if (snd_wscale > 14) {
3526 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3527 __func__,
3528 snd_wscale);
3529 snd_wscale = 14;
3530 }
3531 opt_rx->snd_wscale = snd_wscale;
3532 }
3533 break;
3534 case TCPOPT_TIMESTAMP:
3535 if ((opsize == TCPOLEN_TIMESTAMP) &&
3536 ((estab && opt_rx->tstamp_ok) ||
3537 (!estab && sysctl_tcp_timestamps))) {
3538 opt_rx->saw_tstamp = 1;
3539 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3540 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3541 }
3542 break;
3543 case TCPOPT_SACK_PERM:
3544 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3545 !estab && sysctl_tcp_sack) {
3546 opt_rx->sack_ok = TCP_SACK_SEEN;
3547 tcp_sack_reset(opt_rx);
3548 }
3549 break;
3550
3551 case TCPOPT_SACK:
3552 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3553 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3554 opt_rx->sack_ok) {
3555 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3556 }
3557 break;
3558#ifdef CONFIG_TCP_MD5SIG
3559 case TCPOPT_MD5SIG:
3560 /*
3561  * The MD5 Hash has already been
3562  * checked (see tcp_v{4,6}_do_rcv()).
3563  */
3564 break;
3565#endif
3566 case TCPOPT_EXP:
3567 /* Fast Open option shares code 254 using a
3568  * 16 bits magic number. It's valid only in
3569  * SYN or SYN-ACK with an even size.
3570  */
3571 if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
3572 get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
3573 foc == NULL || !th->syn || (opsize & 1))
3574 break;
3575 foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
3576 if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
3577 foc->len <= TCP_FASTOPEN_COOKIE_MAX)
3578 memcpy(foc->val, ptr + 2, foc->len);
3579 else if (foc->len != 0)
3580 foc->len = -1;
3581 break;
3582
3583 }
3584 ptr += opsize-2;
3585 length -= opsize;
3586 }
3587 }
3588}
3589EXPORT_SYMBOL(tcp_parse_options);
3590
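/* Fast path for the common case: the only option present is a
 * timestamp, laid out in the aligned form recommended by RFC 1323
 * (NOP NOP TIMESTAMP).
 */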
3591static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3592{
3593 const __be32 *ptr = (const __be32 *)(th + 1);
3594
3595 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3596 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3597 tp->rx_opt.saw_tstamp = 1;
3598 ++ptr;
3599 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3600 ++ptr;
3601 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3602 return true;
3603 }
3604 return false;
3605}
3606
3607 /* Fast parse options. This hopes to only see timestamps.
3608  * If it is wrong it falls back on tcp_parse_options().
3609  */
3610static bool tcp_fast_parse_options(const struct sk_buff *skb,
3611 const struct tcphdr *th, struct tcp_sock *tp)
3612{
3613 /* In the spirit of fast parsing, compare doff directly to constant
3614  * values. Because equality is used, short doff can be ignored here.
3615  */
3616 if (th->doff == (sizeof(*th) / 4)) {
3617 tp->rx_opt.saw_tstamp = 0;
3618 return false;
3619 } else if (tp->rx_opt.tstamp_ok &&
3620 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3621 if (tcp_parse_aligned_timestamp(tp, th))
3622 return true;
3623 }
3624
3625 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3626 if (tp->rx_opt.saw_tstamp)
3627 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3628
3629 return true;
3630}
3631
3632#ifdef CONFIG_TCP_MD5SIG
3633 /*
3634  * Parse MD5 Signature option
3635  */
3636const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3637{
3638 int length = (th->doff << 2) - sizeof(*th);
3639 const u8 *ptr = (const u8 *)(th + 1);
3640
3641 /* If the TCP option is too short, we can short cut */
3642 if (length < TCPOLEN_MD5SIG)
3643 return NULL;
3644
3645 while (length > 0) {
3646 int opcode = *ptr++;
3647 int opsize;
3648
3649 switch(opcode) {
3650 case TCPOPT_EOL:
3651 return NULL;
3652 case TCPOPT_NOP:
3653 length--;
3654 continue;
3655 default:
3656 opsize = *ptr++;
3657 if (opsize < 2 || opsize > length)
3658 return NULL;
3659 if (opcode == TCPOPT_MD5SIG)
3660 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3661 }
3662 ptr += opsize - 2;
3663 length -= opsize;
3664 }
3665 return NULL;
3666}
3667EXPORT_SYMBOL(tcp_parse_md5sig_option);
3668#endif
3669
3670 /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
3671  *
3672  * It is not fatal. If this ACK does _not_ change critical state
3673  * (seqs, window) it can pass through the stack. So, the predicate
3674  * below verifies that this segment is not used for anything but
3675  * congestion avoidance or fast retransmit; we are even able to eat
3676  * such an ACK without any sequence number changes.
3677  *
3678  * Concretely, a segment is treated as a harmless "disordered" ACK
3679  * when all of the following hold:
3680  *
3681  *   1. It is a pure ACK with the correct sequence number, carrying
3682  *      no data.
3683  *   2. It is a duplicate ACK (ack == snd_una).
3684  *   3. It does not update the send window.
3685  *   4. Its timestamp is older than ts_recent by no more than roughly
3686  *      the current RTO, i.e. it sits inside the replay window.
3687  *
3688  * Such an ACK may fail the PAWS check purely because of reordering,
3689  * and discarding it would buy us nothing, so we let it through
3690  * instead of counting it as a PAWS failure.
3691  */
3692
3693static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3694{
3695 const struct tcp_sock *tp = tcp_sk(sk);
3696 const struct tcphdr *th = tcp_hdr(skb);
3697 u32 seq = TCP_SKB_CB(skb)->seq;
3698 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3699
3700 return (/* 1. Pure ACK with correct sequence number. */
3701 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3702
3703 /* 2. ... and duplicate ACK. */
3704 ack == tp->snd_una &&
3705
3706 /* 3. ... and does not update window. */
3707 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3708
3709 /* 4. ... and sits in replay window. */
3710 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3711}
3712
3713static inline bool tcp_paws_discard(const struct sock *sk,
3714 const struct sk_buff *skb)
3715{
3716 const struct tcp_sock *tp = tcp_sk(sk);
3717
3718 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3719 !tcp_disordered_ack(sk, skb);
3720}
3721
3722
3723 /* Check segment sequence number for validity.
3724  *
3725  * Segment controls are considered valid, if the segment
3726  * fits to the window after truncation to the window. Acceptability
3727  * of data (and SYN, FIN, of course) is checked separately.
3728  * See tcp_data_queue(), for example.
3729  *
3730  * Also, controls (RST is main one) are accepted using RCV.WUP instead
3731  * of RCV.NXT. Peer still did not advance his SND.UNA when we
3732  * delayed ACK, so that his SND.UNA <= our RCV.WUP.
3733  * (borrowed from freebsd)
3734  */
3735static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
3736{
3737 return !before(end_seq, tp->rcv_wup) &&
3738 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3739}
3740
3741 /* When we get a reset we do this. */
3742void tcp_reset(struct sock *sk)
3743{
3744 /* We want the right error as BSD sees it (and indeed as we do). */
3745 switch (sk->sk_state) {
3746 case TCP_SYN_SENT:
3747 sk->sk_err = ECONNREFUSED;
3748 break;
3749 case TCP_CLOSE_WAIT:
3750 sk->sk_err = EPIPE;
3751 break;
3752 case TCP_CLOSE:
3753 return;
3754 default:
3755 sk->sk_err = ECONNRESET;
3756 }
3757 /* This barrier is coupled with smp_rmb() in tcp_poll() */
3758 smp_wmb();
3759
3760 if (!sock_flag(sk, SOCK_DEAD))
3761 sk->sk_error_report(sk);
3762
3763 tcp_done(sk);
3764}
3765
3766 /*
3767  *	Process the FIN bit. This now behaves as it is supposed to work
3768  *	and the FIN takes effect when it is validly part of sequence
3769  *	space. Not before when we get holes.
3770  *
3771  *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3772  *	(and thence onto LAST-ACK and finally, CLOSE; we never enter
3773  *	TIME-WAIT)
3774  *
3775  *	If we are in FINWAIT-1, a received FIN indicates simultaneous
3776  *	close and we go into CLOSING (and later onto TIME-WAIT)
3777  *
3778  *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3779  */
3780static void tcp_fin(struct sock *sk)
3781{
3782 struct tcp_sock *tp = tcp_sk(sk);
3783
3784 inet_csk_schedule_ack(sk);
3785
3786 sk->sk_shutdown |= RCV_SHUTDOWN;
3787 sock_set_flag(sk, SOCK_DONE);
3788
3789 switch (sk->sk_state) {
3790 case TCP_SYN_RECV:
3791 case TCP_ESTABLISHED:
3792 /* Move to CLOSE_WAIT */
3793 tcp_set_state(sk, TCP_CLOSE_WAIT);
3794 inet_csk(sk)->icsk_ack.pingpong = 1;
3795 break;
3796
3797 case TCP_CLOSE_WAIT:
3798 case TCP_CLOSING:
3799 /* Received a retransmission of the FIN, do
3800  * nothing.
3801  */
3802 break;
3803 case TCP_LAST_ACK:
3804 /* RFC793: Remain in the LAST-ACK state. */
3805 break;
3806
3807 case TCP_FIN_WAIT1:
3808 /* This case occurs when a simultaneous close
3809  * happens, we must ack the received FIN and
3810  * enter the CLOSING state.
3811  */
3812 tcp_send_ack(sk);
3813 tcp_set_state(sk, TCP_CLOSING);
3814 break;
3815 case TCP_FIN_WAIT2:
3816 /* Received a FIN -- send ACK and enter TIME_WAIT. */
3817 tcp_send_ack(sk);
3818 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3819 break;
3820 default:
3821 /* Only TCP_LISTEN and TCP_CLOSE are left, in these
3822  * cases we should never reach this piece of code.
3823  */
3824 pr_err("%s: Impossible, sk->sk_state=%d\n",
3825 __func__, sk->sk_state);
3826 break;
3827 }
3828
3829 /* It _is_ possible, that we have something out-of-order _after_ FIN.
3830  * Probably, we should reset in this case. For now drop them.
3831  */
3832 __skb_queue_purge(&tp->out_of_order_queue);
3833 if (tcp_is_sack(tp))
3834 tcp_sack_reset(&tp->rx_opt);
3835 sk_mem_reclaim(sk);
3836
3837 if (!sock_flag(sk, SOCK_DEAD)) {
3838 sk->sk_state_change(sk);
3839
3840 /* Do not send POLL_HUP for half duplex close. */
3841 if (sk->sk_shutdown == SHUTDOWN_MASK ||
3842 sk->sk_state == TCP_CLOSE)
3843 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
3844 else
3845 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3846 }
3847}
3848
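/* If the new range overlaps or abuts the existing SACK block, grow
 * the block to cover both and return true.
 */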
3849static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
3850 u32 end_seq)
3851{
3852 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
3853 if (before(seq, sp->start_seq))
3854 sp->start_seq = seq;
3855 if (after(end_seq, sp->end_seq))
3856 sp->end_seq = end_seq;
3857 return true;
3858 }
3859 return false;
3860}
3861
3862static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
3863{
3864 struct tcp_sock *tp = tcp_sk(sk);
3865
3866 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3867 int mib_idx;
3868
3869 if (before(seq, tp->rcv_nxt))
3870 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
3871 else
3872 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
3873
3874 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3875
3876 tp->rx_opt.dsack = 1;
3877 tp->duplicate_sack[0].start_seq = seq;
3878 tp->duplicate_sack[0].end_seq = end_seq;
3879 }
3880}
3881
3882static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
3883{
3884 struct tcp_sock *tp = tcp_sk(sk);
3885
3886 if (!tp->rx_opt.dsack)
3887 tcp_dsack_set(sk, seq, end_seq);
3888 else
3889 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
3890}
3891
3892static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
3893{
3894 struct tcp_sock *tp = tcp_sk(sk);
3895
3896 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
3897 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
3898 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
3899 tcp_enter_quickack_mode(sk);
3900
3901 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3902 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
3903
3904 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
3905 end_seq = tp->rcv_nxt;
3906 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
3907 }
3908 }
3909
3910 tcp_send_ack(sk);
3911}
3912
3913 /* These routines update the SACK block as out-of-order packets
3914  * arrive or in-order packets close up the sequence space.
3915  */
3916static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3917{
3918 int this_sack;
3919 struct tcp_sack_block *sp = &tp->selective_acks[0];
3920 struct tcp_sack_block *swalk = sp + 1;
3921
3922
3923 /* See if the recent change to the first SACK eats into or hits the
3924  * sequence space of other SACK blocks; if so, coalesce. */
3925 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
3926 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
3927 int i;
3928
3929 /* Zap SWALK, by moving every further SACK up by one slot.
3930  * Decrease num_sacks.
3931  */
3932 tp->rx_opt.num_sacks--;
3933 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
3934 sp[i] = sp[i + 1];
3935 continue;
3936 }
3937 this_sack++, swalk++;
3938 }
3939}
3940
3941static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3942{
3943 struct tcp_sock *tp = tcp_sk(sk);
3944 struct tcp_sack_block *sp = &tp->selective_acks[0];
3945 int cur_sacks = tp->rx_opt.num_sacks;
3946 int this_sack;
3947
3948 if (!cur_sacks)
3949 goto new_sack;
3950
3951 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
3952 if (tcp_sack_extend(sp, seq, end_seq)) {
3953 /* Rotate this_sack to the first one. */
3954 for (; this_sack > 0; this_sack--, sp--)
3955 swap(*sp, *(sp - 1));
3956 if (cur_sacks > 1)
3957 tcp_sack_maybe_coalesce(tp);
3958 return;
3959 }
3960 }
3961
3962 /* Could not find an adjacent existing SACK, build a new one,
3963  * put it at the front, and shift everyone else down. We
3964  * always know there is at least one SACK present already here.
3965  *
3966  * If the sack array is full, forget about the last one.
3967  */
3968 if (this_sack >= TCP_NUM_SACKS) {
3969 this_sack--;
3970 tp->rx_opt.num_sacks--;
3971 sp--;
3972 }
3973 for (; this_sack > 0; this_sack--, sp--)
3974 *sp = *(sp - 1);
3975
3976new_sack:
3977 /* Build the new head SACK, and we're done. */
3978 sp->start_seq = seq;
3979 sp->end_seq = end_seq;
3980 tp->rx_opt.num_sacks++;
3981}
3982
3983
3984 /* RCV.NXT advances, some SACKs should be eaten. */
3985static void tcp_sack_remove(struct tcp_sock *tp)
3986{
3987 struct tcp_sack_block *sp = &tp->selective_acks[0];
3988 int num_sacks = tp->rx_opt.num_sacks;
3989 int this_sack;
3990
3991 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
3992 if (skb_queue_empty(&tp->out_of_order_queue)) {
3993 tp->rx_opt.num_sacks = 0;
3994 return;
3995 }
3996
3997 for (this_sack = 0; this_sack < num_sacks;) {
3998 /* Check if the start of the sack is covered by RCV.NXT. */
3999 if (!before(tp->rcv_nxt, sp->start_seq)) {
4000 int i;
4001
4002 /* RCV.NXT must cover all the block! */
4003 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4004
4005 /* Zap this SACK, by moving forward any other SACKS. */
4006 for (i=this_sack+1; i < num_sacks; i++)
4007 tp->selective_acks[i-1] = tp->selective_acks[i];
4008 num_sacks--;
4009 continue;
4010 }
4011 this_sack++;
4012 sp++;
4013 }
4014 tp->rx_opt.num_sacks = num_sacks;
4015}
4016
4017 /* This one checks to see if we can put data from the
4018  * out_of_order queue into the receive_queue.
4019  */
4020static void tcp_ofo_queue(struct sock *sk)
4021{
4022 struct tcp_sock *tp = tcp_sk(sk);
4023 __u32 dsack_high = tp->rcv_nxt;
4024 struct sk_buff *skb;
4025
4026 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4027 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4028 break;
4029
4030 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4031 __u32 dsack = dsack_high;
4032 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4033 dsack_high = TCP_SKB_CB(skb)->end_seq;
4034 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4035 }
4036
4037 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4038 SOCK_DEBUG(sk, "ofo packet was already received\n");
4039 __skb_unlink(skb, &tp->out_of_order_queue);
4040 __kfree_skb(skb);
4041 continue;
4042 }
4043 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4044 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4045 TCP_SKB_CB(skb)->end_seq);
4046
4047 __skb_unlink(skb, &tp->out_of_order_queue);
4048 __skb_queue_tail(&sk->sk_receive_queue, skb);
4049 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4050 if (tcp_hdr(skb)->fin)
4051 tcp_fin(sk);
4052 }
4053}
4054
4055static bool tcp_prune_ofo_queue(struct sock *sk);
4056static int tcp_prune_queue(struct sock *sk);
4057
4058static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4059 unsigned int size)
4060{
4061 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4062 !sk_rmem_schedule(sk, skb, size)) {
4063
4064 if (tcp_prune_queue(sk) < 0)
4065 return -1;
4066
4067 if (!sk_rmem_schedule(sk, skb, size)) {
4068 if (!tcp_prune_ofo_queue(sk))
4069 return -1;
4070
4071 if (!sk_rmem_schedule(sk, skb, size))
4072 return -1;
4073 }
4074 }
4075 return 0;
4076}
4077
4078 /**
4079  * tcp_try_coalesce - try to merge skb to prior one
4080  * @sk: socket
4081  * @to: prior buffer
4082  * @from: buffer to add in queue
4083  * @fragstolen: pointer to boolean
4084  *
4085  * Before queueing skb @from after @to, try to merge them
4086  * to reduce overall memory use and queue lengths, if cost is small.
4087  * Packets in ofo or receive queues can stay a long time.
4088  * Better try to coalesce them right now to avoid future collapsing.
4089  * Returns true if caller should free @from instead of queueing it.
4090  */
4091static bool tcp_try_coalesce(struct sock *sk,
4092 struct sk_buff *to,
4093 struct sk_buff *from,
4094 bool *fragstolen)
4095{
4096 int delta;
4097
4098 *fragstolen = false;
4099
4100 if (tcp_hdr(from)->fin)
4101 return false;
4102
4103 /* It's possible this segment overlaps the prior segment */
4104 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4105 return false;
4106
4107 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4108 return false;
4109
4110 atomic_add(delta, &sk->sk_rmem_alloc);
4111 sk_mem_charge(sk, delta);
4112 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4113 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4114 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4115 return true;
4116}
4117
4118static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4119{
4120 struct tcp_sock *tp = tcp_sk(sk);
4121 struct sk_buff *skb1;
4122 u32 seq, end_seq;
4123
4124 TCP_ECN_check_ce(tp, skb);
4125
4126 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4127 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
4128 __kfree_skb(skb);
4129 return;
4130 }
4131
4132 /* Disable header prediction. */
4133 tp->pred_flags = 0;
4134 inet_csk_schedule_ack(sk);
4135
4136 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4137 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4138 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4139
4140 skb1 = skb_peek_tail(&tp->out_of_order_queue);
4141 if (!skb1) {
4142 /* Initial out of order segment, build 1 SACK. */
4143 if (tcp_is_sack(tp)) {
4144 tp->rx_opt.num_sacks = 1;
4145 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4146 tp->selective_acks[0].end_seq =
4147 TCP_SKB_CB(skb)->end_seq;
4148 }
4149 __skb_queue_head(&tp->out_of_order_queue, skb);
4150 goto end;
4151 }
4152
4153 seq = TCP_SKB_CB(skb)->seq;
4154 end_seq = TCP_SKB_CB(skb)->end_seq;
4155
4156 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4157 bool fragstolen;
4158
4159 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4160 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4161 } else {
4162 kfree_skb_partial(skb, fragstolen);
4163 skb = NULL;
4164 }
4165
4166 if (!tp->rx_opt.num_sacks ||
4167 tp->selective_acks[0].end_seq != seq)
4168 goto add_sack;
4169
4170 /* Common case: data arrive in order after hole. */
4171 tp->selective_acks[0].end_seq = end_seq;
4172 goto end;
4173 }
4174
4175 /* Find place to insert this segment. */
4176 while (1) {
4177 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4178 break;
4179 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4180 skb1 = NULL;
4181 break;
4182 }
4183 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4184 }
4185
4186 /* Does the skb overlap the previous one? */
4187 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4188 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4189 /* All the bits are present. Drop. */
4190 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4191 __kfree_skb(skb);
4192 skb = NULL;
4193 tcp_dsack_set(sk, seq, end_seq);
4194 goto add_sack;
4195 }
4196 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4197 /* Partial overlap. */
4198 tcp_dsack_set(sk, seq,
4199 TCP_SKB_CB(skb1)->end_seq);
4200 } else {
4201 if (skb_queue_is_first(&tp->out_of_order_queue,
4202 skb1))
4203 skb1 = NULL;
4204 else
4205 skb1 = skb_queue_prev(
4206 &tp->out_of_order_queue,
4207 skb1);
4208 }
4209 }
4210 if (!skb1)
4211 __skb_queue_head(&tp->out_of_order_queue, skb);
4212 else
4213 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4214
4215 /* And clean segments covered by new one as whole. */
4216 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4217 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4218
4219 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4220 break;
4221 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4222 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4223 end_seq);
4224 break;
4225 }
4226 __skb_unlink(skb1, &tp->out_of_order_queue);
4227 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4228 TCP_SKB_CB(skb1)->end_seq);
4229 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4230 __kfree_skb(skb1);
4231 }
4232
4233add_sack:
4234 if (tcp_is_sack(tp))
4235 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4236end:
4237 if (skb)
4238 skb_set_owner_r(skb, sk);
4239}
4240
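/* Queue an in-order skb on the receive queue, coalescing it with the
 * tail skb when possible. Returns nonzero when the segment was merged
 * and the caller should free the skb.
 */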
4241static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4242 bool *fragstolen)
4243{
4244 int eaten;
4245 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4246
4247 __skb_pull(skb, hdrlen);
4248 eaten = (tail &&
4249 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4250 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4251 if (!eaten) {
4252 __skb_queue_tail(&sk->sk_receive_queue, skb);
4253 skb_set_owner_r(skb, sk);
4254 }
4255 return eaten;
4256}
4257
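/* Inject data from user space directly into the receive queue, as if
 * it had arrived from the network (used in TCP repair mode).
 */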
4258int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4259{
4260 struct sk_buff *skb = NULL;
4261 struct tcphdr *th;
4262 bool fragstolen;
4263
4264 if (size == 0)
4265 return 0;
4266
4267 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4268 if (!skb)
4269 goto err;
4270
4271 if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
4272 goto err_free;
4273
4274 th = (struct tcphdr *)skb_put(skb, sizeof(*th));
4275 skb_reset_transport_header(skb);
4276 memset(th, 0, sizeof(*th));
4277
4278 if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
4279 goto err_free;
4280
4281 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4282 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4283 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4284
4285 if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
4286 WARN_ON_ONCE(fragstolen);
4287 __kfree_skb(skb);
4288 }
4289 return size;
4290
4291err_free:
4292 kfree_skb(skb);
4293err:
4294 return -ENOMEM;
4295}
4296
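/* Core receive path: queue an incoming data segment in sequence, or
 * hand it to the out-of-order queue.
 */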
4297static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4298{
4299 const struct tcphdr *th = tcp_hdr(skb);
4300 struct tcp_sock *tp = tcp_sk(sk);
4301 int eaten = -1;
4302 bool fragstolen = false;
4303
4304 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4305 goto drop;
4306
4307 skb_dst_drop(skb);
4308 __skb_pull(skb, th->doff * 4);
4309
4310 TCP_ECN_accept_cwr(tp, skb);
4311
4312 tp->rx_opt.dsack = 0;
4313
4314 /* Queue data for delivery to the user.
4315  * Packets in sequence go to the receive queue.
4316  * Out of sequence packets to the out_of_order_queue.
4317  */
4318 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4319 if (tcp_receive_window(tp) == 0)
4320 goto out_of_window;
4321
4322 /* Ok. In sequence. In window. */
4323 if (tp->ucopy.task == current &&
4324 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4325 sock_owned_by_user(sk) && !tp->urg_data) {
4326 int chunk = min_t(unsigned int, skb->len,
4327 tp->ucopy.len);
4328
4329 __set_current_state(TASK_RUNNING);
4330
4331 local_bh_enable();
4332 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4333 tp->ucopy.len -= chunk;
4334 tp->copied_seq += chunk;
4335 eaten = (chunk == skb->len);
4336 tcp_rcv_space_adjust(sk);
4337 }
4338 local_bh_disable();
4339 }
4340
4341 if (eaten <= 0) {
4342queue_and_out:
4343 if (eaten < 0 &&
4344 tcp_try_rmem_schedule(sk, skb, skb->truesize))
4345 goto drop;
4346
4347 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4348 }
4349 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4350 if (skb->len)
4351 tcp_event_data_recv(sk, skb);
4352 if (th->fin)
4353 tcp_fin(sk);
4354
4355 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4356 tcp_ofo_queue(sk);
4357
4358 /* RFC2581. 4.2. SHOULD send immediate ACK, when
4359  * gap in queue is filled.
4360  */
4361 if (skb_queue_empty(&tp->out_of_order_queue))
4362 inet_csk(sk)->icsk_ack.pingpong = 0;
4363 }
4364
4365 if (tp->rx_opt.num_sacks)
4366 tcp_sack_remove(tp);
4367
4368 tcp_fast_path_check(sk);
4369
4370 if (eaten > 0)
4371 kfree_skb_partial(skb, fragstolen);
4372 if (!sock_flag(sk, SOCK_DEAD))
4373 sk->sk_data_ready(sk, 0);
4374 return;
4375 }
4376
4377 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4378 /* A retransmit, 2nd most common case. Force an immediate ack. */
4379 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4380 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4381
4382out_of_window:
4383 tcp_enter_quickack_mode(sk);
4384 inet_csk_schedule_ack(sk);
4385drop:
4386 __kfree_skb(skb);
4387 return;
4388 }
4389
4390 /* Out of window. F.e. zero window probe. */
4391 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4392 goto out_of_window;
4393
4394 tcp_enter_quickack_mode(sk);
4395
4396 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4397 /* Partial packet, seq < rcv_next < end_seq */
4398 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4399 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4400 TCP_SKB_CB(skb)->end_seq);
4401
4402 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4403
4404 /* If window is closed, drop tail of packet. But after
4405  * remembering D-SACK for its head made in previous line.
4406  */
4407 if (!tcp_receive_window(tp))
4408 goto out_of_window;
4409 goto queue_and_out;
4410 }
4411
4412 tcp_data_queue_ofo(sk, skb);
4413}
4414
4415static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4416 struct sk_buff_head *list)
4417{
4418 struct sk_buff *next = NULL;
4419
4420 if (!skb_queue_is_last(list, skb))
4421 next = skb_queue_next(list, skb);
4422
4423 __skb_unlink(skb, list);
4424 __kfree_skb(skb);
4425 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4426
4427 return next;
4428}
4429
4430 /* Collapse contiguous sequence of skbs head..tail with
4431  * sequence numbers start..end.
4432  *
4433  * If tail is NULL, this means until the end of the list.
4434  *
4435  * Segments with FIN/SYN are not collapsed (only because this
4436  * simplifies code).
4437  */
4438static void
4439tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4440 struct sk_buff *head, struct sk_buff *tail,
4441 u32 start, u32 end)
4442{
4443 struct sk_buff *skb, *n;
4444 bool end_of_skbs;
4445
4446 /* First, check that queue is collapsible and find
4447  * the point where collapsing can be useful. */
4448 skb = head;
4449restart:
4450 end_of_skbs = true;
4451 skb_queue_walk_from_safe(list, skb, n) {
4452 if (skb == tail)
4453 break;
4454
4455 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4456 skb = tcp_collapse_one(sk, skb, list);
4457 if (!skb)
4458 break;
4459 goto restart;
4460 }
4461
4462 /* The first skb to collapse is:
4463  * - not SYN/FIN and
4464  * - bloated or contains data before "start" or
4465  *   overlaps to the next one.
4466  */
4467 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
4468 (tcp_win_from_space(skb->truesize) > skb->len ||
4469 before(TCP_SKB_CB(skb)->seq, start))) {
4470 end_of_skbs = false;
4471 break;
4472 }
4473
4474 if (!skb_queue_is_last(list, skb)) {
4475 struct sk_buff *next = skb_queue_next(list, skb);
4476 if (next != tail &&
4477 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4478 end_of_skbs = false;
4479 break;
4480 }
4481 }
4482
4483 /* Decided to skip this, advance start seq. */
4484 start = TCP_SKB_CB(skb)->end_seq;
4485 }
4486 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
4487 return;
4488
4489 while (before(start, end)) {
4490 struct sk_buff *nskb;
4491 unsigned int header = skb_headroom(skb);
4492 int copy = SKB_MAX_ORDER(header, 0);
4493
4494 /* Too big header? This can happen with IPv6. */
4495 if (copy < 0)
4496 return;
4497 if (end - start < copy)
4498 copy = end - start;
4499 nskb = alloc_skb(copy + header, GFP_ATOMIC);
4500 if (!nskb)
4501 return;
4502
4503 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4504 skb_set_network_header(nskb, (skb_network_header(skb) -
4505 skb->head));
4506 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4507 skb->head));
4508 skb_reserve(nskb, header);
4509 memcpy(nskb->head, skb->head, header);
4510 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4511 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4512 __skb_queue_before(list, skb, nskb);
4513 skb_set_owner_r(nskb, sk);
4514
4515 /* Copy data, releasing collapsed skbs. */
4516 while (copy > 0) {
4517 int offset = start - TCP_SKB_CB(skb)->seq;
4518 int size = TCP_SKB_CB(skb)->end_seq - start;
4519
4520 BUG_ON(offset < 0);
4521 if (size > 0) {
4522 size = min(copy, size);
4523 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4524 BUG();
4525 TCP_SKB_CB(nskb)->end_seq += size;
4526 copy -= size;
4527 start += size;
4528 }
4529 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4530 skb = tcp_collapse_one(sk, skb, list);
4531 if (!skb ||
4532 skb == tail ||
4533 tcp_hdr(skb)->syn ||
4534 tcp_hdr(skb)->fin)
4535 return;
4536 }
4537 }
4538 }
4539}
4540
4541 /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
4542  * and tcp_collapse() them until all the queue is collapsed.
4543  */
4544static void tcp_collapse_ofo_queue(struct sock *sk)
4545{
4546 struct tcp_sock *tp = tcp_sk(sk);
4547 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4548 struct sk_buff *head;
4549 u32 start, end;
4550
4551 if (skb == NULL)
4552 return;
4553
4554 start = TCP_SKB_CB(skb)->seq;
4555 end = TCP_SKB_CB(skb)->end_seq;
4556 head = skb;
4557
4558 for (;;) {
4559 struct sk_buff *next = NULL;
4560
4561 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4562 next = skb_queue_next(&tp->out_of_order_queue, skb);
4563 skb = next;
4564
4565 /* Segment is terminated when we see gap or when
4566  * we are at the end of all the queue. */
4567 if (!skb ||
4568 after(TCP_SKB_CB(skb)->seq, end) ||
4569 before(TCP_SKB_CB(skb)->end_seq, start)) {
4570 tcp_collapse(sk, &tp->out_of_order_queue,
4571 head, skb, start, end);
4572 head = skb;
4573 if (!skb)
4574 break;
4575
4576 start = TCP_SKB_CB(skb)->seq;
4577 end = TCP_SKB_CB(skb)->end_seq;
4578 } else {
4579 if (before(TCP_SKB_CB(skb)->seq, start))
4580 start = TCP_SKB_CB(skb)->seq;
4581 if (after(TCP_SKB_CB(skb)->end_seq, end))
4582 end = TCP_SKB_CB(skb)->end_seq;
4583 }
4584 }
4585}
4586
4587 /*
4588  * Purge the out-of-order queue.
4589  * Return true if queue was pruned.
4590  */
4591static bool tcp_prune_ofo_queue(struct sock *sk)
4592{
4593 struct tcp_sock *tp = tcp_sk(sk);
4594 bool res = false;
4595
4596 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4597 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4598 __skb_queue_purge(&tp->out_of_order_queue);
4599
4600 /* Reset SACK state. A conforming SACK implementation will
4601  * do the same at a timeout based retransmit. When a connection
4602  * is in a sad state like this, we care only about integrity
4603  * of the connection, not performance.
4604  */
4605 if (tp->rx_opt.sack_ok)
4606 tcp_sack_reset(&tp->rx_opt);
4607 sk_mem_reclaim(sk);
4608 res = true;
4609 }
4610 return res;
4611}
4612
4613 /* Reduce allocated memory if we can, trying to get
4614  * the socket within its memory limits again.
4615  *
4616  * Return less than zero if we should start dropping frames
4617  * until the socket owning process reads some of the data
4618  * to stabilize the situation.
4619  */
4620static int tcp_prune_queue(struct sock *sk)
4621{
4622 struct tcp_sock *tp = tcp_sk(sk);
4623
4624 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4625
4626 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4627
4628 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4629 tcp_clamp_window(sk);
4630 else if (sk_under_memory_pressure(sk))
4631 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4632
4633 tcp_collapse_ofo_queue(sk);
4634 if (!skb_queue_empty(&sk->sk_receive_queue))
4635 tcp_collapse(sk, &sk->sk_receive_queue,
4636 skb_peek(&sk->sk_receive_queue),
4637 NULL,
4638 tp->copied_seq, tp->rcv_nxt);
4639 sk_mem_reclaim(sk);
4640
4641 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4642 return 0;
4643
4644 /* Collapsing did not help, destructive actions follow.
4645  * This must not ever occur. */
4646
4647 tcp_prune_ofo_queue(sk);
4648
4649 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4650 return 0;
4651
4652 /* If we are really being abused, tell the caller to silently
4653  * drop receive data on the floor. It will get retransmitted
4654  * and hopefully next time it will fit.
4655  */
4656 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4657
4658 /* Massive buffer overcommit. */
4659 tp->pred_flags = 0;
4660 return -1;
4661}
4662
4663 /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
4664  * As additional protections, we do not touch cwnd in retransmission phases,
4665  * and if application hit its sndbuf limit recently.
4666  */
4667void tcp_cwnd_application_limited(struct sock *sk)
4668{
4669 struct tcp_sock *tp = tcp_sk(sk);
4670
4671 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4672 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4673 /* Limited by application or receiver window. */
4674 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4675 u32 win_used = max(tp->snd_cwnd_used, init_win);
4676 if (win_used < tp->snd_cwnd) {
4677 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4678 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4679 }
4680 tp->snd_cwnd_used = 0;
4681 }
4682 tp->snd_cwnd_stamp = tcp_time_stamp;
4683}
4684
4685static bool tcp_should_expand_sndbuf(const struct sock *sk)
4686{
4687 const struct tcp_sock *tp = tcp_sk(sk);
4688
4689 /* If the user specified a specific send buffer setting, do
4690  * not modify it.
4691  */
4692 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4693 return false;
4694
4695 /* If we are under global TCP memory pressure, do not expand. */
4696 if (sk_under_memory_pressure(sk))
4697 return false;
4698
4699 /* If we are under soft global TCP memory pressure, do not expand. */
4700 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4701 return false;
4702
4703 /* If we filled the congestion window, do not expand. */
4704 if (tp->packets_out >= tp->snd_cwnd)
4705 return false;
4706
4707 return true;
4708}
4709
4710 /* When an incoming ACK allowed us to free some skb from the
4711  * write_queue, we remember this event in the flag SOCK_QUEUE_SHRUNK
4712  * and wake up the socket on exit from the tcp input handler.
4713  *
4714  * PROBLEM: sndbuf expansion does not work well with largesend.
4715  */
4716static void tcp_new_space(struct sock *sk)
4717{
4718 struct tcp_sock *tp = tcp_sk(sk);
4719
4720 if (tcp_should_expand_sndbuf(sk)) {
4721 int sndmem = SKB_TRUESIZE(max_t(u32,
4722 tp->rx_opt.mss_clamp,
4723 tp->mss_cache) +
4724 MAX_TCP_HEADER);
4725 int demanded = max_t(unsigned int, tp->snd_cwnd,
4726 tp->reordering + 1);
4727 sndmem *= 2 * demanded;
4728 if (sndmem > sk->sk_sndbuf)
4729 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4730 tp->snd_cwnd_stamp = tcp_time_stamp;
4731 }
4732
4733 sk->sk_write_space(sk);
4734}
4735
4736static void tcp_check_space(struct sock *sk)
4737{
4738 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4739 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4740 if (sk->sk_socket &&
4741 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4742 tcp_new_space(sk);
4743 }
4744}
4745
4746static inline void tcp_data_snd_check(struct sock *sk)
4747{
4748 tcp_push_pending_frames(sk);
4749 tcp_check_space(sk);
4750}
4751
4752 /*
4753  * Check if sending an ack is needed.
4754  */
4755static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4756{
4757 struct tcp_sock *tp = tcp_sk(sk);
4758
4759 /* More than one full frame received... */
4760 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4761 /* ... and right edge of window advances far enough.
4762  * (tcp_recvmsg() will send ACK otherwise). Or...
4763  */
4764 __tcp_select_window(sk) >= tp->rcv_wnd) ||
4765 /* We ACK each frame or... */
4766 tcp_in_quickack_mode(sk) ||
4767 /* We have out of order data. */
4768 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4769 /* Then ack it now */
4770 tcp_send_ack(sk);
4771 } else {
4772 /* Else, send delayed ack. */
4773 tcp_send_delayed_ack(sk);
4774 }
4775}
4776
4777static inline void tcp_ack_snd_check(struct sock *sk)
4778{
4779 if (!inet_csk_ack_scheduled(sk)) {
4780 /* We sent a data segment already. */
4781 return;
4782 }
4783 __tcp_ack_snd_check(sk, 1);
4784}
4785
4786
4787 /*
4788  * This routine is only called when we have urgent data
4789  * signalled. It is the 'slow' part of tcp_urg. It could be
4790  * moved inline now as tcp_urg is only called from one
4791  * place. We handle URGent data wrong. We have to - as
4792  * BSD still doesn't use the correction from RFC961.
4793  * For 1003.1g we should support a new urgent pointer
4794  * mode.
4795  */
4796static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
4797{
4798 struct tcp_sock *tp = tcp_sk(sk);
4799 u32 ptr = ntohs(th->urg_ptr);
4800
4801 if (ptr && !sysctl_tcp_stdurg)
4802 ptr--;
4803 ptr += ntohl(th->seq);
4804
4805 /* Ignore urgent data that we've already seen and read. */
4806 if (after(tp->copied_seq, ptr))
4807 return;
4808
4809 /* Do not replay urg ptr.
4810  *
4811  * NOTE: interesting situation not covered by specs.
4812  * Misbehaving sender may send urg ptr, pointing to a segment
4813  * which we already have in the ofo queue. We are not able to fetch
4814  * such data and will stay in TCP_URG_NOTYET until it is eaten
4815  * by recvmsg(). It seems we are not obliged to handle such wicked
4816  * situations, but it is worth thinking about the possibility of
4817  * some DoS using a hypothetical application-level deadlock.
4818  */
4819 if (before(ptr, tp->rcv_nxt))
4820 return;
4821
4822 /* Do we already have a newer (or duplicate) urgent pointer? */
4823 if (tp->urg_data && !after(ptr, tp->urg_seq))
4824 return;
4825
4826 /* Tell the world about our new urgent pointer. */
4827 sk_send_sigurg(sk);
4828
4829 /* We may be adding urgent data when the last byte read was
4830  * urgent. To do this requires some care. We cannot just ignore
4831  * tp->copied_seq since we would read the last urgent byte again
4832  * as data, nor can we alter copied_seq until this data arrives
4833  * or we break the semantics of SIOCATMARK (and thus sockatmark()).
4834  *
4835  * NOTE. Double Dutch. Rendering to plain English: the author of
4836  * the comment above did something like send("A", MSG_OOB);
4837  * send("B", MSG_OOB); and expected the urgent pointer to point
4838  * to B.
4839  *
4840  * So, if the last read byte was the urgent byte and nothing has
4841  * been read past it, step copied_seq over it (dropping an already
4842  * fully-read skb) so that it is not returned again as data.
4843  */
4844 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4845 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4846 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4847 tp->copied_seq++;
4848 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4849 __skb_unlink(skb, &sk->sk_receive_queue);
4850 __kfree_skb(skb);
4851 }
4852 }
4853
4854 tp->urg_data = TCP_URG_NOTYET;
4855 tp->urg_seq = ptr;
4856
4857 /* Disable header prediction. */
4858 tp->pred_flags = 0;
4859}
4860
4861 /* This is the 'fast' part of urgent handling. */
4862static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
4863{
4864 struct tcp_sock *tp = tcp_sk(sk);
4865
4866 /* Check if we get a new urgent pointer - normally not. */
4867 if (th->urg)
4868 tcp_check_urg(sk, th);
4869
4870 /* Do we wait for any urgent data? - normally not... */
4871 if (tp->urg_data == TCP_URG_NOTYET) {
4872 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
4873 th->syn;
4874
4875 /* Is the urgent pointer pointing into this packet? */
4876 if (ptr < skb->len) {
4877 u8 tmp;
4878 if (skb_copy_bits(skb, ptr, &tmp, 1))
4879 BUG();
4880 tp->urg_data = TCP_URG_VALID | tmp;
4881 if (!sock_flag(sk, SOCK_DEAD))
4882 sk->sk_data_ready(sk, 0);
4883 }
4884 }
4885}
4886
4887static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4888{
4889 struct tcp_sock *tp = tcp_sk(sk);
4890 int chunk = skb->len - hlen;
4891 int err;
4892
4893 local_bh_enable();
4894 if (skb_csum_unnecessary(skb))
4895 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
4896 else
4897 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
4898 tp->ucopy.iov);
4899
4900 if (!err) {
4901 tp->ucopy.len -= chunk;
4902 tp->copied_seq += chunk;
4903 tcp_rcv_space_adjust(sk);
4904 }
4905
4906 local_bh_disable();
4907 return err;
4908}
4909
4910static __sum16 __tcp_checksum_complete_user(struct sock *sk,
4911 struct sk_buff *skb)
4912{
4913 __sum16 result;
4914
4915 if (sock_owned_by_user(sk)) {
4916 local_bh_enable();
4917 result = __tcp_checksum_complete(skb);
4918 local_bh_disable();
4919 } else {
4920 result = __tcp_checksum_complete(skb);
4921 }
4922 return result;
4923}
4924
4925static inline bool tcp_checksum_complete_user(struct sock *sk,
4926 struct sk_buff *skb)
4927{
4928 return !skb_csum_unnecessary(skb) &&
4929 __tcp_checksum_complete_user(sk, skb);
4930}
4931
4932#ifdef CONFIG_NET_DMA
4933static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
4934 int hlen)
4935{
4936 struct tcp_sock *tp = tcp_sk(sk);
4937 int chunk = skb->len - hlen;
4938 int dma_cookie;
4939 bool copied_early = false;
4940
4941 if (tp->ucopy.wakeup)
4942 return false;
4943
4944 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
4945 tp->ucopy.dma_chan = net_dma_find_channel();
4946
4947 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
4948
4949 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
4950 skb, hlen,
4951 tp->ucopy.iov, chunk,
4952 tp->ucopy.pinned_list);
4953
4954 if (dma_cookie < 0)
4955 goto out;
4956
4957 tp->ucopy.dma_cookie = dma_cookie;
4958 copied_early = true;
4959
4960 tp->ucopy.len -= chunk;
4961 tp->copied_seq += chunk;
4962 tcp_rcv_space_adjust(sk);
4963
4964 if ((tp->ucopy.len == 0) ||
4965 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
4966 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
4967 tp->ucopy.wakeup = 1;
4968 sk->sk_data_ready(sk, 0);
4969 }
4970 } else if (chunk > 0) {
4971 tp->ucopy.wakeup = 1;
4972 sk->sk_data_ready(sk, 0);
4973 }
4974out:
4975 return copied_early;
4976}
4977#endif
4978
4979 /* Does PAWS and seqno based validation of an incoming segment; flags
4980  * play a significant role here.
4981  */
4982static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
4983 const struct tcphdr *th, int syn_inerr)
4984{
4985 struct tcp_sock *tp = tcp_sk(sk);
4986
4987 /* RFC1323: H1. Apply PAWS check first. */
4988 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
4989 tcp_paws_discard(sk, skb)) {
4990 if (!th->rst) {
4991 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
4992 tcp_send_dupack(sk, skb);
4993 goto discard;
4994 }
4995 /* Reset is accepted even if it did not pass PAWS. */
4996 }
4997
4998 /* Step 1: check sequence number */
4999 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5000 /* RFC793, page 37: "In all states except SYN-SENT, all reset
5001  * (RST) segments are validated by checking their SEQ-fields."
5002  * And page 69: "If an incoming segment is not acceptable,
5003  * an acknowledgment should be sent in reply (unless the RST
5004  * bit is set, if so drop the segment and return)".
5005  */
5006 if (!th->rst) {
5007 if (th->syn)
5008 goto syn_challenge;
5009 tcp_send_dupack(sk, skb);
5010 }
5011 goto discard;
5012 }
5013
5014 /* Step 2: check RST bit */
5015 if (th->rst) {
5016 /* RFC 5961 3.2:
5017  * If sequence number exactly matches RCV.NXT, then
5018  *     RESET the connection,
5019  * else
5020  *     send a challenge ACK.
5021  */
5022 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
5023 tcp_reset(sk);
5024 else
5025 tcp_send_challenge_ack(sk);
5026 goto discard;
5027 }
5028
5029 /* step 3: check security and precedence [ignored] */
5030
5031 /* step 4: check for a SYN
5032  * RFC 5961 4.2: send a challenge ack
5033  */
5034 if (th->syn) {
5035syn_challenge:
5036 if (syn_inerr)
5037 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5038 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5039 tcp_send_challenge_ack(sk);
5040 goto discard;
5041 }
5042
5043 return true;
5044
5045discard:
5046 __kfree_skb(skb);
5047 return false;
5048}
5049
5050 /*
5051  *	TCP receive function for the ESTABLISHED state.
5052  *
5053  *	It is split into a fast path and a slow path. The fast path is
5054  *	disabled when:
5055  *	- A zero window was announced from us - zero window probing
5056  *	  is only handled properly in the slow path.
5057  *	- Out of order segments arrived.
5058  *	- Urgent data is expected.
5059  *	- There is no buffer space left.
5060  *	- Unexpected TCP flags/window values/header lengths are received
5061  *	  (detected by checking the TCP header against pred_flags).
5062  *	- Data is sent in both directions. The fast path only supports
5063  *	  pure senders or pure receivers (this means either the sequence
5064  *	  number or the ack value must stay constant).
5065  *	- An unexpected TCP option appears.
5066  *
5067  *	When these conditions are not satisfied it drops into a standard
5068  *	receive procedure patterned after RFC793 to handle all cases.
5069  *	The first three cases are guaranteed by proper pred_flags setting,
5070  *	the rest is checked inline. Fast processing is turned on in
5071  *	tcp_data_queue when everything is OK.
5072  */
5073int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5074 const struct tcphdr *th, unsigned int len)
5075{
5076 struct tcp_sock *tp = tcp_sk(sk);
5077
5078 if (unlikely(sk->sk_rx_dst == NULL))
5079 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5080
5081 /*
5082  *	Header prediction.
5083  *	The code loosely follows the one in the famous
5084  *	"30 instruction TCP receive" Van Jacobson mail.
5085  *
5086  *	Van's trick is to deposit buffers into socket queue
5087  *	on a device interrupt, to call tcp_recv function
5088  *	on the receive process context and checksum and copy
5089  *	the buffer to user space. smart...
5090  *
5091  *	Our current scheme is not silly either but we take the
5092  *	extra cost of the net_bh soft interrupt processing...
5093  *	We do checksum and copy also but from device to kernel.
5094  */
5095 tp->rx_opt.saw_tstamp = 0;
5096
5097
5098 /*	pred_flags is 0xS?10 << 16 + snd_wnd
5099  *	if header_prediction is to be made
5100  *	'S' will always be tp->tcp_header_len >> 2
5101  *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
5102  *	turn it off	(when there are holes in the receive
5103  *	 space for instance)
5104  *	PSH flag is ignored.
5105  */
5106 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5107 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5108 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5109 int tcp_header_len = tp->tcp_header_len;
5110
5111 /* Timestamp header prediction: tcp_header_len
5112  * is automatically equal to th->doff*4 due to pred_flags
5113  * match.
5114  */
5115
5116 /* Check timestamp */
5117 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5118 /* No? Slow path! */
5119 if (!tcp_parse_aligned_timestamp(tp, th))
5120 goto slow_path;
5121
5122 /* If PAWS failed, check it more carefully in slow path */
5123 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5124 goto slow_path;
5125
5126 /* DO NOT update ts_recent here: if the checksum fails
5127  * and the timestamp was the corrupted part, it would result
5128  * in a hung connection since we would drop all
5129  * future packets due to the PAWS test.
5130  */
5131 }
5132
5133 if (len <= tcp_header_len) {
5134 /* Bulk data transfer: sender */
5135 if (len == tcp_header_len) {
5136 /* Predicted packet is in window by definition.
5137  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
5138  * Hence, check seq<=rcv_wup reduces to:
5139  */
5140 if (tcp_header_len ==
5141 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5142 tp->rcv_nxt == tp->rcv_wup)
5143 tcp_store_ts_recent(tp);
5144
5145 /* We know that such packets are checksummed
5146  * on entry.
5147  */
5148 tcp_ack(sk, skb, 0);
5149 __kfree_skb(skb);
5150 tcp_data_snd_check(sk);
5151 return 0;
5152 } else { /* Header too small */
5153 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5154 goto discard;
5155 }
5156 } else {
5157 int eaten = 0;
5158 int copied_early = 0;
5159 bool fragstolen = false;
5160
5161 if (tp->copied_seq == tp->rcv_nxt &&
5162 len - tcp_header_len <= tp->ucopy.len) {
5163#ifdef CONFIG_NET_DMA
5164 if (tp->ucopy.task == current &&
5165 sock_owned_by_user(sk) &&
5166 tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5167 copied_early = 1;
5168 eaten = 1;
5169 }
5170#endif
5171 if (tp->ucopy.task == current &&
5172 sock_owned_by_user(sk) && !copied_early) {
5173 __set_current_state(TASK_RUNNING);
5174
5175 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
5176 eaten = 1;
5177 }
5178 if (eaten) {
5179 /* Predicted packet is in window by definition.
5180  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
5181  * Hence, check seq<=rcv_wup reduces to:
5182  */
5183 if (tcp_header_len ==
5184 (sizeof(struct tcphdr) +
5185 TCPOLEN_TSTAMP_ALIGNED) &&
5186 tp->rcv_nxt == tp->rcv_wup)
5187 tcp_store_ts_recent(tp);
5188
5189 tcp_rcv_rtt_measure_ts(sk, skb);
5190
5191 __skb_pull(skb, tcp_header_len);
5192 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5193 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5194 }
5195 if (copied_early)
5196 tcp_cleanup_rbuf(sk, skb->len);
5197 }
5198 if (!eaten) {
5199 if (tcp_checksum_complete_user(sk, skb))
5200 goto csum_error;
5201
5202 if ((int)skb->truesize > sk->sk_forward_alloc)
5203 goto step5;
5204
5205 /* Predicted packet is in window by definition.
5206  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
5207  * Hence, check seq<=rcv_wup reduces to:
5208  */
5209 if (tcp_header_len ==
5210 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5211 tp->rcv_nxt == tp->rcv_wup)
5212 tcp_store_ts_recent(tp);
5213
5214 tcp_rcv_rtt_measure_ts(sk, skb);
5215
5216 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5217
5218 /* Bulk data transfer: receiver */
5219 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5220 &fragstolen);
5221 }
5222
5223 tcp_event_data_recv(sk, skb);
5224
5225 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5226 /* Well, only one small jumplet in fast path... */
5227 tcp_ack(sk, skb, FLAG_DATA);
5228 tcp_data_snd_check(sk);
5229 if (!inet_csk_ack_scheduled(sk))
5230 goto no_ack;
5231 }
5232
5233 if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
5234 __tcp_ack_snd_check(sk, 0);
5235no_ack:
5236#ifdef CONFIG_NET_DMA
5237 if (copied_early)
5238 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
5239 else
5240#endif
5241 if (eaten)
5242 kfree_skb_partial(skb, fragstolen);
5243 sk->sk_data_ready(sk, 0);
5244 return 0;
5245 }
5246 }
5247
5248slow_path:
5249 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5250 goto csum_error;
5251
5252 if (!th->ack && !th->rst)
5253 goto discard;
5254
5255
5256
5257
5258
5259 if (!tcp_validate_incoming(sk, skb, th, 1))
5260 return 0;
5261
5262step5:
5263 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5264 goto discard;
5265
5266 tcp_rcv_rtt_measure_ts(sk, skb);
5267
5268
5269 tcp_urg(sk, skb, th);
5270
5271
5272 tcp_data_queue(sk, skb);
5273
5274 tcp_data_snd_check(sk);
5275 tcp_ack_snd_check(sk);
5276 return 0;
5277
5278csum_error:
5279 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
5280 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5281
5282discard:
5283 __kfree_skb(skb);
5284 return 0;
5285}
5286EXPORT_SYMBOL(tcp_rcv_established);
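
/* How this function is reached: tcp_v4_do_rcv() (and its IPv6 analogue)
 * call it directly once the connection is fully established. A sketch of
 * that dispatch, assuming the v3.x shape of tcp_v4_do_rcv():
 */
#if 0	/* illustrative sketch, not compiled */
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;	/* non-zero means: answer with a RST */
		}
		return 0;
	}
#endif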

void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_set_state(sk, TCP_ESTABLISHED);

	if (skb != NULL) {
		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
		security_inet_conn_established(sk, skb);
	}

	/* Make sure socket is routed, for correct metrics. */
	icsk->icsk_af_ops->rebuild_header(sk);

	tcp_init_metrics(sk);

	tcp_init_congestion_control(sk);

	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
	tp->lsndtime = tcp_time_stamp;

	tcp_init_buffer_space(sk);

	if (sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

	if (!tp->rx_opt.snd_wscale)
		__tcp_fast_path_on(tp, tp->snd_wnd);
	else
		tp->pred_flags = 0;

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);
		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
	}
}
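
/* A note on the wscale branch above: when the peer negotiated window
 * scaling, pred_flags stays 0 (fast path off) at this point, presumably
 * because the raw 16-bit window field of incoming segments can only be
 * matched once a properly scaled window value is available. Elsewhere
 * the scaled variant from <net/tcp.h> is used; for reference:
 */
#if 0	/* illustrative sketch, not compiled */
static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
	/* Undo our own scaling so pred_flags holds the on-wire window. */
	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}
#endif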

static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
				    struct tcp_fastopen_cookie *cookie)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
	u16 mss = tp->rx_opt.mss_clamp;
	bool syn_drop;

	if (mss == tp->rx_opt.user_mss) {
		struct tcp_options_received opt;

		/* Get the original SYN-ACK MSS value if user MSS sets mss_clamp */
		tcp_clear_options(&opt);
		opt.user_mss = opt.mss_clamp = 0;
		tcp_parse_options(synack, &opt, 0, NULL);
		mss = opt.mss_clamp;
	}

	if (!tp->syn_fastopen)	/* Ignore an unsolicited cookie */
		cookie->len = -1;

	/* The SYN-ACK neither has a cookie nor acknowledges the data. Presumably
	 * the remote received only the retransmitted (regular) SYNs: either
	 * the original SYN-data or the corresponding SYN-ACK was lost.
	 */
	syn_drop = (cookie->len <= 0 && data && tp->total_retrans);

	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);

	if (data) { /* Retransmit unacked data in SYN */
		tcp_for_write_queue_from(data, sk) {
			if (data == tcp_send_head(sk) ||
			    __tcp_retransmit_skb(sk, data))
				break;
		}
		tcp_rearm_rto(sk);
		return true;
	}
	tp->syn_data_acked = tp->syn_data;
	return false;
}
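
/* For context: the client-side Fast Open attempt that leads into the
 * function above is driven from user space. Since Linux 3.7 a client can
 * put data on the SYN with sendto(MSG_FASTOPEN); connect() then happens
 * implicitly. A minimal user-space sketch (error handling omitted):
 */
#if 0	/* illustrative user-space sketch, not kernel code */
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	/* Data rides on the SYN; a cached cookie is used if available,
	 * otherwise the kernel falls back to a regular handshake.
	 */
	sendto(fd, buf, buflen, MSG_FASTOPEN,
	       (struct sockaddr *)&daddr, sizeof(daddr));
#endif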

static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 const struct tcphdr *th, unsigned int len)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int saved_clamp = tp->rx_opt.mss_clamp;

	tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
	if (tp->rx_opt.saw_tstamp)
		tp->rx_opt.rcv_tsecr -= tp->tsoffset;

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
		if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
		    after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
			goto reset_and_undo;

		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}

		/* Now the ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */
		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 *
		 *    (A SYN-less, RST-less ACK cannot move us forward
		 *     from SYN-SENT.)
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ... (our SYN has been ACKed),
		 *    change the connection state to ESTABLISHED..."
		 */
		TCP_ECN_rcv_synack(tp, th);

		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC 1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);

		if (!tp->rx_opt.wscale_ok) {
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		/* Remember, tcp_poll() does not lock the socket!
		 * Change the state from SYN-SENT only after copied_seq
		 * is initialized.
		 */
		tp->copied_seq = tp->rcv_nxt;

		smp_mb();

		tcp_finish_connect(sk, skb);

		if ((tp->syn_fastopen || tp->syn_data) &&
		    tcp_rcv_fastopen_synack(sk, skb, &foc))
			return -1;

		if (sk->sk_write_pending ||
		    icsk->icsk_accept_queue.rskq_defer_accept ||
		    icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);
			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
			tcp_enter_quickack_mode(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			__kfree_skb(skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment. */

	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is an attempt at a
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be a connect to self.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC 1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);
		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		TCP_ECN_rcv_syn(tp, th);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		tcp_send_synack(sk);
#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this (except that we must
		 * either change tcp_recvmsg() to prevent it from returning
		 * data before the 3WHS completes per RFC 793, or employ
		 * TCP Fast Open to queue both data and SYN).
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, the code doing this in step6 of tcp_rcv_state_process
		 * does not seem the proper fix. But with Fast Open it is more
		 * natural that the SYN, including its data, is accepted.
		 */
		return -1;
#else
		goto discard;
#endif
	}
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}

/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It is called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  const struct tcphdr *th, unsigned int len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock *req;
	int queued = 0;

	tp->rx_opt.saw_tstamp = 0;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (th->fin)
				goto discard;
			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
				return 1;

			/* Now we have several options: in theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window, and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it; that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed unless
			 * it's still in use.
			 */
			kfree_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:
		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	req = tp->fastopen_rsk;
	if (req != NULL) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
		    sk->sk_state != TCP_FIN_WAIT1);

		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
			goto discard;
	}

	if (!th->ack && !th->rst)
		goto discard;

	if (!tcp_validate_incoming(sk, skb, th, 0))
		return 0;

	/* step 5: check the ACK field */
	if (true) {
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
						  FLAG_UPDATE_TS_RECENT) > 0;

		switch (sk->sk_state) {
		case TCP_SYN_RECV:
			if (acceptable) {
				/* Once we leave TCP_SYN_RECV, we no longer
				 * need req, so release it.
				 */
				if (req) {
					tcp_synack_rtt_meas(sk, req);
					tp->total_retrans = req->num_retrans;

					reqsk_fastopen_remove(sk, req, false);
				} else {
					/* Make sure socket is routed, for
					 * correct metrics.
					 */
					icsk->icsk_af_ops->rebuild_header(sk);
					tcp_init_congestion_control(sk);

					tcp_mtup_init(sk);
					tcp_init_buffer_space(sk);
					tp->copied_seq = tp->rcv_nxt;
				}
				smp_mb();
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->sk_state_change(sk);

				/* Note that this wakeup is only for the
				 * marginal crossed-SYN case. Passively open
				 * sockets are not woken up, because
				 * sk->sk_sleep == NULL and
				 * sk->sk_socket == NULL.
				 */
				if (sk->sk_socket)
					sk_wake_async(sk,
						      SOCK_WAKE_IO, POLL_OUT);

				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
				tp->snd_wnd = ntohs(th->window) <<
					      tp->rx_opt.snd_wscale;
				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

				if (tp->rx_opt.tstamp_ok)
					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

				if (req) {
					/* Re-arm the timer because data may
					 * have been sent out. This is similar
					 * to the regular data transmission
					 * case when new data has just been
					 * acked.
					 *
					 * (TFO) - we could try to be more
					 * aggressive and retransmit any
					 * unacked data sooner, based on when
					 * it was sent.
					 */
					tcp_rearm_rto(sk);
				} else
					tcp_init_metrics(sk);

				/* Prevent spurious tcp_cwnd_restart() on
				 * first data packet.
				 */
				tp->lsndtime = tcp_time_stamp;

				tcp_initialize_rcv_mss(sk);
				tcp_fast_path_on(tp);
			} else {
				return 1;
			}
			break;

		case TCP_FIN_WAIT1:
			/* A Fast Open passive socket may reach FIN_WAIT1
			 * before the handshake ACK arrives (the application
			 * wrote and closed early), in which case req is
			 * still attached here.
			 */
			if (req != NULL) {
				/* Only an acceptable ACK may complete the
				 * handshake; otherwise answer with a reset
				 * (returning 1 makes the caller send a RST).
				 * On success the request sock is released
				 * and the RTO re-armed for any data that
				 * rode on the SYN-ACK.
				 */
				if (!acceptable)
					return 1;

				reqsk_fastopen_remove(sk, req, false);
				tcp_rearm_rto(sk);
			}
			if (tp->snd_una == tp->write_seq) {
				struct dst_entry *dst;

				tcp_set_state(sk, TCP_FIN_WAIT2);
				sk->sk_shutdown |= SEND_SHUTDOWN;

				dst = __sk_dst_get(sk);
				if (dst)
					dst_confirm(dst);

				if (!sock_flag(sk, SOCK_DEAD))
					/* Wake up lingering close() */
					sk->sk_state_change(sk);
				else {
					int tmo;

					if (tp->linger2 < 0 ||
					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
						tcp_done(sk);
						NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
						return 1;
					}

					tmo = tcp_fin_time(sk);
					if (tmo > TCP_TIMEWAIT_LEN) {
						inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
					} else if (th->fin || sock_owned_by_user(sk)) {
						/* Bad case. We could lose such a FIN otherwise.
						 * It is not a big problem, but it looks confusing
						 * and is not so rare an event. We can still lose
						 * it now, if it spins in bh_lock_sock(), but that
						 * is really a marginal case.
						 */
						inet_csk_reset_keepalive_timer(sk, tmo);
					} else {
						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
						goto discard;
					}
				}
			}
			break;

		case TCP_CLOSING:
			if (tp->snd_una == tp->write_seq) {
				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
				goto discard;
			}
			break;

		case TCP_LAST_ACK:
			if (tp->snd_una == tp->write_seq) {
				tcp_update_metrics(sk);
				tcp_done(sk);
				goto discard;
			}
			break;
		}
	}

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does a reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		/* Fall through */
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data_queue() could move the socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
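
/* For context: this is the companion entry point to tcp_rcv_established()
 * above; tcp_v4_do_rcv() (and the IPv6 analogue) route every segment for a
 * non-ESTABLISHED, non-TIME_WAIT socket here. A sketch of that call site,
 * assuming the v3.x shape of tcp_v4_do_rcv():
 */
#if 0	/* illustrative sketch, not compiled */
	/* Slow path: all states except ESTABLISHED and TIME_WAIT. */
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;	/* non-zero means: answer with a RST */
	}
#endif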