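/*
 * TCP input side: receiver MSS/RTT estimation and receive-buffer autotuning,
 * SACK/D-SACK scoreboard processing, and the congestion-control state
 * machine driving fast retransmit and loss recovery.
 */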
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <net/netdma.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

int sysctl_tcp_challenge_ack_limit = 100;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;

#define FLAG_DATA		0x01	/* Incoming frame contained data.	*/
#define FLAG_WIN_UPDATE		0x02	/* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04	/* This ACK acknowledged new data.	*/
#define FLAG_RETRANS_DATA_ACKED	0x08	/* Some of the acked data had been retransmitted. */
#define FLAG_SYN_ACKED		0x10	/* This ACK acknowledged the SYN.	*/
#define FLAG_DATA_SACKED	0x20	/* New SACK information.		*/
#define FLAG_ECE		0x40	/* ECE was set in this ACK.		*/
#define FLAG_SLOWPATH		0x100	/* Do not skip RFC checks for window update. */
#define FLAG_ORIG_SACK_ACKED	0x200	/* Never-retransmitted data was (s)acked. */
#define FLAG_SND_UNA_ADVANCED	0x400	/* snd_una changed (not necessarily by new data). */
#define FLAG_DSACKING_ACK	0x800	/* SACK blocks contained D-SACK info.	*/
#define FLAG_SACK_RENEGING	0x2000	/* snd_una advanced to a SACKed sequence. */
#define FLAG_UPDATE_TS_RECENT	0x4000	/* Update ts_recent from this segment.	*/

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
123
124
125
126
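/* Adapt the MSS estimate used for delayed-ACK decisions to the segment sizes
 * actually seen on the wire.  GSO'd skbs report gso_size directly; otherwise
 * header-length heuristics guard against mistaking small pushed segments for
 * the real MSS.
 */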
127static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
128{
129 struct inet_connection_sock *icsk = inet_csk(sk);
130 const unsigned int lss = icsk->icsk_ack.last_seg_size;
131 unsigned int len;
132
133 icsk->icsk_ack.last_seg_size = 0;
134
135
136
137
138 len = skb_shinfo(skb)->gso_size ? : skb->len;
139 if (len >= icsk->icsk_ack.rcv_mss) {
140 icsk->icsk_ack.rcv_mss = len;
141 } else {
142
143
144
145
146
147 len += skb->data - skb_transport_header(skb);
148 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
149
150
151
152
153
154 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
155 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
156
157
158
159
160 len -= tcp_sk(sk)->tcp_header_len;
161 icsk->icsk_ack.last_seg_size = len;
162 if (len == lss) {
163 icsk->icsk_ack.rcv_mss = len;
164 return;
165 }
166 }
167 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
168 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
169 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
170 }
171}
172
static void tcp_incr_quickack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

static void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	tcp_incr_quickack(sk);
	icsk->icsk_ack.pingpong = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if the "quick" count is not exhausted and the session is
 * not interactive (pingpong).
 */
static inline bool tcp_in_quickack_mode(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}
202
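/* ECN helpers (RFC 3168): note CE marks on received segments, demand CWR from
 * the peer via ECE, and clear that demand once the peer's CWR is seen.
 */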
203static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
204{
205 if (tp->ecn_flags & TCP_ECN_OK)
206 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
207}
208
209static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
210{
211 if (tcp_hdr(skb)->cwr)
212 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
213}
214
215static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
216{
217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
218}
219
220static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
221{
222 if (!(tp->ecn_flags & TCP_ECN_OK))
223 return;
224
225 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
226 case INET_ECN_NOT_ECT:
227
228
229
230
231 if (tp->ecn_flags & TCP_ECN_SEEN)
232 tcp_enter_quickack_mode((struct sock *)tp);
233 break;
234 case INET_ECN_CE:
235 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
236
237 tcp_enter_quickack_mode((struct sock *)tp);
238 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
239 }
		/* fall through */
	default:
		tp->ecn_flags |= TCP_ECN_SEEN;
	}
244}
245
246static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
247{
248 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
249 tp->ecn_flags &= ~TCP_ECN_OK;
250}
251
252static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
253{
254 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
255 tp->ecn_flags &= ~TCP_ECN_OK;
256}
257
258static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
259{
260 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
261 return true;
262 return false;
263}
264
265
266
267
268
269
270static void tcp_fixup_sndbuf(struct sock *sk)
271{
272 int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
273
274 sndmem *= TCP_INIT_CWND;
275 if (sk->sk_sndbuf < sndmem)
276 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
277}
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
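/* Slow-path check for growing the advertised window: scale the skb's
 * truesize and the maximum window down together, making sure payload still
 * outweighs per-skb overhead at the current rcv_ssthresh, so a larger window
 * cannot exhaust rcvbuf with overhead alone.
 */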
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}
321
322static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
323{
324 struct tcp_sock *tp = tcp_sk(sk);
325
326
327 if (tp->rcv_ssthresh < tp->window_clamp &&
328 (int)tp->rcv_ssthresh < tcp_space(sk) &&
329 !sk_under_memory_pressure(sk)) {
330 int incr;
331
332
333
334
335 if (tcp_win_from_space(skb->truesize) <= skb->len)
336 incr = 2 * tp->advmss;
337 else
338 incr = __tcp_grow_window(sk, skb);
339
340 if (incr) {
341 incr = max_t(int, incr, 2 * skb->len);
342 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
343 tp->window_clamp);
344 inet_csk(sk)->icsk_ack.quick |= 1;
345 }
346 }
347}
348
349
350static void tcp_fixup_rcvbuf(struct sock *sk)
351{
352 u32 mss = tcp_sk(sk)->advmss;
353 int rcvmem;
354
355 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
356 tcp_default_init_rwnd(mss);
357
358 if (sk->sk_rcvbuf < rcvmem)
359 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
360}
361
362
363
364
365void tcp_init_buffer_space(struct sock *sk)
366{
367 struct tcp_sock *tp = tcp_sk(sk);
368 int maxwin;
369
370 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
371 tcp_fixup_rcvbuf(sk);
372 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
373 tcp_fixup_sndbuf(sk);
374
375 tp->rcvq_space.space = tp->rcv_wnd;
376
377 maxwin = tcp_full_space(sk);
378
379 if (tp->window_clamp >= maxwin) {
380 tp->window_clamp = maxwin;
381
382 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
383 tp->window_clamp = max(maxwin -
384 (maxwin >> sysctl_tcp_app_win),
385 4 * tp->advmss);
386 }
387
388
389 if (sysctl_tcp_app_win &&
390 tp->window_clamp > 2 * tp->advmss &&
391 tp->window_clamp + tp->advmss > maxwin)
392 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
393
394 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
395 tp->snd_cwnd_stamp = tcp_time_stamp;
396}
397
398
399static void tcp_clamp_window(struct sock *sk)
400{
401 struct tcp_sock *tp = tcp_sk(sk);
402 struct inet_connection_sock *icsk = inet_csk(sk);
403
404 icsk->icsk_ack.quick = 0;
405
406 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
407 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
408 !sk_under_memory_pressure(sk) &&
409 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
410 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
411 sysctl_tcp_rmem[2]);
412 }
413 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
414 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
415}
416
417
418
419
420
421
422
423
424void tcp_initialize_rcv_mss(struct sock *sk)
425{
426 const struct tcp_sock *tp = tcp_sk(sk);
427 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
428
429 hint = min(hint, tp->rcv_wnd / 2);
430 hint = min(hint, TCP_MSS_DEFAULT);
431 hint = max(hint, TCP_MIN_MSS);
432
433 inet_csk(sk)->icsk_ack.rcv_mss = hint;
434}
435EXPORT_SYMBOL(tcp_initialize_rcv_mss);
436
437
438
439
440
441
442
443
444
445
446
447
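/* Receiver-side RTT estimation, used to size the receive buffer.
 * win_dep != 0 takes a windowed minimum of whole-window samples (these only
 * bound the RTT from above); win_dep == 0 runs a plain EWMA on timestamp
 * samples.
 */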
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt;
	long m = sample;

	if (m == 0)
		m = 1;

	if (new_sample != 0) {
		if (!win_dep) {
			/* rtt is kept scaled by 8; EWMA with gain 1/8:
			 * rtt += (sample - rtt) / 8
			 */
			m -= (new_sample >> 3);
			new_sample += m;
		} else {
			/* Per-window samples are upper bounds only: track the
			 * minimum seen within the window instead.
			 */
			m <<= 3;
			if (m < new_sample)
				new_sample = m;
		}
	} else {
		/* First measurement. */
		new_sample = m << 3;
	}

	if (tp->rcv_rtt_est.rtt != new_sample)
		tp->rcv_rtt_est.rtt = new_sample;
}
483
484static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
485{
486 if (tp->rcv_rtt_est.time == 0)
487 goto new_measure;
488 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
489 return;
490 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
491
492new_measure:
493 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
494 tp->rcv_rtt_est.time = tcp_time_stamp;
495}
496
497static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
498 const struct sk_buff *skb)
499{
500 struct tcp_sock *tp = tcp_sk(sk);
501 if (tp->rx_opt.rcv_tsecr &&
502 (TCP_SKB_CB(skb)->end_seq -
503 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
504 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
505}
506
507
508
509
510
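/* Called every time data is copied to user space: estimate how much the
 * application consumed over the last RTT and grow rcvq_space / sk_rcvbuf /
 * window_clamp so that roughly twice that amount fits (dynamic right-sizing).
 */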
511void tcp_rcv_space_adjust(struct sock *sk)
512{
513 struct tcp_sock *tp = tcp_sk(sk);
514 int time;
515 int space;
516
517 if (tp->rcvq_space.time == 0)
518 goto new_measure;
519
520 time = tcp_time_stamp - tp->rcvq_space.time;
521 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
522 return;
523
524 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
525
526 space = max(tp->rcvq_space.space, space);
527
528 if (tp->rcvq_space.space != space) {
529 int rcvmem;
530
531 tp->rcvq_space.space = space;
532
533 if (sysctl_tcp_moderate_rcvbuf &&
534 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
535 int new_clamp = space;
536
537
538
539
540
541 space /= tp->advmss;
542 if (!space)
543 space = 1;
544 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
545 while (tcp_win_from_space(rcvmem) < tp->advmss)
546 rcvmem += 128;
547 space *= rcvmem;
548 space = min(space, sysctl_tcp_rmem[2]);
549 if (space > sk->sk_rcvbuf) {
550 sk->sk_rcvbuf = space;
551
552
553 tp->window_clamp = new_clamp;
554 }
555 }
556 }
557
558new_measure:
559 tp->rcvq_space.seq = tp->copied_seq;
560 tp->rcvq_space.time = tcp_time_stamp;
561}
562
563
564
565
566
567
568
569
570
571
572
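/* Per-segment receive housekeeping: schedule an ACK, update rcv_mss and the
 * receiver RTT estimate, and adapt the delayed-ACK timeout (ato) to the
 * observed inter-arrival gap, entering quickack mode after long gaps.
 */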
573static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
574{
575 struct tcp_sock *tp = tcp_sk(sk);
576 struct inet_connection_sock *icsk = inet_csk(sk);
577 u32 now;
578
579 inet_csk_schedule_ack(sk);
580
581 tcp_measure_rcv_mss(sk, skb);
582
583 tcp_rcv_rtt_measure(tp);
584
585 now = tcp_time_stamp;
586
587 if (!icsk->icsk_ack.ato) {
588
589
590
591 tcp_incr_quickack(sk);
592 icsk->icsk_ack.ato = TCP_ATO_MIN;
593 } else {
594 int m = now - icsk->icsk_ack.lrcvtime;
595
596 if (m <= TCP_ATO_MIN / 2) {
597
598 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
599 } else if (m < icsk->icsk_ack.ato) {
600 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
601 if (icsk->icsk_ack.ato > icsk->icsk_rto)
602 icsk->icsk_ack.ato = icsk->icsk_rto;
603 } else if (m > icsk->icsk_rto) {
604
605
606
607 tcp_incr_quickack(sk);
608 sk_mem_reclaim(sk);
609 }
610 }
611 icsk->icsk_ack.lrcvtime = now;
612
613 TCP_ECN_check_ce(tp, skb);
614
615 if (skb->len >= 128)
616 tcp_grow_window(sk, skb);
617}
618
619
620
621
622
623
624
625
626
627
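/* RTT estimator (Jacobson/Karels, RFC 6298).  srtt is stored scaled by 8 and
 * mdev by 4, so the integer updates below implement approximately:
 *
 *	srtt = 7/8 * srtt + 1/8 * m
 *	mdev = 3/4 * mdev + 1/4 * |m - srtt|
 *
 * with extra damping of mdev when the error shrinks, and rttvar tracking the
 * per-RTT maximum of mdev.
 */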
628static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
629{
630 struct tcp_sock *tp = tcp_sk(sk);
631 long m = mrtt;
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649 if (m == 0)
650 m = 1;
651 if (tp->srtt != 0) {
652 m -= (tp->srtt >> 3);
653 tp->srtt += m;
654 if (m < 0) {
655 m = -m;
656 m -= (tp->mdev >> 2);
657
658
659
660
661
662
663
664
665 if (m > 0)
666 m >>= 3;
667 } else {
668 m -= (tp->mdev >> 2);
669 }
670 tp->mdev += m;
671 if (tp->mdev > tp->mdev_max) {
672 tp->mdev_max = tp->mdev;
673 if (tp->mdev_max > tp->rttvar)
674 tp->rttvar = tp->mdev_max;
675 }
676 if (after(tp->snd_una, tp->rtt_seq)) {
677 if (tp->mdev_max < tp->rttvar)
678 tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
679 tp->rtt_seq = tp->snd_nxt;
680 tp->mdev_max = tcp_rto_min(sk);
681 }
682 } else {
683
684 tp->srtt = m << 3;
685 tp->mdev = m << 1;
686 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
687 tp->rtt_seq = tp->snd_nxt;
688 }
689}
690
691
692
693
694void tcp_set_rto(struct sock *sk)
695{
696 const struct tcp_sock *tp = tcp_sk(sk);
697
698
699
700
701
702
703
704
705
706
707 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
708
709
710
711
712
713
714
715
716
717
718 tcp_bound_rto(sk);
719}
720
721__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
722{
723 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
724
725 if (!cwnd)
726 cwnd = TCP_INIT_CWND;
727 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
728}
729
730
731
732
733
734void tcp_disable_fack(struct tcp_sock *tp)
735{
736
737 if (tcp_is_fack(tp))
738 tp->lost_skb_hint = NULL;
739 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
740}
741
742
743static void tcp_dsack_seen(struct tcp_sock *tp)
744{
745 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
746}
747
748static void tcp_update_reordering(struct sock *sk, const int metric,
749 const int ts)
750{
751 struct tcp_sock *tp = tcp_sk(sk);
752 if (metric > tp->reordering) {
753 int mib_idx;
754
755 tp->reordering = min(TCP_MAX_REORDERING, metric);
756
757
758 if (ts)
759 mib_idx = LINUX_MIB_TCPTSREORDER;
760 else if (tcp_is_reno(tp))
761 mib_idx = LINUX_MIB_TCPRENOREORDER;
762 else if (tcp_is_fack(tp))
763 mib_idx = LINUX_MIB_TCPFACKREORDER;
764 else
765 mib_idx = LINUX_MIB_TCPSACKREORDER;
766
767 NET_INC_STATS_BH(sock_net(sk), mib_idx);
768#if FASTRETRANS_DEBUG > 1
769 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
770 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
771 tp->reordering,
772 tp->fackets_out,
773 tp->sacked_out,
774 tp->undo_marker ? tp->undo_retrans : 0);
775#endif
776 tcp_disable_fack(tp);
777 }
778
779 if (metric > 0)
780 tcp_disable_early_retrans(tp);
781}
782
783
784static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
785{
786 if ((tp->retransmit_skb_hint == NULL) ||
787 before(TCP_SKB_CB(skb)->seq,
788 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
789 tp->retransmit_skb_hint = skb;
790
791 if (!tp->lost_out ||
792 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
793 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
794}
795
796static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
797{
798 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
799 tcp_verify_retransmit_hint(tp, skb);
800
801 tp->lost_out += tcp_skb_pcount(skb);
802 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
803 }
804}
805
806static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
807 struct sk_buff *skb)
808{
809 tcp_verify_retransmit_hint(tp, skb);
810
811 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
812 tp->lost_out += tcp_skb_pcount(skb);
813 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
814 }
815}
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
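/* Validate a SACK block against the current send window.  Ordinary blocks
 * must lie within (snd_una, snd_nxt]; D-SACK blocks may legitimately sit at
 * or below snd_una (they report duplicates), but only inside the
 * undo_marker..snd_una range and within roughly one max_window of it.
 */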
911static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
912 u32 start_seq, u32 end_seq)
913{
914
915 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
916 return false;
917
918
919 if (!before(start_seq, tp->snd_nxt))
920 return false;
921
922
923
924
925 if (after(start_seq, tp->snd_una))
926 return true;
927
928 if (!is_dsack || !tp->undo_marker)
929 return false;
930
931
932 if (after(end_seq, tp->snd_una))
933 return false;
934
935 if (!before(start_seq, tp->undo_marker))
936 return true;
937
938
939 if (!after(end_seq, tp->undo_marker))
940 return false;
941
942
943
944
945 return !before(start_seq, end_seq - tp->max_window);
946}
947
948
949
950
951
952
953
954
955
956
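/* FACK-only detection of lost retransmissions: if data sent after a
 * still-unacked retransmission has since been SACKed, assume the
 * retransmission itself was lost and put it back on the loss scoreboard.
 */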
957static void tcp_mark_lost_retrans(struct sock *sk)
958{
959 const struct inet_connection_sock *icsk = inet_csk(sk);
960 struct tcp_sock *tp = tcp_sk(sk);
961 struct sk_buff *skb;
962 int cnt = 0;
963 u32 new_low_seq = tp->snd_nxt;
964 u32 received_upto = tcp_highest_sack_seq(tp);
965
966 if (!tcp_is_fack(tp) || !tp->retrans_out ||
967 !after(received_upto, tp->lost_retrans_low) ||
968 icsk->icsk_ca_state != TCP_CA_Recovery)
969 return;
970
971 tcp_for_write_queue(skb, sk) {
972 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
973
974 if (skb == tcp_send_head(sk))
975 break;
976 if (cnt == tp->retrans_out)
977 break;
978 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
979 continue;
980
981 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
982 continue;
983
984
985
986
987
988
989
990
991
992
993
994
995 if (after(received_upto, ack_seq)) {
996 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
997 tp->retrans_out -= tcp_skb_pcount(skb);
998
999 tcp_skb_mark_lost_uncond_verify(tp, skb);
1000 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1001 } else {
1002 if (before(ack_seq, new_low_seq))
1003 new_low_seq = ack_seq;
1004 cnt += tcp_skb_pcount(skb);
1005 }
1006 }
1007
1008 if (tp->retrans_out)
1009 tp->lost_retrans_low = new_low_seq;
1010}
1011
1012static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1013 struct tcp_sack_block_wire *sp, int num_sacks,
1014 u32 prior_snd_una)
1015{
1016 struct tcp_sock *tp = tcp_sk(sk);
1017 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1018 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1019 bool dup_sack = false;
1020
1021 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1022 dup_sack = true;
1023 tcp_dsack_seen(tp);
1024 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1025 } else if (num_sacks > 1) {
1026 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1027 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1028
1029 if (!after(end_seq_0, end_seq_1) &&
1030 !before(start_seq_0, start_seq_1)) {
1031 dup_sack = true;
1032 tcp_dsack_seen(tp);
1033 NET_INC_STATS_BH(sock_net(sk),
1034 LINUX_MIB_TCPDSACKOFORECV);
1035 }
1036 }
1037
1038
1039 if (dup_sack && tp->undo_marker && tp->undo_retrans &&
1040 !after(end_seq_0, prior_snd_una) &&
1041 after(end_seq_0, tp->undo_marker))
1042 tp->undo_retrans--;
1043
1044 return dup_sack;
1045}
1046
1047struct tcp_sacktag_state {
1048 int reord;
1049 int fack_count;
1050 int flag;
1051};
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1062 u32 start_seq, u32 end_seq)
1063{
1064 int err;
1065 bool in_sack;
1066 unsigned int pkt_len;
1067 unsigned int mss;
1068
1069 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1070 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1071
1072 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1073 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1074 mss = tcp_skb_mss(skb);
1075 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1076
1077 if (!in_sack) {
1078 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1079 if (pkt_len < mss)
1080 pkt_len = mss;
1081 } else {
1082 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1083 if (pkt_len < mss)
1084 return -EINVAL;
1085 }
1086
1087
1088
1089
1090 if (pkt_len > mss) {
1091 unsigned int new_len = (pkt_len / mss) * mss;
1092 if (!in_sack && new_len < pkt_len) {
1093 new_len += mss;
1094 if (new_len > skb->len)
1095 return 0;
1096 }
1097 pkt_len = new_len;
1098 }
1099 err = tcp_fragment(sk, skb, pkt_len, mss);
1100 if (err < 0)
1101 return err;
1102 }
1103
1104 return in_sack;
1105}
1106
1107
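/* Tag a (possibly partial) range of an skb as SACKed: update sacked_out,
 * lost_out and retrans_out, record reordering when previously
 * un-retransmitted data below the highest SACK is newly SACKed, and advance
 * fackets_out.
 */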
1108static u8 tcp_sacktag_one(struct sock *sk,
1109 struct tcp_sacktag_state *state, u8 sacked,
1110 u32 start_seq, u32 end_seq,
1111 bool dup_sack, int pcount)
1112{
1113 struct tcp_sock *tp = tcp_sk(sk);
1114 int fack_count = state->fack_count;
1115
1116
1117 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1118 if (tp->undo_marker && tp->undo_retrans &&
1119 after(end_seq, tp->undo_marker))
1120 tp->undo_retrans--;
1121 if (sacked & TCPCB_SACKED_ACKED)
1122 state->reord = min(fack_count, state->reord);
1123 }
1124
1125
1126 if (!after(end_seq, tp->snd_una))
1127 return sacked;
1128
1129 if (!(sacked & TCPCB_SACKED_ACKED)) {
1130 if (sacked & TCPCB_SACKED_RETRANS) {
1131
1132
1133
1134
1135 if (sacked & TCPCB_LOST) {
1136 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1137 tp->lost_out -= pcount;
1138 tp->retrans_out -= pcount;
1139 }
1140 } else {
1141 if (!(sacked & TCPCB_RETRANS)) {
1142
1143
1144
1145 if (before(start_seq,
1146 tcp_highest_sack_seq(tp)))
1147 state->reord = min(fack_count,
1148 state->reord);
1149 if (!after(end_seq, tp->high_seq))
1150 state->flag |= FLAG_ORIG_SACK_ACKED;
1151 }
1152
1153 if (sacked & TCPCB_LOST) {
1154 sacked &= ~TCPCB_LOST;
1155 tp->lost_out -= pcount;
1156 }
1157 }
1158
1159 sacked |= TCPCB_SACKED_ACKED;
1160 state->flag |= FLAG_DATA_SACKED;
1161 tp->sacked_out += pcount;
1162
1163 fack_count += pcount;
1164
1165
1166 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1167 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1168 tp->lost_cnt_hint += pcount;
1169
1170 if (fack_count > tp->fackets_out)
1171 tp->fackets_out = fack_count;
1172 }
1173
1174
1175
1176
1177
1178 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1179 sacked &= ~TCPCB_SACKED_RETRANS;
1180 tp->retrans_out -= pcount;
1181 }
1182
1183 return sacked;
1184}
1185
1186
1187
1188
1189static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1190 struct tcp_sacktag_state *state,
1191 unsigned int pcount, int shifted, int mss,
1192 bool dup_sack)
1193{
1194 struct tcp_sock *tp = tcp_sk(sk);
1195 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1196 u32 start_seq = TCP_SKB_CB(skb)->seq;
1197 u32 end_seq = start_seq + shifted;
1198
1199 BUG_ON(!pcount);
1200
1201
1202
1203
1204
1205
1206
1207 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1208 start_seq, end_seq, dup_sack, pcount);
1209
1210 if (skb == tp->lost_skb_hint)
1211 tp->lost_cnt_hint += pcount;
1212
1213 TCP_SKB_CB(prev)->end_seq += shifted;
1214 TCP_SKB_CB(skb)->seq += shifted;
1215
1216 skb_shinfo(prev)->gso_segs += pcount;
1217 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1218 skb_shinfo(skb)->gso_segs -= pcount;
1219
1220
1221
1222
1223
1224
1225 if (!skb_shinfo(prev)->gso_size) {
1226 skb_shinfo(prev)->gso_size = mss;
1227 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1228 }
1229
1230
1231 if (skb_shinfo(skb)->gso_segs <= 1) {
1232 skb_shinfo(skb)->gso_size = 0;
1233 skb_shinfo(skb)->gso_type = 0;
1234 }
1235
1236
1237 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1238
1239 if (skb->len > 0) {
1240 BUG_ON(!tcp_skb_pcount(skb));
1241 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1242 return false;
1243 }
1244
1245
1246
1247 if (skb == tp->retransmit_skb_hint)
1248 tp->retransmit_skb_hint = prev;
1249 if (skb == tp->lost_skb_hint) {
1250 tp->lost_skb_hint = prev;
1251 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1252 }
1253
1254 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
1255 if (skb == tcp_highest_sack(sk))
1256 tcp_advance_highest_sack(sk, skb);
1257
1258 tcp_unlink_write_queue(skb, sk);
1259 sk_wmem_free_skb(sk, skb);
1260
1261 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1262
1263 return true;
1264}
1265
1266
1267
1268
1269static int tcp_skb_seglen(const struct sk_buff *skb)
1270{
1271 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1272}
1273
1274
1275static int skb_can_shift(const struct sk_buff *skb)
1276{
1277 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1278}
1279
1280
1281
1282
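/* Try to collapse newly SACKed data into the previous, already-SACKed skb
 * with skb_shift(), so long SACK storms keep the write queue (and later
 * per-ACK walks) short.  Returns NULL to fall back to plain per-skb tagging
 * when shifting is not possible.
 */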
1283static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1284 struct tcp_sacktag_state *state,
1285 u32 start_seq, u32 end_seq,
1286 bool dup_sack)
1287{
1288 struct tcp_sock *tp = tcp_sk(sk);
1289 struct sk_buff *prev;
1290 int mss;
1291 int pcount = 0;
1292 int len;
1293 int in_sack;
1294
1295 if (!sk_can_gso(sk))
1296 goto fallback;
1297
1298
1299 if (!dup_sack &&
1300 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1301 goto fallback;
1302 if (!skb_can_shift(skb))
1303 goto fallback;
1304
1305 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1306 goto fallback;
1307
1308
1309 if (unlikely(skb == tcp_write_queue_head(sk)))
1310 goto fallback;
1311 prev = tcp_write_queue_prev(sk, skb);
1312
1313 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1314 goto fallback;
1315
1316 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1317 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1318
1319 if (in_sack) {
1320 len = skb->len;
1321 pcount = tcp_skb_pcount(skb);
1322 mss = tcp_skb_seglen(skb);
1323
1324
1325
1326
1327 if (mss != tcp_skb_seglen(prev))
1328 goto fallback;
1329 } else {
1330 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1331 goto noop;
1332
1333
1334
1335
1336 if (tcp_skb_pcount(skb) <= 1)
1337 goto noop;
1338
1339 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1340 if (!in_sack) {
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352 goto fallback;
1353 }
1354
1355 len = end_seq - TCP_SKB_CB(skb)->seq;
1356 BUG_ON(len < 0);
1357 BUG_ON(len > skb->len);
1358
1359
1360
1361
1362
1363 mss = tcp_skb_mss(skb);
1364
1365
1366
1367
1368 if (mss != tcp_skb_seglen(prev))
1369 goto fallback;
1370
1371 if (len == mss) {
1372 pcount = 1;
1373 } else if (len < mss) {
1374 goto noop;
1375 } else {
1376 pcount = len / mss;
1377 len = pcount * mss;
1378 }
1379 }
1380
1381
1382 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1383 goto fallback;
1384
1385 if (!skb_shift(prev, skb, len))
1386 goto fallback;
1387 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1388 goto out;
1389
1390
1391
1392
1393 if (prev == tcp_write_queue_tail(sk))
1394 goto out;
1395 skb = tcp_write_queue_next(sk, prev);
1396
1397 if (!skb_can_shift(skb) ||
1398 (skb == tcp_send_head(sk)) ||
1399 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1400 (mss != tcp_skb_seglen(skb)))
1401 goto out;
1402
1403 len = skb->len;
1404 if (skb_shift(prev, skb, len)) {
1405 pcount += tcp_skb_pcount(skb);
1406 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1407 }
1408
1409out:
1410 state->fack_count += pcount;
1411 return prev;
1412
1413noop:
1414 return skb;
1415
1416fallback:
1417 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1418 return NULL;
1419}
1420
1421static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1422 struct tcp_sack_block *next_dup,
1423 struct tcp_sacktag_state *state,
1424 u32 start_seq, u32 end_seq,
1425 bool dup_sack_in)
1426{
1427 struct tcp_sock *tp = tcp_sk(sk);
1428 struct sk_buff *tmp;
1429
1430 tcp_for_write_queue_from(skb, sk) {
1431 int in_sack = 0;
1432 bool dup_sack = dup_sack_in;
1433
1434 if (skb == tcp_send_head(sk))
1435 break;
1436
1437
1438 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1439 break;
1440
1441 if ((next_dup != NULL) &&
1442 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1443 in_sack = tcp_match_skb_to_sack(sk, skb,
1444 next_dup->start_seq,
1445 next_dup->end_seq);
1446 if (in_sack > 0)
1447 dup_sack = true;
1448 }
1449
1450
1451
1452
1453
1454 if (in_sack <= 0) {
1455 tmp = tcp_shift_skb_data(sk, skb, state,
1456 start_seq, end_seq, dup_sack);
1457 if (tmp != NULL) {
1458 if (tmp != skb) {
1459 skb = tmp;
1460 continue;
1461 }
1462
1463 in_sack = 0;
1464 } else {
1465 in_sack = tcp_match_skb_to_sack(sk, skb,
1466 start_seq,
1467 end_seq);
1468 }
1469 }
1470
1471 if (unlikely(in_sack < 0))
1472 break;
1473
1474 if (in_sack) {
1475 TCP_SKB_CB(skb)->sacked =
1476 tcp_sacktag_one(sk,
1477 state,
1478 TCP_SKB_CB(skb)->sacked,
1479 TCP_SKB_CB(skb)->seq,
1480 TCP_SKB_CB(skb)->end_seq,
1481 dup_sack,
1482 tcp_skb_pcount(skb));
1483
1484 if (!before(TCP_SKB_CB(skb)->seq,
1485 tcp_highest_sack_seq(tp)))
1486 tcp_advance_highest_sack(sk, skb);
1487 }
1488
1489 state->fack_count += tcp_skb_pcount(skb);
1490 }
1491 return skb;
1492}
1493
1494
1495
1496
1497static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1498 struct tcp_sacktag_state *state,
1499 u32 skip_to_seq)
1500{
1501 tcp_for_write_queue_from(skb, sk) {
1502 if (skb == tcp_send_head(sk))
1503 break;
1504
1505 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1506 break;
1507
1508 state->fack_count += tcp_skb_pcount(skb);
1509 }
1510 return skb;
1511}
1512
1513static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1514 struct sock *sk,
1515 struct tcp_sack_block *next_dup,
1516 struct tcp_sacktag_state *state,
1517 u32 skip_to_seq)
1518{
1519 if (next_dup == NULL)
1520 return skb;
1521
1522 if (before(next_dup->start_seq, skip_to_seq)) {
1523 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1524 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1525 next_dup->start_seq, next_dup->end_seq,
1526 1);
1527 }
1528
1529 return skb;
1530}
1531
1532static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1533{
1534 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1535}
1536
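/* Main SACK processing for an incoming ACK: parse and validate the SACK
 * blocks, sort them, then walk the retransmit queue tagging covered skbs,
 * using recv_sack_cache to skip ranges already handled by earlier ACKs.
 */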
1537static int
1538tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1539 u32 prior_snd_una)
1540{
1541 struct tcp_sock *tp = tcp_sk(sk);
1542 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1543 TCP_SKB_CB(ack_skb)->sacked);
1544 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1545 struct tcp_sack_block sp[TCP_NUM_SACKS];
1546 struct tcp_sack_block *cache;
1547 struct tcp_sacktag_state state;
1548 struct sk_buff *skb;
1549 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1550 int used_sacks;
1551 bool found_dup_sack = false;
1552 int i, j;
1553 int first_sack_index;
1554
1555 state.flag = 0;
1556 state.reord = tp->packets_out;
1557
1558 if (!tp->sacked_out) {
1559 if (WARN_ON(tp->fackets_out))
1560 tp->fackets_out = 0;
1561 tcp_highest_sack_reset(sk);
1562 }
1563
1564 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1565 num_sacks, prior_snd_una);
1566 if (found_dup_sack)
1567 state.flag |= FLAG_DSACKING_ACK;
1568
1569
1570
1571
1572
1573 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1574 return 0;
1575
1576 if (!tp->packets_out)
1577 goto out;
1578
1579 used_sacks = 0;
1580 first_sack_index = 0;
1581 for (i = 0; i < num_sacks; i++) {
1582 bool dup_sack = !i && found_dup_sack;
1583
1584 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1585 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1586
1587 if (!tcp_is_sackblock_valid(tp, dup_sack,
1588 sp[used_sacks].start_seq,
1589 sp[used_sacks].end_seq)) {
1590 int mib_idx;
1591
1592 if (dup_sack) {
1593 if (!tp->undo_marker)
1594 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1595 else
1596 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1597 } else {
1598
1599 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1600 !after(sp[used_sacks].end_seq, tp->snd_una))
1601 continue;
1602 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1603 }
1604
1605 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1606 if (i == 0)
1607 first_sack_index = -1;
1608 continue;
1609 }
1610
1611
1612 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1613 continue;
1614
1615 used_sacks++;
1616 }
1617
1618
1619 for (i = used_sacks - 1; i > 0; i--) {
1620 for (j = 0; j < i; j++) {
1621 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1622 swap(sp[j], sp[j + 1]);
1623
1624
1625 if (j == first_sack_index)
1626 first_sack_index = j + 1;
1627 }
1628 }
1629 }
1630
1631 skb = tcp_write_queue_head(sk);
1632 state.fack_count = 0;
1633 i = 0;
1634
1635 if (!tp->sacked_out) {
1636
1637 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1638 } else {
1639 cache = tp->recv_sack_cache;
1640
1641 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1642 !cache->end_seq)
1643 cache++;
1644 }
1645
1646 while (i < used_sacks) {
1647 u32 start_seq = sp[i].start_seq;
1648 u32 end_seq = sp[i].end_seq;
1649 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1650 struct tcp_sack_block *next_dup = NULL;
1651
1652 if (found_dup_sack && ((i + 1) == first_sack_index))
1653 next_dup = &sp[i + 1];
1654
1655
1656 while (tcp_sack_cache_ok(tp, cache) &&
1657 !before(start_seq, cache->end_seq))
1658 cache++;
1659
1660
1661 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1662 after(end_seq, cache->start_seq)) {
1663
1664
1665 if (before(start_seq, cache->start_seq)) {
1666 skb = tcp_sacktag_skip(skb, sk, &state,
1667 start_seq);
1668 skb = tcp_sacktag_walk(skb, sk, next_dup,
1669 &state,
1670 start_seq,
1671 cache->start_seq,
1672 dup_sack);
1673 }
1674
1675
1676 if (!after(end_seq, cache->end_seq))
1677 goto advance_sp;
1678
1679 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1680 &state,
1681 cache->end_seq);
1682
1683
1684 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1685
1686 skb = tcp_highest_sack(sk);
1687 if (skb == NULL)
1688 break;
1689 state.fack_count = tp->fackets_out;
1690 cache++;
1691 goto walk;
1692 }
1693
1694 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1695
1696 cache++;
1697 continue;
1698 }
1699
1700 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1701 skb = tcp_highest_sack(sk);
1702 if (skb == NULL)
1703 break;
1704 state.fack_count = tp->fackets_out;
1705 }
1706 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1707
1708walk:
1709 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1710 start_seq, end_seq, dup_sack);
1711
1712advance_sp:
1713 i++;
1714 }
1715
1716
1717 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1718 tp->recv_sack_cache[i].start_seq = 0;
1719 tp->recv_sack_cache[i].end_seq = 0;
1720 }
1721 for (j = 0; j < used_sacks; j++)
1722 tp->recv_sack_cache[i++] = sp[j];
1723
1724 tcp_mark_lost_retrans(sk);
1725
1726 tcp_verify_left_out(tp);
1727
1728 if ((state.reord < tp->fackets_out) &&
1729 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1730 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1731
1732out:
1733
1734#if FASTRETRANS_DEBUG > 0
1735 WARN_ON((int)tp->sacked_out < 0);
1736 WARN_ON((int)tp->lost_out < 0);
1737 WARN_ON((int)tp->retrans_out < 0);
1738 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1739#endif
1740 return state.flag;
1741}
1742
1743
1744
1745
/* Limit sacked_out so that (sacked_out + lost_out) never exceeds packets_out
 * (needed by the NewReno "SACK emulation").  Returns true if it was clamped.
 */
static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
{
	u32 holes;

	holes = max(tp->lost_out, 1U);
	holes = min(holes, tp->packets_out);

	if ((tp->sacked_out + holes) > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		return true;
	}
	return false;
}
1759
1760
1761
1762
1763
1764static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1765{
1766 struct tcp_sock *tp = tcp_sk(sk);
1767 if (tcp_limit_reno_sacked(tp))
1768 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1769}
1770
1771
1772
1773static void tcp_add_reno_sack(struct sock *sk)
1774{
1775 struct tcp_sock *tp = tcp_sk(sk);
1776 tp->sacked_out++;
1777 tcp_check_reno_reordering(sk, 0);
1778 tcp_verify_left_out(tp);
1779}
1780
1781
1782
1783static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1784{
1785 struct tcp_sock *tp = tcp_sk(sk);
1786
1787 if (acked > 0) {
1788
1789 if (acked - 1 >= tp->sacked_out)
1790 tp->sacked_out = 0;
1791 else
1792 tp->sacked_out -= acked - 1;
1793 }
1794 tcp_check_reno_reordering(sk, acked);
1795 tcp_verify_left_out(tp);
1796}
1797
1798static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1799{
1800 tp->sacked_out = 0;
1801}
1802
1803static void tcp_clear_retrans_partial(struct tcp_sock *tp)
1804{
1805 tp->retrans_out = 0;
1806 tp->lost_out = 0;
1807
1808 tp->undo_marker = 0;
1809 tp->undo_retrans = 0;
1810}
1811
1812void tcp_clear_retrans(struct tcp_sock *tp)
1813{
1814 tcp_clear_retrans_partial(tp);
1815
1816 tp->fackets_out = 0;
1817 tp->sacked_out = 0;
1818}
1819
1820
1821
1822
1823
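/* Enter Loss state (after an RTO, or on SACK reneging when @how is set):
 * cwnd collapses to 1, un-SACKed packets (or all packets when @how) are
 * marked lost, and F-RTO may be armed so a spurious timeout can be undone.
 */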
1824void tcp_enter_loss(struct sock *sk, int how)
1825{
1826 const struct inet_connection_sock *icsk = inet_csk(sk);
1827 struct tcp_sock *tp = tcp_sk(sk);
1828 struct sk_buff *skb;
1829 bool new_recovery = false;
1830
1831
1832 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1833 !after(tp->high_seq, tp->snd_una) ||
1834 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1835 new_recovery = true;
1836 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1837 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1838 tcp_ca_event(sk, CA_EVENT_LOSS);
1839 }
1840 tp->snd_cwnd = 1;
1841 tp->snd_cwnd_cnt = 0;
1842 tp->snd_cwnd_stamp = tcp_time_stamp;
1843
1844 tcp_clear_retrans_partial(tp);
1845
1846 if (tcp_is_reno(tp))
1847 tcp_reset_reno_sack(tp);
1848
1849 tp->undo_marker = tp->snd_una;
1850 if (how) {
1851 tp->sacked_out = 0;
1852 tp->fackets_out = 0;
1853 }
1854 tcp_clear_all_retrans_hints(tp);
1855
1856 tcp_for_write_queue(skb, sk) {
1857 if (skb == tcp_send_head(sk))
1858 break;
1859
1860 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1861 tp->undo_marker = 0;
1862 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1863 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
1864 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1865 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1866 tp->lost_out += tcp_skb_pcount(skb);
1867 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
1868 }
1869 }
1870 tcp_verify_left_out(tp);
1871
1872 tp->reordering = min_t(unsigned int, tp->reordering,
1873 sysctl_tcp_reordering);
1874 tcp_set_ca_state(sk, TCP_CA_Loss);
1875 tp->high_seq = tp->snd_nxt;
1876 TCP_ECN_queue_cwr(tp);
1877
1878
1879
1880
1881
1882 tp->frto = sysctl_tcp_frto &&
1883 (new_recovery || icsk->icsk_retransmits) &&
1884 !inet_csk(sk)->icsk_mtup.probe_size;
1885}
1886
1887
1888
1889
1890
1891
1892
1893static bool tcp_check_sack_reneging(struct sock *sk, int flag)
1894{
1895 if (flag & FLAG_SACK_RENEGING) {
1896 struct inet_connection_sock *icsk = inet_csk(sk);
1897 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1898
1899 tcp_enter_loss(sk, 1);
1900 icsk->icsk_retransmits++;
1901 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
1902 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1903 icsk->icsk_rto, TCP_RTO_MAX);
1904 return true;
1905 }
1906 return false;
1907}
1908
static inline int tcp_fackets_out(const struct tcp_sock *tp)
{
	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
}

/* Number of packets counted by the duplicate-ACK heuristics: FACK uses
 * fackets_out (SACKed segments plus the holes between them), while plain
 * SACK and Reno use sacked_out + 1.
 */
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
}
1933
1934static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
1935{
1936 struct tcp_sock *tp = tcp_sk(sk);
1937 unsigned long delay;
1938
1939
1940
1941
1942
1943 if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
1944 (flag & FLAG_ECE) || !tp->srtt)
1945 return false;
1946
1947 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
1948 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
1949 return false;
1950
1951 inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
1952 TCP_RTO_MAX);
1953 return true;
1954}
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
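/* Decide whether to enter fast retransmit/Recovery: packets already marked
 * lost, the classic dupACK/reordering threshold, a stalled flow whose whole
 * window is SACKed, thin-stream dupACKs, or a pending early retransmit
 * (RFC 5827).
 */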
2049static bool tcp_time_to_recover(struct sock *sk, int flag)
2050{
2051 struct tcp_sock *tp = tcp_sk(sk);
2052 __u32 packets_out;
2053
2054
2055 if (tp->lost_out)
2056 return true;
2057
2058
2059 if (tcp_dupack_heuristics(tp) > tp->reordering)
2060 return true;
2061
2062
2063
2064
2065 packets_out = tp->packets_out;
2066 if (packets_out <= tp->reordering &&
2067 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2068 !tcp_may_send_now(sk)) {
2069
2070
2071
2072 return true;
2073 }
2074
2075
2076
2077
2078
2079
2080 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2081 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2082 tcp_is_sack(tp) && !tcp_send_head(sk))
2083 return true;
2084
2085
2086
2087
2088
2089
2090 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2091 (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
2092 !tcp_may_send_now(sk))
2093 return !tcp_pause_early_retransmit(sk, flag);
2094
2095 return false;
2096}
2097
2098
2099
2100
2101
2102
2103
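/* Mark (at least) @packets segments at the head of the write queue as lost,
 * resuming from lost_skb_hint when possible and fragmenting a GSO skb if the
 * boundary falls inside it.  With @mark_head only the head skb is marked.
 */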
2104static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2105{
2106 struct tcp_sock *tp = tcp_sk(sk);
2107 struct sk_buff *skb;
2108 int cnt, oldcnt;
2109 int err;
2110 unsigned int mss;
2111
2112 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2113
2114 WARN_ON(packets > tp->packets_out);
2115 if (tp->lost_skb_hint) {
2116 skb = tp->lost_skb_hint;
2117 cnt = tp->lost_cnt_hint;
2118
2119 if (mark_head && skb != tcp_write_queue_head(sk))
2120 return;
2121 } else {
2122 skb = tcp_write_queue_head(sk);
2123 cnt = 0;
2124 }
2125
2126 tcp_for_write_queue_from(skb, sk) {
2127 if (skb == tcp_send_head(sk))
2128 break;
2129
2130
2131 tp->lost_skb_hint = skb;
2132 tp->lost_cnt_hint = cnt;
2133
2134 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2135 break;
2136
2137 oldcnt = cnt;
2138 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2139 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2140 cnt += tcp_skb_pcount(skb);
2141
2142 if (cnt > packets) {
2143 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2144 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2145 (oldcnt >= packets))
2146 break;
2147
2148 mss = skb_shinfo(skb)->gso_size;
2149 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
2150 if (err < 0)
2151 break;
2152 cnt = packets;
2153 }
2154
2155 tcp_skb_mark_lost(tp, skb);
2156
2157 if (mark_head)
2158 break;
2159 }
2160 tcp_verify_left_out(tp);
2161}
2162
2163
2164
2165static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2166{
2167 struct tcp_sock *tp = tcp_sk(sk);
2168
2169 if (tcp_is_reno(tp)) {
2170 tcp_mark_head_lost(sk, 1, 1);
2171 } else if (tcp_is_fack(tp)) {
2172 int lost = tp->fackets_out - tp->reordering;
2173 if (lost <= 0)
2174 lost = 1;
2175 tcp_mark_head_lost(sk, lost, 0);
2176 } else {
2177 int sacked_upto = tp->sacked_out - tp->reordering;
2178 if (sacked_upto >= 0)
2179 tcp_mark_head_lost(sk, sacked_upto, 0);
2180 else if (fast_rexmit)
2181 tcp_mark_head_lost(sk, 1, 1);
2182 }
2183}
2184
2185
2186
2187
2188static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2189{
2190 tp->snd_cwnd = min(tp->snd_cwnd,
2191 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2192 tp->snd_cwnd_stamp = tcp_time_stamp;
2193}
2194
2195
2196
2197
/* True if nothing was retransmitted, or the echoed timestamp on this ACK
 * predates our first retransmission: the original transmission got through,
 * so the retransmission was spurious.
 */
static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
	return !tp->retrans_stamp ||
		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
}
2204
2205
2206
2207#if FASTRETRANS_DEBUG > 1
2208static void DBGUNDO(struct sock *sk, const char *msg)
2209{
2210 struct tcp_sock *tp = tcp_sk(sk);
2211 struct inet_sock *inet = inet_sk(sk);
2212
2213 if (sk->sk_family == AF_INET) {
2214 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2215 msg,
2216 &inet->inet_daddr, ntohs(inet->inet_dport),
2217 tp->snd_cwnd, tcp_left_out(tp),
2218 tp->snd_ssthresh, tp->prior_ssthresh,
2219 tp->packets_out);
2220 }
2221#if IS_ENABLED(CONFIG_IPV6)
2222 else if (sk->sk_family == AF_INET6) {
2223 struct ipv6_pinfo *np = inet6_sk(sk);
2224 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2225 msg,
2226 &np->daddr, ntohs(inet->inet_dport),
2227 tp->snd_cwnd, tcp_left_out(tp),
2228 tp->snd_ssthresh, tp->prior_ssthresh,
2229 tp->packets_out);
2230 }
2231#endif
2232}
2233#else
2234#define DBGUNDO(x...) do { } while (0)
2235#endif
2236
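/* Undo a cwnd reduction after a congestion signal proved spurious: restore
 * cwnd (and ssthresh, if one was saved) and optionally clear LOST marks from
 * the scoreboard.
 */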
2237static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2238{
2239 struct tcp_sock *tp = tcp_sk(sk);
2240
2241 if (unmark_loss) {
2242 struct sk_buff *skb;
2243
2244 tcp_for_write_queue(skb, sk) {
2245 if (skb == tcp_send_head(sk))
2246 break;
2247 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2248 }
2249 tp->lost_out = 0;
2250 tcp_clear_all_retrans_hints(tp);
2251 }
2252
2253 if (tp->prior_ssthresh) {
2254 const struct inet_connection_sock *icsk = inet_csk(sk);
2255
2256 if (icsk->icsk_ca_ops->undo_cwnd)
2257 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2258 else
2259 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2260
2261 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2262 tp->snd_ssthresh = tp->prior_ssthresh;
2263 TCP_ECN_withdraw_cwr(tp);
2264 }
2265 } else {
2266 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2267 }
2268 tp->snd_cwnd_stamp = tcp_time_stamp;
2269 tp->undo_marker = 0;
2270}
2271
2272static inline bool tcp_may_undo(const struct tcp_sock *tp)
2273{
2274 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2275}
2276
2277
2278static bool tcp_try_undo_recovery(struct sock *sk)
2279{
2280 struct tcp_sock *tp = tcp_sk(sk);
2281
2282 if (tcp_may_undo(tp)) {
2283 int mib_idx;
2284
2285
2286
2287
2288 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2289 tcp_undo_cwnd_reduction(sk, false);
2290 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2291 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2292 else
2293 mib_idx = LINUX_MIB_TCPFULLUNDO;
2294
2295 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2296 }
2297 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2298
2299
2300
2301 tcp_moderate_cwnd(tp);
2302 return true;
2303 }
2304 tcp_set_ca_state(sk, TCP_CA_Open);
2305 return false;
2306}
2307
2308
2309static bool tcp_try_undo_dsack(struct sock *sk)
2310{
2311 struct tcp_sock *tp = tcp_sk(sk);
2312
2313 if (tp->undo_marker && !tp->undo_retrans) {
2314 DBGUNDO(sk, "D-SACK");
2315 tcp_undo_cwnd_reduction(sk, false);
2316 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2317 return true;
2318 }
2319 return false;
2320}
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336static bool tcp_any_retrans_done(const struct sock *sk)
2337{
2338 const struct tcp_sock *tp = tcp_sk(sk);
2339 struct sk_buff *skb;
2340
2341 if (tp->retrans_out)
2342 return true;
2343
2344 skb = tcp_write_queue_head(sk);
2345 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2346 return true;
2347
2348 return false;
2349}
2350
2351
2352static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2353{
2354 struct tcp_sock *tp = tcp_sk(sk);
2355
2356 if (frto_undo || tcp_may_undo(tp)) {
2357 tcp_undo_cwnd_reduction(sk, true);
2358
2359 DBGUNDO(sk, "partial loss");
2360 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2361 if (frto_undo)
2362 NET_INC_STATS_BH(sock_net(sk),
2363 LINUX_MIB_TCPSPURIOUSRTOS);
2364 inet_csk(sk)->icsk_retransmits = 0;
2365 if (frto_undo || tcp_is_sack(tp))
2366 tcp_set_ca_state(sk, TCP_CA_Open);
2367 return true;
2368 }
2369 return false;
2370}
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
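/* Proportional Rate Reduction (RFC 6937): during CWR/Recovery, cwnd is walked
 * down toward ssthresh in proportion to newly delivered packets rather than
 * halved at once.  As an approximate example, with prior_cwnd = 10 and
 * ssthresh = 5, about one new packet is sent for every two delivered, so
 * roughly ssthresh packets are in flight when the reduction completes.
 */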
static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->high_seq = tp->snd_nxt;
	tp->tlp_high_seq = 0;
	tp->snd_cwnd_cnt = 0;
	tp->prior_cwnd = tp->snd_cwnd;
	tp->prr_delivered = 0;
	tp->prr_out = 0;
	if (set_ssthresh)
		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
	TCP_ECN_queue_cwr(tp);
}

static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
			       int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int sndcnt = 0;
	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
	int newly_acked_sacked = prior_unsacked -
				 (tp->packets_out - tp->sacked_out);

	tp->prr_delivered += newly_acked_sacked;
	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
		/* Main PRR step: send ssthresh/prior_cwnd packets per packet
		 * newly delivered, rounding up.
		 */
		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
			       tp->prior_cwnd - 1;
		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
	} else {
		/* Reduction done (in_flight <= ssthresh): slow-start back up
		 * to ssthresh, bounded by what was actually delivered.
		 */
		sndcnt = min_t(int, delta,
			       max_t(int, tp->prr_delivered - tp->prr_out,
				     newly_acked_sacked) + 1);
	}

	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}
2420
2421static inline void tcp_end_cwnd_reduction(struct sock *sk)
2422{
2423 struct tcp_sock *tp = tcp_sk(sk);
2424
2425
2426 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2427 (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
2428 tp->snd_cwnd = tp->snd_ssthresh;
2429 tp->snd_cwnd_stamp = tcp_time_stamp;
2430 }
2431 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2432}
2433
2434
2435void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
2436{
2437 struct tcp_sock *tp = tcp_sk(sk);
2438
2439 tp->prior_ssthresh = 0;
2440 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2441 tp->undo_marker = 0;
2442 tcp_init_cwnd_reduction(sk, set_ssthresh);
2443 tcp_set_ca_state(sk, TCP_CA_CWR);
2444 }
2445}
2446
2447static void tcp_try_keep_open(struct sock *sk)
2448{
2449 struct tcp_sock *tp = tcp_sk(sk);
2450 int state = TCP_CA_Open;
2451
2452 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2453 state = TCP_CA_Disorder;
2454
2455 if (inet_csk(sk)->icsk_ca_state != state) {
2456 tcp_set_ca_state(sk, state);
2457 tp->high_seq = tp->snd_nxt;
2458 }
2459}
2460
2461static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2462{
2463 struct tcp_sock *tp = tcp_sk(sk);
2464
2465 tcp_verify_left_out(tp);
2466
2467 if (!tcp_any_retrans_done(sk))
2468 tp->retrans_stamp = 0;
2469
2470 if (flag & FLAG_ECE)
2471 tcp_enter_cwr(sk, 1);
2472
2473 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2474 tcp_try_keep_open(sk);
2475 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2476 tcp_moderate_cwnd(tp);
2477 } else {
2478 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2479 }
2480}
2481
2482static void tcp_mtup_probe_failed(struct sock *sk)
2483{
2484 struct inet_connection_sock *icsk = inet_csk(sk);
2485
2486 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2487 icsk->icsk_mtup.probe_size = 0;
2488}
2489
2490static void tcp_mtup_probe_success(struct sock *sk)
2491{
2492 struct tcp_sock *tp = tcp_sk(sk);
2493 struct inet_connection_sock *icsk = inet_csk(sk);
2494
2495
2496 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2497 tp->snd_cwnd = tp->snd_cwnd *
2498 tcp_mss_to_mtu(sk, tp->mss_cache) /
2499 icsk->icsk_mtup.probe_size;
2500 tp->snd_cwnd_cnt = 0;
2501 tp->snd_cwnd_stamp = tcp_time_stamp;
2502 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2503
2504 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2505 icsk->icsk_mtup.probe_size = 0;
2506 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2507}
2508
2509
2510
2511
2512
2513void tcp_simple_retransmit(struct sock *sk)
2514{
2515 const struct inet_connection_sock *icsk = inet_csk(sk);
2516 struct tcp_sock *tp = tcp_sk(sk);
2517 struct sk_buff *skb;
2518 unsigned int mss = tcp_current_mss(sk);
2519 u32 prior_lost = tp->lost_out;
2520
2521 tcp_for_write_queue(skb, sk) {
2522 if (skb == tcp_send_head(sk))
2523 break;
2524 if (tcp_skb_seglen(skb) > mss &&
2525 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2526 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2527 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2528 tp->retrans_out -= tcp_skb_pcount(skb);
2529 }
2530 tcp_skb_mark_lost_uncond_verify(tp, skb);
2531 }
2532 }
2533
2534 tcp_clear_retrans_hints_partial(tp);
2535
2536 if (prior_lost == tp->lost_out)
2537 return;
2538
2539 if (tcp_is_reno(tp))
2540 tcp_limit_reno_sacked(tp);
2541
2542 tcp_verify_left_out(tp);
2543
2544
2545
2546
2547
2548
2549 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2550 tp->high_seq = tp->snd_nxt;
2551 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2552 tp->prior_ssthresh = 0;
2553 tp->undo_marker = 0;
2554 tcp_set_ca_state(sk, TCP_CA_Loss);
2555 }
2556 tcp_xmit_retransmit_queue(sk);
2557}
2558EXPORT_SYMBOL(tcp_simple_retransmit);
2559
2560static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2561{
2562 struct tcp_sock *tp = tcp_sk(sk);
2563 int mib_idx;
2564
2565 if (tcp_is_reno(tp))
2566 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2567 else
2568 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2569
2570 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2571
2572 tp->prior_ssthresh = 0;
2573 tp->undo_marker = tp->snd_una;
2574 tp->undo_retrans = tp->retrans_out;
2575
2576 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2577 if (!ece_ack)
2578 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2579 tcp_init_cwnd_reduction(sk, true);
2580 }
2581 tcp_set_ca_state(sk, TCP_CA_Recovery);
2582}
2583
2584
2585
2586
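/* Handle an ACK while in CA_Loss, including the F-RTO (RFC 5682) test for a
 * spurious retransmission timeout; undo and return toward Open once snd_una
 * reaches high_seq, otherwise keep retransmitting from the scoreboard.
 */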
2587static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2588{
2589 struct inet_connection_sock *icsk = inet_csk(sk);
2590 struct tcp_sock *tp = tcp_sk(sk);
2591 bool recovered = !before(tp->snd_una, tp->high_seq);
2592
2593 if (tp->frto) {
2594 if (flag & FLAG_ORIG_SACK_ACKED) {
2595
2596
2597
2598 tcp_try_undo_loss(sk, true);
2599 return;
2600 }
2601 if (after(tp->snd_nxt, tp->high_seq) &&
2602 (flag & FLAG_DATA_SACKED || is_dupack)) {
2603 tp->frto = 0;
2604 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2605 tp->high_seq = tp->snd_nxt;
2606 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
2607 TCP_NAGLE_OFF);
2608 if (after(tp->snd_nxt, tp->high_seq))
2609 return;
2610 tp->frto = 0;
2611 }
2612 }
2613
2614 if (recovered) {
2615
2616 icsk->icsk_retransmits = 0;
2617 tcp_try_undo_recovery(sk);
2618 return;
2619 }
2620 if (flag & FLAG_DATA_ACKED)
2621 icsk->icsk_retransmits = 0;
2622 if (tcp_is_reno(tp)) {
2623
2624
2625
2626 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2627 tcp_add_reno_sack(sk);
2628 else if (flag & FLAG_SND_UNA_ADVANCED)
2629 tcp_reset_reno_sack(tp);
2630 }
2631 if (tcp_try_undo_loss(sk, false))
2632 return;
2633 tcp_xmit_retransmit_queue(sk);
2634}
2635
2636
2637static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2638 const int prior_unsacked)
2639{
2640 struct tcp_sock *tp = tcp_sk(sk);
2641
2642 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2643
2644
2645
2646 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2647
2648
2649
2650
2651
2652
2653 if (tp->retrans_out) {
2654 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2655 return true;
2656 }
2657
2658 if (!tcp_any_retrans_done(sk))
2659 tp->retrans_stamp = 0;
2660
2661 DBGUNDO(sk, "partial recovery");
2662 tcp_undo_cwnd_reduction(sk, true);
2663 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2664 tcp_try_keep_open(sk);
2665 return true;
2666 }
2667 return false;
2668}
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
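/* The heart of loss recovery ("fast retransmit alert"): driven by each
 * incoming ACK's SACK/ECE/dupACK information, it moves the connection between
 * Open, Disorder, CWR, Recovery and Loss, updates the loss scoreboard, and
 * retransmits under the PRR-reduced cwnd.
 */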
2681static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2682 const int prior_unsacked,
2683 bool is_dupack, int flag)
2684{
2685 struct inet_connection_sock *icsk = inet_csk(sk);
2686 struct tcp_sock *tp = tcp_sk(sk);
2687 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2688 (tcp_fackets_out(tp) > tp->reordering));
2689 int fast_rexmit = 0;
2690
2691 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2692 tp->sacked_out = 0;
2693 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2694 tp->fackets_out = 0;
2695
2696
2697
2698 if (flag & FLAG_ECE)
2699 tp->prior_ssthresh = 0;
2700
2701
2702 if (tcp_check_sack_reneging(sk, flag))
2703 return;
2704
2705
2706 tcp_verify_left_out(tp);
2707
2708
2709
2710 if (icsk->icsk_ca_state == TCP_CA_Open) {
2711 WARN_ON(tp->retrans_out != 0);
2712 tp->retrans_stamp = 0;
2713 } else if (!before(tp->snd_una, tp->high_seq)) {
2714 switch (icsk->icsk_ca_state) {
2715 case TCP_CA_CWR:
2716
2717
2718 if (tp->snd_una != tp->high_seq) {
2719 tcp_end_cwnd_reduction(sk);
2720 tcp_set_ca_state(sk, TCP_CA_Open);
2721 }
2722 break;
2723
2724 case TCP_CA_Recovery:
2725 if (tcp_is_reno(tp))
2726 tcp_reset_reno_sack(tp);
2727 if (tcp_try_undo_recovery(sk))
2728 return;
2729 tcp_end_cwnd_reduction(sk);
2730 break;
2731 }
2732 }
2733
2734
2735 switch (icsk->icsk_ca_state) {
2736 case TCP_CA_Recovery:
2737 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2738 if (tcp_is_reno(tp) && is_dupack)
2739 tcp_add_reno_sack(sk);
2740 } else {
2741 if (tcp_try_undo_partial(sk, acked, prior_unsacked))
2742 return;
2743
2744 do_lost = tcp_is_reno(tp) ||
2745 tcp_fackets_out(tp) > tp->reordering;
2746 }
2747 if (tcp_try_undo_dsack(sk)) {
2748 tcp_try_keep_open(sk);
2749 return;
2750 }
2751 break;
2752 case TCP_CA_Loss:
2753 tcp_process_loss(sk, flag, is_dupack);
2754 if (icsk->icsk_ca_state != TCP_CA_Open)
2755 return;
2756
2757 default:
2758 if (tcp_is_reno(tp)) {
2759 if (flag & FLAG_SND_UNA_ADVANCED)
2760 tcp_reset_reno_sack(tp);
2761 if (is_dupack)
2762 tcp_add_reno_sack(sk);
2763 }
2764
2765 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2766 tcp_try_undo_dsack(sk);
2767
2768 if (!tcp_time_to_recover(sk, flag)) {
2769 tcp_try_to_open(sk, flag, prior_unsacked);
2770 return;
2771 }
2772
2773
2774 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2775 icsk->icsk_mtup.probe_size &&
2776 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2777 tcp_mtup_probe_failed(sk);
2778
2779 tp->snd_cwnd++;
2780 tcp_simple_retransmit(sk);
2781 return;
2782 }
2783
2784
2785 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2786 fast_rexmit = 1;
2787 }
2788
2789 if (do_lost)
2790 tcp_update_scoreboard(sk, fast_rexmit);
2791 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
2792 tcp_xmit_retransmit_queue(sk);
2793}
2794
2795void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
2796{
2797 tcp_rtt_estimator(sk, seq_rtt);
2798 tcp_set_rto(sk);
2799 inet_csk(sk)->icsk_backoff = 0;
2800}
2801EXPORT_SYMBOL(tcp_valid_rtt_meas);
2802
2803
2804
2805
2806static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
2807{
2823 struct tcp_sock *tp = tcp_sk(sk);
2824
2825 tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
2826}
2827
2828static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
2829{
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839 if (flag & FLAG_RETRANS_DATA_ACKED)
2840 return;
2841
2842 tcp_valid_rtt_meas(sk, seq_rtt);
2843}
2844
2845static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2846 const s32 seq_rtt)
2847{
2848 const struct tcp_sock *tp = tcp_sk(sk);
2849
2850 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
2851 tcp_ack_saw_tstamp(sk, flag);
2852 else if (seq_rtt >= 0)
2853 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2854}
2855
2856static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
2857{
2858 const struct inet_connection_sock *icsk = inet_csk(sk);
2859 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
2860 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2861}
2862
2863
2864
2865
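/* Restart the retransmission timer after forward progress on the connection.
 * If an early-retransmit or loss-probe timer is pending, re-base the RTO on
 * the send time of the oldest outstanding segment.
 */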
2866void tcp_rearm_rto(struct sock *sk)
2867{
2868 const struct inet_connection_sock *icsk = inet_csk(sk);
2869 struct tcp_sock *tp = tcp_sk(sk);
2870
2871
2872
2873
2874 if (tp->fastopen_rsk)
2875 return;
2876
2877 if (!tp->packets_out) {
2878 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2879 } else {
2880 u32 rto = inet_csk(sk)->icsk_rto;
2881
2882 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2883 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2884 struct sk_buff *skb = tcp_write_queue_head(sk);
2885 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
2886 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
2887
2888
2889
2890 if (delta > 0)
2891 rto = delta;
2892 }
2893 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
2894 TCP_RTO_MAX);
2895 }
2896}
2897
2898
2899
2900
2901void tcp_resume_early_retransmit(struct sock *sk)
2902{
2903 struct tcp_sock *tp = tcp_sk(sk);
2904
2905 tcp_rearm_rto(sk);
2906
2907
2908 if (!tp->do_early_retrans)
2909 return;
2910
2911 tcp_enter_recovery(sk, false);
2912 tcp_update_scoreboard(sk, 1);
2913 tcp_xmit_retransmit_queue(sk);
2914}
2915
2916
2917static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
2918{
2919 struct tcp_sock *tp = tcp_sk(sk);
2920 u32 packets_acked;
2921
2922 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
2923
2924 packets_acked = tcp_skb_pcount(skb);
2925 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2926 return 0;
2927 packets_acked -= tcp_skb_pcount(skb);
2928
2929 if (packets_acked) {
2930 BUG_ON(tcp_skb_pcount(skb) == 0);
2931 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
2932 }
2933
2934 return packets_acked;
2935}
2936
2937
2938
2939
2940
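/* Remove acknowledged frames from the retransmission queue.  If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.  Also collects RTT samples, reordering hints
 * and the per-congestion-control pkts_acked notification.
 */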
2941static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
2942 u32 prior_snd_una)
2943{
2944 struct tcp_sock *tp = tcp_sk(sk);
2945 const struct inet_connection_sock *icsk = inet_csk(sk);
2946 struct sk_buff *skb;
2947 u32 now = tcp_time_stamp;
2948 int fully_acked = true;
2949 int flag = 0;
2950 u32 pkts_acked = 0;
2951 u32 reord = tp->packets_out;
2952 u32 prior_sacked = tp->sacked_out;
2953 s32 seq_rtt = -1;
2954 s32 ca_seq_rtt = -1;
2955 ktime_t last_ackt = net_invalid_timestamp();
2956
2957 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
2958 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
2959 u32 acked_pcount;
2960 u8 sacked = scb->sacked;
2961
2962
2963 if (after(scb->end_seq, tp->snd_una)) {
2964 if (tcp_skb_pcount(skb) == 1 ||
2965 !after(tp->snd_una, scb->seq))
2966 break;
2967
2968 acked_pcount = tcp_tso_acked(sk, skb);
2969 if (!acked_pcount)
2970 break;
2971
2972 fully_acked = false;
2973 } else {
2974 acked_pcount = tcp_skb_pcount(skb);
2975 }
2976
2977 if (sacked & TCPCB_RETRANS) {
2978 if (sacked & TCPCB_SACKED_RETRANS)
2979 tp->retrans_out -= acked_pcount;
2980 flag |= FLAG_RETRANS_DATA_ACKED;
2981 ca_seq_rtt = -1;
2982 seq_rtt = -1;
2983 } else {
2984 ca_seq_rtt = now - scb->when;
2985 last_ackt = skb->tstamp;
2986 if (seq_rtt < 0) {
2987 seq_rtt = ca_seq_rtt;
2988 }
2989 if (!(sacked & TCPCB_SACKED_ACKED))
2990 reord = min(pkts_acked, reord);
2991 if (!after(scb->end_seq, tp->high_seq))
2992 flag |= FLAG_ORIG_SACK_ACKED;
2993 }
2994
2995 if (sacked & TCPCB_SACKED_ACKED)
2996 tp->sacked_out -= acked_pcount;
2997 if (sacked & TCPCB_LOST)
2998 tp->lost_out -= acked_pcount;
2999
3000 tp->packets_out -= acked_pcount;
3001 pkts_acked += acked_pcount;
3002
3003
3004
3005
3006
3007
3008
3009
3010 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3011 flag |= FLAG_DATA_ACKED;
3012 } else {
3013 flag |= FLAG_SYN_ACKED;
3014 tp->retrans_stamp = 0;
3015 }
3016
3017 if (!fully_acked)
3018 break;
3019
3020 tcp_unlink_write_queue(skb, sk);
3021 sk_wmem_free_skb(sk, skb);
3022 if (skb == tp->retransmit_skb_hint)
3023 tp->retransmit_skb_hint = NULL;
3024 if (skb == tp->lost_skb_hint)
3025 tp->lost_skb_hint = NULL;
3026 }
3027
3028 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3029 tp->snd_up = tp->snd_una;
3030
3031 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3032 flag |= FLAG_SACK_RENEGING;
3033
3034 if (flag & FLAG_ACKED) {
3035 const struct tcp_congestion_ops *ca_ops
3036 = inet_csk(sk)->icsk_ca_ops;
3037
3038 if (unlikely(icsk->icsk_mtup.probe_size &&
3039 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3040 tcp_mtup_probe_success(sk);
3041 }
3042
3043 tcp_ack_update_rtt(sk, flag, seq_rtt);
3044 tcp_rearm_rto(sk);
3045
3046 if (tcp_is_reno(tp)) {
3047 tcp_remove_reno_sacks(sk, pkts_acked);
3048 } else {
3049 int delta;
3050
3051
3052 if (reord < prior_fackets)
3053 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3054
3055 delta = tcp_is_fack(tp) ? pkts_acked :
3056 prior_sacked - tp->sacked_out;
3057 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3058 }
3059
3060 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3061
3062 if (ca_ops->pkts_acked) {
3063 s32 rtt_us = -1;
3064
3065
3066 if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
3067
3068 if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
3069 !ktime_equal(last_ackt,
3070 net_invalid_timestamp()))
3071 rtt_us = ktime_us_delta(ktime_get_real(),
3072 last_ackt);
3073 else if (ca_seq_rtt >= 0)
3074 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3075 }
3076
3077 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3078 }
3079 }
3080
3081#if FASTRETRANS_DEBUG > 0
3082 WARN_ON((int)tp->sacked_out < 0);
3083 WARN_ON((int)tp->lost_out < 0);
3084 WARN_ON((int)tp->retrans_out < 0);
3085 if (!tp->packets_out && tcp_is_sack(tp)) {
3086 icsk = inet_csk(sk);
3087 if (tp->lost_out) {
3088 pr_debug("Leak l=%u %d\n",
3089 tp->lost_out, icsk->icsk_ca_state);
3090 tp->lost_out = 0;
3091 }
3092 if (tp->sacked_out) {
3093 pr_debug("Leak s=%u %d\n",
3094 tp->sacked_out, icsk->icsk_ca_state);
3095 tp->sacked_out = 0;
3096 }
3097 if (tp->retrans_out) {
3098 pr_debug("Leak r=%u %d\n",
3099 tp->retrans_out, icsk->icsk_ca_state);
3100 tp->retrans_out = 0;
3101 }
3102 }
3103#endif
3104 return flag;
3105}
3106
3107static void tcp_ack_probe(struct sock *sk)
3108{
3109 const struct tcp_sock *tp = tcp_sk(sk);
3110 struct inet_connection_sock *icsk = inet_csk(sk);
3111
3112
3113
3114 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3115 icsk->icsk_backoff = 0;
3116 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3117
3118
3119
3120 } else {
3121 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3122 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
3123 TCP_RTO_MAX);
3124 }
3125}
3126
3127static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3128{
3129 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3130 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3131}
3132
3133static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3134{
3135 const struct tcp_sock *tp = tcp_sk(sk);
3136 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
3137 !tcp_in_cwnd_reduction(sk);
3138}
3139
3140
3141
3142
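/* Check that a window update is acceptable.
 * The function assumes that snd_una <= ack <= snd_nxt.
 */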
3143static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3144 const u32 ack, const u32 ack_seq,
3145 const u32 nwin)
3146{
3147 return after(ack, tp->snd_una) ||
3148 after(ack_seq, tp->snd_wl1) ||
3149 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3150}
3151
3152
3153
3154
3155
3156
3157static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3158 u32 ack_seq)
3159{
3160 struct tcp_sock *tp = tcp_sk(sk);
3161 int flag = 0;
3162 u32 nwin = ntohs(tcp_hdr(skb)->window);
3163
3164 if (likely(!tcp_hdr(skb)->syn))
3165 nwin <<= tp->rx_opt.snd_wscale;
3166
3167 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3168 flag |= FLAG_WIN_UPDATE;
3169 tcp_update_wl(tp, ack_seq);
3170
3171 if (tp->snd_wnd != nwin) {
3172 tp->snd_wnd = nwin;
3173
3174
3175
3176
3177 tp->pred_flags = 0;
3178 tcp_fast_path_check(sk);
3179
3180 if (nwin > tp->max_window) {
3181 tp->max_window = nwin;
3182 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3183 }
3184 }
3185 }
3186
3187 tp->snd_una = ack;
3188
3189 return flag;
3190}
3191
3192
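/* RFC 5961 7 [ACK Throttling]: send at most sysctl_tcp_challenge_ack_limit
 * challenge ACKs per second, tracked with unprotected static counters.
 */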
3193static void tcp_send_challenge_ack(struct sock *sk)
3194{
3195
3196 static u32 challenge_timestamp;
3197 static unsigned int challenge_count;
3198 u32 now = jiffies / HZ;
3199
3200 if (now != challenge_timestamp) {
3201 challenge_timestamp = now;
3202 challenge_count = 0;
3203 }
3204 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3205 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3206 tcp_send_ack(sk);
3207 }
3208}
3209
3210static void tcp_store_ts_recent(struct tcp_sock *tp)
3211{
3212 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3213 tp->rx_opt.ts_recent_stamp = get_seconds();
3214}
3215
3216static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3217{
3218 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3219
3220
3221
3222
3223
3224
3225
3226 if (tcp_paws_check(&tp->rx_opt, 0))
3227 tcp_store_ts_recent(tp);
3228 }
3229}
3230
3231
3232
3233
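/* This routine deals with acks during a TLP (tail loss probe) episode,
 * per the loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
 * A pure dupack for tlp_high_seq just ends the episode; an ACK beyond
 * tlp_high_seq means the probe repaired a loss, so perform a one-time cwnd
 * reduction unless a D-SACK arrived for the probe retransmission.
 */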
3234static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3235{
3236 struct tcp_sock *tp = tcp_sk(sk);
3237 bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
3238 !(flag & (FLAG_SND_UNA_ADVANCED |
3239 FLAG_NOT_DUP | FLAG_DATA_SACKED));
3240
3241
3242
3243
3244 if (is_tlp_dupack) {
3245 tp->tlp_high_seq = 0;
3246 return;
3247 }
3248
3249 if (after(ack, tp->tlp_high_seq)) {
3250 tp->tlp_high_seq = 0;
3251
3252 if (!(flag & FLAG_DSACKING_ACK)) {
3253 tcp_init_cwnd_reduction(sk, true);
3254 tcp_set_ca_state(sk, TCP_CA_CWR);
3255 tcp_end_cwnd_reduction(sk);
3256 tcp_set_ca_state(sk, TCP_CA_Open);
3257 NET_INC_STATS_BH(sock_net(sk),
3258 LINUX_MIB_TCPLOSSPROBERECOVERY);
3259 }
3260 }
3261}
3262
3263
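/* This routine deals with incoming acks, but not outgoing ones. */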
3264static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3265{
3266 struct inet_connection_sock *icsk = inet_csk(sk);
3267 struct tcp_sock *tp = tcp_sk(sk);
3268 u32 prior_snd_una = tp->snd_una;
3269 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3270 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3271 bool is_dupack = false;
3272 u32 prior_in_flight;
3273 u32 prior_fackets;
3274 int prior_packets = tp->packets_out;
3275 const int prior_unsacked = tp->packets_out - tp->sacked_out;
3276 int acked = 0;
3277
3278
3279
3280
3281 if (before(ack, prior_snd_una)) {
3282
3283 if (before(ack, prior_snd_una - tp->max_window)) {
3284 tcp_send_challenge_ack(sk);
3285 return -1;
3286 }
3287 goto old_ack;
3288 }
3289
3290
3291
3292
3293 if (after(ack, tp->snd_nxt))
3294 goto invalid_ack;
3295
3296 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3297 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3298 tcp_rearm_rto(sk);
3299
3300 if (after(ack, prior_snd_una))
3301 flag |= FLAG_SND_UNA_ADVANCED;
3302
3303 prior_fackets = tp->fackets_out;
3304 prior_in_flight = tcp_packets_in_flight(tp);
3305
3306
3307
3308
3309 if (flag & FLAG_UPDATE_TS_RECENT)
3310 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3311
3312 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3313
3314
3315
3316
3317 tcp_update_wl(tp, ack_seq);
3318 tp->snd_una = ack;
3319 flag |= FLAG_WIN_UPDATE;
3320
3321 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3322
3323 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3324 } else {
3325 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3326 flag |= FLAG_DATA;
3327 else
3328 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3329
3330 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3331
3332 if (TCP_SKB_CB(skb)->sacked)
3333 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3334
3335 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3336 flag |= FLAG_ECE;
3337
3338 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
3339 }
3340
3341
3342
3343
3344 sk->sk_err_soft = 0;
3345 icsk->icsk_probes_out = 0;
3346 tp->rcv_tstamp = tcp_time_stamp;
3347 if (!prior_packets)
3348 goto no_queue;
3349
3350
3351 acked = tp->packets_out;
3352 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3353 acked -= tp->packets_out;
3354
3355 if (tcp_ack_is_dubious(sk, flag)) {
3356
3357 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
3358 tcp_cong_avoid(sk, ack, prior_in_flight);
3359 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3360 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3361 is_dupack, flag);
3362 } else {
3363 if (flag & FLAG_DATA_ACKED)
3364 tcp_cong_avoid(sk, ack, prior_in_flight);
3365 }
3366
3367 if (tp->tlp_high_seq)
3368 tcp_process_tlp_ack(sk, ack, flag);
3369
3370 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3371 struct dst_entry *dst = __sk_dst_get(sk);
3372 if (dst)
3373 dst_confirm(dst);
3374 }
3375
3376 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3377 tcp_schedule_loss_probe(sk);
3378 return 1;
3379
3380no_queue:
3381
3382 if (flag & FLAG_DSACKING_ACK)
3383 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3384 is_dupack, flag);
3385
3386
3387
3388
3389 if (tcp_send_head(sk))
3390 tcp_ack_probe(sk);
3391
3392 if (tp->tlp_high_seq)
3393 tcp_process_tlp_ack(sk, ack, flag);
3394 return 1;
3395
3396invalid_ack:
3397 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3398 return -1;
3399
3400old_ack:
3401
3402
3403
3404 if (TCP_SKB_CB(skb)->sacked) {
3405 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3406 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3407 is_dupack, flag);
3408 }
3409
3410 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3411 return 0;
3412}
3413
3414
3415
3416
3417
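/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
 */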
3418void tcp_parse_options(const struct sk_buff *skb,
3419 struct tcp_options_received *opt_rx, int estab,
3420 struct tcp_fastopen_cookie *foc)
3421{
3422 const unsigned char *ptr;
3423 const struct tcphdr *th = tcp_hdr(skb);
3424 int length = (th->doff * 4) - sizeof(struct tcphdr);
3425
3426 ptr = (const unsigned char *)(th + 1);
3427 opt_rx->saw_tstamp = 0;
3428
3429 while (length > 0) {
3430 int opcode = *ptr++;
3431 int opsize;
3432
3433 switch (opcode) {
3434 case TCPOPT_EOL:
3435 return;
3436 case TCPOPT_NOP:
3437 length--;
3438 continue;
3439 default:
3440 opsize = *ptr++;
3441 if (opsize < 2)
3442 return;
3443 if (opsize > length)
3444 return;
3445 switch (opcode) {
3446 case TCPOPT_MSS:
3447 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3448 u16 in_mss = get_unaligned_be16(ptr);
3449 if (in_mss) {
3450 if (opt_rx->user_mss &&
3451 opt_rx->user_mss < in_mss)
3452 in_mss = opt_rx->user_mss;
3453 opt_rx->mss_clamp = in_mss;
3454 }
3455 }
3456 break;
3457 case TCPOPT_WINDOW:
3458 if (opsize == TCPOLEN_WINDOW && th->syn &&
3459 !estab && sysctl_tcp_window_scaling) {
3460 __u8 snd_wscale = *(__u8 *)ptr;
3461 opt_rx->wscale_ok = 1;
3462 if (snd_wscale > 14) {
3463 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3464 __func__,
3465 snd_wscale);
3466 snd_wscale = 14;
3467 }
3468 opt_rx->snd_wscale = snd_wscale;
3469 }
3470 break;
3471 case TCPOPT_TIMESTAMP:
3472 if ((opsize == TCPOLEN_TIMESTAMP) &&
3473 ((estab && opt_rx->tstamp_ok) ||
3474 (!estab && sysctl_tcp_timestamps))) {
3475 opt_rx->saw_tstamp = 1;
3476 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3477 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3478 }
3479 break;
3480 case TCPOPT_SACK_PERM:
3481 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3482 !estab && sysctl_tcp_sack) {
3483 opt_rx->sack_ok = TCP_SACK_SEEN;
3484 tcp_sack_reset(opt_rx);
3485 }
3486 break;
3487
3488 case TCPOPT_SACK:
3489 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3490 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3491 opt_rx->sack_ok) {
3492 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3493 }
3494 break;
3495#ifdef CONFIG_TCP_MD5SIG
3496 case TCPOPT_MD5SIG:
3497
3498
3499
3500
3501 break;
3502#endif
3503 case TCPOPT_EXP:
3504
3505
3506
3507
3508 if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
3509 get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
3510 foc == NULL || !th->syn || (opsize & 1))
3511 break;
3512 foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
3513 if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
3514 foc->len <= TCP_FASTOPEN_COOKIE_MAX)
3515 memcpy(foc->val, ptr + 2, foc->len);
3516 else if (foc->len != 0)
3517 foc->len = -1;
3518 break;
3519
3520 }
3521 ptr += opsize-2;
3522 length -= opsize;
3523 }
3524 }
3525}
3526EXPORT_SYMBOL(tcp_parse_options);
3527
3528static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3529{
3530 const __be32 *ptr = (const __be32 *)(th + 1);
3531
3532 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3533 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3534 tp->rx_opt.saw_tstamp = 1;
3535 ++ptr;
3536 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3537 ++ptr;
3538 if (*ptr)
3539 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3540 else
3541 tp->rx_opt.rcv_tsecr = 0;
3542 return true;
3543 }
3544 return false;
3545}
3546
3547
3548
3549
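/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */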
3550static bool tcp_fast_parse_options(const struct sk_buff *skb,
3551 const struct tcphdr *th, struct tcp_sock *tp)
3552{
3553
3554
3555
3556 if (th->doff == (sizeof(*th) / 4)) {
3557 tp->rx_opt.saw_tstamp = 0;
3558 return false;
3559 } else if (tp->rx_opt.tstamp_ok &&
3560 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3561 if (tcp_parse_aligned_timestamp(tp, th))
3562 return true;
3563 }
3564
3565 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3566 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3567 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3568
3569 return true;
3570}
3571
3572#ifdef CONFIG_TCP_MD5SIG
3573
3574
3575
3576const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3577{
3578 int length = (th->doff << 2) - sizeof(*th);
3579 const u8 *ptr = (const u8 *)(th + 1);
3580
3581
3582 if (length < TCPOLEN_MD5SIG)
3583 return NULL;
3584
3585 while (length > 0) {
3586 int opcode = *ptr++;
3587 int opsize;
3588
3589 switch(opcode) {
3590 case TCPOPT_EOL:
3591 return NULL;
3592 case TCPOPT_NOP:
3593 length--;
3594 continue;
3595 default:
3596 opsize = *ptr++;
3597 if (opsize < 2 || opsize > length)
3598 return NULL;
3599 if (opcode == TCPOPT_MD5SIG)
3600 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3601 }
3602 ptr += opsize - 2;
3603 length -= opsize;
3604 }
3605 return NULL;
3606}
3607EXPORT_SYMBOL(tcp_parse_md5sig_option);
3608#endif
3609
3632
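/* Heuristic used by PAWS: a pure, in-sequence duplicate ACK that does not
 * update the window and whose timestamp is at most roughly one RTO older
 * than ts_recent is treated as harmless reordering rather than as a
 * PAWS failure.
 */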
3633static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3634{
3635 const struct tcp_sock *tp = tcp_sk(sk);
3636 const struct tcphdr *th = tcp_hdr(skb);
3637 u32 seq = TCP_SKB_CB(skb)->seq;
3638 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3639
3640 return (
3641 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3642
3643
3644 ack == tp->snd_una &&
3645
3646
3647 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3648
3649
3650 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3651}
3652
3653static inline bool tcp_paws_discard(const struct sock *sk,
3654 const struct sk_buff *skb)
3655{
3656 const struct tcp_sock *tp = tcp_sk(sk);
3657
3658 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3659 !tcp_disordered_ack(sk, skb);
3660}
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
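/* Check whether the segment [seq, end_seq) overlaps the receive window:
 * it must end at or after rcv_wup and start no later than
 * rcv_nxt + receive window.
 */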
3675static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
3676{
3677 return !before(end_seq, tp->rcv_wup) &&
3678 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3679}
3680
3681
3682void tcp_reset(struct sock *sk)
3683{
3684
3685 switch (sk->sk_state) {
3686 case TCP_SYN_SENT:
3687 sk->sk_err = ECONNREFUSED;
3688 break;
3689 case TCP_CLOSE_WAIT:
3690 sk->sk_err = EPIPE;
3691 break;
3692 case TCP_CLOSE:
3693 return;
3694 default:
3695 sk->sk_err = ECONNRESET;
3696 }
3697
3698 smp_wmb();
3699
3700 if (!sock_flag(sk, SOCK_DEAD))
3701 sk->sk_error_report(sk);
3702
3703 tcp_done(sk);
3704}
3705
3719
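/* Process the FIN bit. The FIN takes effect when it is validly part of
 * sequence space, not before when we get holes: walk through the closing
 * states, purge the out-of-order queue and wake up the socket.
 */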
3720static void tcp_fin(struct sock *sk)
3721{
3722 struct tcp_sock *tp = tcp_sk(sk);
3723 const struct dst_entry *dst;
3724
3725 inet_csk_schedule_ack(sk);
3726
3727 sk->sk_shutdown |= RCV_SHUTDOWN;
3728 sock_set_flag(sk, SOCK_DONE);
3729
3730 switch (sk->sk_state) {
3731 case TCP_SYN_RECV:
3732 case TCP_ESTABLISHED:
3733
3734 tcp_set_state(sk, TCP_CLOSE_WAIT);
3735 dst = __sk_dst_get(sk);
3736 if (!dst || !dst_metric(dst, RTAX_QUICKACK))
3737 inet_csk(sk)->icsk_ack.pingpong = 1;
3738 break;
3739
3740 case TCP_CLOSE_WAIT:
3741 case TCP_CLOSING:
3742
3743
3744
3745 break;
3746 case TCP_LAST_ACK:
3747
3748 break;
3749
3750 case TCP_FIN_WAIT1:
3751
3752
3753
3754
3755 tcp_send_ack(sk);
3756 tcp_set_state(sk, TCP_CLOSING);
3757 break;
3758 case TCP_FIN_WAIT2:
3759
3760 tcp_send_ack(sk);
3761 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3762 break;
3763 default:
3764
3765
3766
3767 pr_err("%s: Impossible, sk->sk_state=%d\n",
3768 __func__, sk->sk_state);
3769 break;
3770 }
3771
3772
3773
3774
3775 __skb_queue_purge(&tp->out_of_order_queue);
3776 if (tcp_is_sack(tp))
3777 tcp_sack_reset(&tp->rx_opt);
3778 sk_mem_reclaim(sk);
3779
3780 if (!sock_flag(sk, SOCK_DEAD)) {
3781 sk->sk_state_change(sk);
3782
3783
3784 if (sk->sk_shutdown == SHUTDOWN_MASK ||
3785 sk->sk_state == TCP_CLOSE)
3786 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
3787 else
3788 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3789 }
3790}
3791
3792static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
3793 u32 end_seq)
3794{
3795 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
3796 if (before(seq, sp->start_seq))
3797 sp->start_seq = seq;
3798 if (after(end_seq, sp->end_seq))
3799 sp->end_seq = end_seq;
3800 return true;
3801 }
3802 return false;
3803}
3804
3805static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
3806{
3807 struct tcp_sock *tp = tcp_sk(sk);
3808
3809 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3810 int mib_idx;
3811
3812 if (before(seq, tp->rcv_nxt))
3813 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
3814 else
3815 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
3816
3817 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3818
3819 tp->rx_opt.dsack = 1;
3820 tp->duplicate_sack[0].start_seq = seq;
3821 tp->duplicate_sack[0].end_seq = end_seq;
3822 }
3823}
3824
3825static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
3826{
3827 struct tcp_sock *tp = tcp_sk(sk);
3828
3829 if (!tp->rx_opt.dsack)
3830 tcp_dsack_set(sk, seq, end_seq);
3831 else
3832 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
3833}
3834
3835static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
3836{
3837 struct tcp_sock *tp = tcp_sk(sk);
3838
3839 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
3840 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
3841 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
3842 tcp_enter_quickack_mode(sk);
3843
3844 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3845 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
3846
3847 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
3848 end_seq = tp->rcv_nxt;
3849 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
3850 }
3851 }
3852
3853 tcp_send_ack(sk);
3854}
3855
3856
3857
3858
3859static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3860{
3861 int this_sack;
3862 struct tcp_sack_block *sp = &tp->selective_acks[0];
3863 struct tcp_sack_block *swalk = sp + 1;
3864
3865
3866
3867
3868 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
3869 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
3870 int i;
3871
3872
3873
3874
3875 tp->rx_opt.num_sacks--;
3876 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
3877 sp[i] = sp[i + 1];
3878 continue;
3879 }
3880 this_sack++, swalk++;
3881 }
3882}
3883
3884static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3885{
3886 struct tcp_sock *tp = tcp_sk(sk);
3887 struct tcp_sack_block *sp = &tp->selective_acks[0];
3888 int cur_sacks = tp->rx_opt.num_sacks;
3889 int this_sack;
3890
3891 if (!cur_sacks)
3892 goto new_sack;
3893
3894 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
3895 if (tcp_sack_extend(sp, seq, end_seq)) {
3896
3897 for (; this_sack > 0; this_sack--, sp--)
3898 swap(*sp, *(sp - 1));
3899 if (cur_sacks > 1)
3900 tcp_sack_maybe_coalesce(tp);
3901 return;
3902 }
3903 }
3904
3905
3906
3907
3908
3909
3910
3911 if (this_sack >= TCP_NUM_SACKS) {
3912 this_sack--;
3913 tp->rx_opt.num_sacks--;
3914 sp--;
3915 }
3916 for (; this_sack > 0; this_sack--, sp--)
3917 *sp = *(sp - 1);
3918
3919new_sack:
3920
3921 sp->start_seq = seq;
3922 sp->end_seq = end_seq;
3923 tp->rx_opt.num_sacks++;
3924}
3925
3926
3927
3928static void tcp_sack_remove(struct tcp_sock *tp)
3929{
3930 struct tcp_sack_block *sp = &tp->selective_acks[0];
3931 int num_sacks = tp->rx_opt.num_sacks;
3932 int this_sack;
3933
3934
3935 if (skb_queue_empty(&tp->out_of_order_queue)) {
3936 tp->rx_opt.num_sacks = 0;
3937 return;
3938 }
3939
3940 for (this_sack = 0; this_sack < num_sacks;) {
3941
3942 if (!before(tp->rcv_nxt, sp->start_seq)) {
3943 int i;
3944
3945
3946 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
3947
3948
3949 for (i=this_sack+1; i < num_sacks; i++)
3950 tp->selective_acks[i-1] = tp->selective_acks[i];
3951 num_sacks--;
3952 continue;
3953 }
3954 this_sack++;
3955 sp++;
3956 }
3957 tp->rx_opt.num_sacks = num_sacks;
3958}
3959
3960
3961
3962
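/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */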
3963static void tcp_ofo_queue(struct sock *sk)
3964{
3965 struct tcp_sock *tp = tcp_sk(sk);
3966 __u32 dsack_high = tp->rcv_nxt;
3967 struct sk_buff *skb;
3968
3969 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
3970 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
3971 break;
3972
3973 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
3974 __u32 dsack = dsack_high;
3975 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
3976 dsack_high = TCP_SKB_CB(skb)->end_seq;
3977 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
3978 }
3979
3980 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
3981 SOCK_DEBUG(sk, "ofo packet was already received\n");
3982 __skb_unlink(skb, &tp->out_of_order_queue);
3983 __kfree_skb(skb);
3984 continue;
3985 }
3986 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
3987 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
3988 TCP_SKB_CB(skb)->end_seq);
3989
3990 __skb_unlink(skb, &tp->out_of_order_queue);
3991 __skb_queue_tail(&sk->sk_receive_queue, skb);
3992 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3993 if (tcp_hdr(skb)->fin)
3994 tcp_fin(sk);
3995 }
3996}
3997
3998static bool tcp_prune_ofo_queue(struct sock *sk);
3999static int tcp_prune_queue(struct sock *sk);
4000
4001static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4002 unsigned int size)
4003{
4004 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4005 !sk_rmem_schedule(sk, skb, size)) {
4006
4007 if (tcp_prune_queue(sk) < 0)
4008 return -1;
4009
4010 if (!sk_rmem_schedule(sk, skb, size)) {
4011 if (!tcp_prune_ofo_queue(sk))
4012 return -1;
4013
4014 if (!sk_rmem_schedule(sk, skb, size))
4015 return -1;
4016 }
4017 }
4018 return 0;
4019}
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
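/* Before queueing skb @from after @to, try to merge them to reduce overall
 * memory use and queue lengths, if the cost is small.  Returns true if
 * @from was merged and the caller should free it (with kfree_skb_partial(),
 * honouring *fragstolen); FIN segments and non-contiguous sequence numbers
 * are never coalesced.
 */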
4034static bool tcp_try_coalesce(struct sock *sk,
4035 struct sk_buff *to,
4036 struct sk_buff *from,
4037 bool *fragstolen)
4038{
4039 int delta;
4040
4041 *fragstolen = false;
4042
4043 if (tcp_hdr(from)->fin)
4044 return false;
4045
4046
4047 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4048 return false;
4049
4050 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4051 return false;
4052
4053 atomic_add(delta, &sk->sk_rmem_alloc);
4054 sk_mem_charge(sk, delta);
4055 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4056 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4057 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4058 return true;
4059}
4060
4061static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4062{
4063 struct tcp_sock *tp = tcp_sk(sk);
4064 struct sk_buff *skb1;
4065 u32 seq, end_seq;
4066
4067 TCP_ECN_check_ce(tp, skb);
4068
4069 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4070 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
4071 __kfree_skb(skb);
4072 return;
4073 }
4074
4075
4076 tp->pred_flags = 0;
4077 inet_csk_schedule_ack(sk);
4078
4079 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4080 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4081 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4082
4083 skb1 = skb_peek_tail(&tp->out_of_order_queue);
4084 if (!skb1) {
4085
4086 if (tcp_is_sack(tp)) {
4087 tp->rx_opt.num_sacks = 1;
4088 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4089 tp->selective_acks[0].end_seq =
4090 TCP_SKB_CB(skb)->end_seq;
4091 }
4092 __skb_queue_head(&tp->out_of_order_queue, skb);
4093 goto end;
4094 }
4095
4096 seq = TCP_SKB_CB(skb)->seq;
4097 end_seq = TCP_SKB_CB(skb)->end_seq;
4098
4099 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4100 bool fragstolen;
4101
4102 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4103 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4104 } else {
4105 kfree_skb_partial(skb, fragstolen);
4106 skb = NULL;
4107 }
4108
4109 if (!tp->rx_opt.num_sacks ||
4110 tp->selective_acks[0].end_seq != seq)
4111 goto add_sack;
4112
4113
4114 tp->selective_acks[0].end_seq = end_seq;
4115 goto end;
4116 }
4117
4118
4119 while (1) {
4120 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4121 break;
4122 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4123 skb1 = NULL;
4124 break;
4125 }
4126 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4127 }
4128
4129
4130 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4131 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4132
4133 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4134 __kfree_skb(skb);
4135 skb = NULL;
4136 tcp_dsack_set(sk, seq, end_seq);
4137 goto add_sack;
4138 }
4139 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4140
4141 tcp_dsack_set(sk, seq,
4142 TCP_SKB_CB(skb1)->end_seq);
4143 } else {
4144 if (skb_queue_is_first(&tp->out_of_order_queue,
4145 skb1))
4146 skb1 = NULL;
4147 else
4148 skb1 = skb_queue_prev(
4149 &tp->out_of_order_queue,
4150 skb1);
4151 }
4152 }
4153 if (!skb1)
4154 __skb_queue_head(&tp->out_of_order_queue, skb);
4155 else
4156 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4157
4158
4159 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4160 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4161
4162 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4163 break;
4164 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4165 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4166 end_seq);
4167 break;
4168 }
4169 __skb_unlink(skb1, &tp->out_of_order_queue);
4170 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4171 TCP_SKB_CB(skb1)->end_seq);
4172 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4173 __kfree_skb(skb1);
4174 }
4175
4176add_sack:
4177 if (tcp_is_sack(tp))
4178 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4179end:
4180 if (skb)
4181 skb_set_owner_r(skb, sk);
4182}
4183
4184static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4185 bool *fragstolen)
4186{
4187 int eaten;
4188 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4189
4190 __skb_pull(skb, hdrlen);
4191 eaten = (tail &&
4192 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4193 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4194 if (!eaten) {
4195 __skb_queue_tail(&sk->sk_receive_queue, skb);
4196 skb_set_owner_r(skb, sk);
4197 }
4198 return eaten;
4199}
4200
4201int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4202{
4203 struct sk_buff *skb = NULL;
4204 struct tcphdr *th;
4205 bool fragstolen;
4206
4207 if (size == 0)
4208 return 0;
4209
4210 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4211 if (!skb)
4212 goto err;
4213
4214 if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
4215 goto err_free;
4216
4217 th = (struct tcphdr *)skb_put(skb, sizeof(*th));
4218 skb_reset_transport_header(skb);
4219 memset(th, 0, sizeof(*th));
4220
4221 if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
4222 goto err_free;
4223
4224 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4225 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4226 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4227
4228 if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
4229 WARN_ON_ONCE(fragstolen);
4230 __kfree_skb(skb);
4231 }
4232 return size;
4233
4234err_free:
4235 kfree_skb(skb);
4236err:
4237 return -ENOMEM;
4238}
4239
4240static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4241{
4242 const struct tcphdr *th = tcp_hdr(skb);
4243 struct tcp_sock *tp = tcp_sk(sk);
4244 int eaten = -1;
4245 bool fragstolen = false;
4246
4247 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4248 goto drop;
4249
4250 skb_dst_drop(skb);
4251 __skb_pull(skb, th->doff * 4);
4252
4253 TCP_ECN_accept_cwr(tp, skb);
4254
4255 tp->rx_opt.dsack = 0;
4256
4257
4258
4259
4260
4261 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4262 if (tcp_receive_window(tp) == 0)
4263 goto out_of_window;
4264
4265
4266 if (tp->ucopy.task == current &&
4267 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4268 sock_owned_by_user(sk) && !tp->urg_data) {
4269 int chunk = min_t(unsigned int, skb->len,
4270 tp->ucopy.len);
4271
4272 __set_current_state(TASK_RUNNING);
4273
4274 local_bh_enable();
4275 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4276 tp->ucopy.len -= chunk;
4277 tp->copied_seq += chunk;
4278 eaten = (chunk == skb->len);
4279 tcp_rcv_space_adjust(sk);
4280 }
4281 local_bh_disable();
4282 }
4283
4284 if (eaten <= 0) {
4285queue_and_out:
4286 if (eaten < 0 &&
4287 tcp_try_rmem_schedule(sk, skb, skb->truesize))
4288 goto drop;
4289
4290 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4291 }
4292 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4293 if (skb->len)
4294 tcp_event_data_recv(sk, skb);
4295 if (th->fin)
4296 tcp_fin(sk);
4297
4298 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4299 tcp_ofo_queue(sk);
4300
4301
4302
4303
4304 if (skb_queue_empty(&tp->out_of_order_queue))
4305 inet_csk(sk)->icsk_ack.pingpong = 0;
4306 }
4307
4308 if (tp->rx_opt.num_sacks)
4309 tcp_sack_remove(tp);
4310
4311 tcp_fast_path_check(sk);
4312
4313 if (eaten > 0)
4314 kfree_skb_partial(skb, fragstolen);
4315 if (!sock_flag(sk, SOCK_DEAD))
4316 sk->sk_data_ready(sk, 0);
4317 return;
4318 }
4319
4320 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4321
4322 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4323 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4324
4325out_of_window:
4326 tcp_enter_quickack_mode(sk);
4327 inet_csk_schedule_ack(sk);
4328drop:
4329 __kfree_skb(skb);
4330 return;
4331 }
4332
4333
4334 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4335 goto out_of_window;
4336
4337 tcp_enter_quickack_mode(sk);
4338
4339 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4340
4341 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4342 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4343 TCP_SKB_CB(skb)->end_seq);
4344
4345 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4346
4347
4348
4349
4350 if (!tcp_receive_window(tp))
4351 goto out_of_window;
4352 goto queue_and_out;
4353 }
4354
4355 tcp_data_queue_ofo(sk, skb);
4356}
4357
4358static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4359 struct sk_buff_head *list)
4360{
4361 struct sk_buff *next = NULL;
4362
4363 if (!skb_queue_is_last(list, skb))
4364 next = skb_queue_next(list, skb);
4365
4366 __skb_unlink(skb, list);
4367 __kfree_skb(skb);
4368 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4369
4370 return next;
4371}
4372
4373
4374
4375
4376
4377
4378
4379
4380
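/* Collapse contiguous sequence of skbs head..tail with
 * sequence numbers start..end.
 *
 * If tail is NULL, this means until the end of the list.
 *
 * Segments with FIN/SYN are not collapsed (only because this
 * simplifies code).
 */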
4381static void
4382tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4383 struct sk_buff *head, struct sk_buff *tail,
4384 u32 start, u32 end)
4385{
4386 struct sk_buff *skb, *n;
4387 bool end_of_skbs;
4388
4389
4390
4391 skb = head;
4392restart:
4393 end_of_skbs = true;
4394 skb_queue_walk_from_safe(list, skb, n) {
4395 if (skb == tail)
4396 break;
4397
4398 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4399 skb = tcp_collapse_one(sk, skb, list);
4400 if (!skb)
4401 break;
4402 goto restart;
4403 }
4404
4405
4406
4407
4408
4409
4410 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
4411 (tcp_win_from_space(skb->truesize) > skb->len ||
4412 before(TCP_SKB_CB(skb)->seq, start))) {
4413 end_of_skbs = false;
4414 break;
4415 }
4416
4417 if (!skb_queue_is_last(list, skb)) {
4418 struct sk_buff *next = skb_queue_next(list, skb);
4419 if (next != tail &&
4420 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4421 end_of_skbs = false;
4422 break;
4423 }
4424 }
4425
4426
4427 start = TCP_SKB_CB(skb)->end_seq;
4428 }
4429 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
4430 return;
4431
4432 while (before(start, end)) {
4433 struct sk_buff *nskb;
4434 unsigned int header = skb_headroom(skb);
4435 int copy = SKB_MAX_ORDER(header, 0);
4436
4437
4438 if (copy < 0)
4439 return;
4440 if (end - start < copy)
4441 copy = end - start;
4442 nskb = alloc_skb(copy + header, GFP_ATOMIC);
4443 if (!nskb)
4444 return;
4445
4446 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4447 skb_set_network_header(nskb, (skb_network_header(skb) -
4448 skb->head));
4449 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4450 skb->head));
4451 skb_reserve(nskb, header);
4452 memcpy(nskb->head, skb->head, header);
4453 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4454 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4455 __skb_queue_before(list, skb, nskb);
4456 skb_set_owner_r(nskb, sk);
4457
4458
4459 while (copy > 0) {
4460 int offset = start - TCP_SKB_CB(skb)->seq;
4461 int size = TCP_SKB_CB(skb)->end_seq - start;
4462
4463 BUG_ON(offset < 0);
4464 if (size > 0) {
4465 size = min(copy, size);
4466 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4467 BUG();
4468 TCP_SKB_CB(nskb)->end_seq += size;
4469 copy -= size;
4470 start += size;
4471 }
4472 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4473 skb = tcp_collapse_one(sk, skb, list);
4474 if (!skb ||
4475 skb == tail ||
4476 tcp_hdr(skb)->syn ||
4477 tcp_hdr(skb)->fin)
4478 return;
4479 }
4480 }
4481 }
4482}
4483
4484
4485
4486
4487static void tcp_collapse_ofo_queue(struct sock *sk)
4488{
4489 struct tcp_sock *tp = tcp_sk(sk);
4490 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4491 struct sk_buff *head;
4492 u32 start, end;
4493
4494 if (skb == NULL)
4495 return;
4496
4497 start = TCP_SKB_CB(skb)->seq;
4498 end = TCP_SKB_CB(skb)->end_seq;
4499 head = skb;
4500
4501 for (;;) {
4502 struct sk_buff *next = NULL;
4503
4504 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4505 next = skb_queue_next(&tp->out_of_order_queue, skb);
4506 skb = next;
4507
4508
4509
4510 if (!skb ||
4511 after(TCP_SKB_CB(skb)->seq, end) ||
4512 before(TCP_SKB_CB(skb)->end_seq, start)) {
4513 tcp_collapse(sk, &tp->out_of_order_queue,
4514 head, skb, start, end);
4515 head = skb;
4516 if (!skb)
4517 break;
4518
4519 start = TCP_SKB_CB(skb)->seq;
4520 end = TCP_SKB_CB(skb)->end_seq;
4521 } else {
4522 if (before(TCP_SKB_CB(skb)->seq, start))
4523 start = TCP_SKB_CB(skb)->seq;
4524 if (after(TCP_SKB_CB(skb)->end_seq, end))
4525 end = TCP_SKB_CB(skb)->end_seq;
4526 }
4527 }
4528}
4529
4530
4531
4532
4533
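/* Purge the out-of-order queue.
 * Return true if queue was pruned.
 */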
4534static bool tcp_prune_ofo_queue(struct sock *sk)
4535{
4536 struct tcp_sock *tp = tcp_sk(sk);
4537 bool res = false;
4538
4539 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4540 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4541 __skb_queue_purge(&tp->out_of_order_queue);
4542
4543
4544
4545
4546
4547
4548 if (tp->rx_opt.sack_ok)
4549 tcp_sack_reset(&tp->rx_opt);
4550 sk_mem_reclaim(sk);
4551 res = true;
4552 }
4553 return res;
4554}
4555
4556
4557
4558
4559
4560
4561
4562
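/* Reduce allocated memory if we can, trying to get
 * the socket within its memory limits again.
 *
 * Return less than zero if we should start dropping frames
 * until the socket owning process reads some of the data
 * to stabilize the situation.
 */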
4563static int tcp_prune_queue(struct sock *sk)
4564{
4565 struct tcp_sock *tp = tcp_sk(sk);
4566
4567 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4568
4569 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4570
4571 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4572 tcp_clamp_window(sk);
4573 else if (sk_under_memory_pressure(sk))
4574 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4575
4576 tcp_collapse_ofo_queue(sk);
4577 if (!skb_queue_empty(&sk->sk_receive_queue))
4578 tcp_collapse(sk, &sk->sk_receive_queue,
4579 skb_peek(&sk->sk_receive_queue),
4580 NULL,
4581 tp->copied_seq, tp->rcv_nxt);
4582 sk_mem_reclaim(sk);
4583
4584 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4585 return 0;
4586
4587
4588
4589
4590 tcp_prune_ofo_queue(sk);
4591
4592 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4593 return 0;
4594
4595
4596
4597
4598
4599 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4600
4601
4602 tp->pred_flags = 0;
4603 return -1;
4604}
4605
4606
4607
4608
4609
4610void tcp_cwnd_application_limited(struct sock *sk)
4611{
4612 struct tcp_sock *tp = tcp_sk(sk);
4613
4614 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4615 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4616
4617 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4618 u32 win_used = max(tp->snd_cwnd_used, init_win);
4619 if (win_used < tp->snd_cwnd) {
4620 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4621 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4622 }
4623 tp->snd_cwnd_used = 0;
4624 }
4625 tp->snd_cwnd_stamp = tcp_time_stamp;
4626}
4627
4628static bool tcp_should_expand_sndbuf(const struct sock *sk)
4629{
4630 const struct tcp_sock *tp = tcp_sk(sk);
4631
4632
4633
4634
4635 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4636 return false;
4637
4638
4639 if (sk_under_memory_pressure(sk))
4640 return false;
4641
4642
4643 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4644 return false;
4645
4646
4647 if (tp->packets_out >= tp->snd_cwnd)
4648 return false;
4649
4650 return true;
4651}
4652
4653
4654
4655
4656
4657
4658
4659static void tcp_new_space(struct sock *sk)
4660{
4661 struct tcp_sock *tp = tcp_sk(sk);
4662
4663 if (tcp_should_expand_sndbuf(sk)) {
4664 int sndmem = SKB_TRUESIZE(max_t(u32,
4665 tp->rx_opt.mss_clamp,
4666 tp->mss_cache) +
4667 MAX_TCP_HEADER);
4668 int demanded = max_t(unsigned int, tp->snd_cwnd,
4669 tp->reordering + 1);
4670 sndmem *= 2 * demanded;
4671 if (sndmem > sk->sk_sndbuf)
4672 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4673 tp->snd_cwnd_stamp = tcp_time_stamp;
4674 }
4675
4676 sk->sk_write_space(sk);
4677}
4678
4679static void tcp_check_space(struct sock *sk)
4680{
4681 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4682 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4683 if (sk->sk_socket &&
4684 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4685 tcp_new_space(sk);
4686 }
4687}
4688
4689static inline void tcp_data_snd_check(struct sock *sk)
4690{
4691 tcp_push_pending_frames(sk);
4692 tcp_check_space(sk);
4693}
4694
4695
4696
4697
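/* Decide whether an ACK must be sent right away (more than one full-sized
 * segment unacknowledged, a window update to advertise, quickack mode, or
 * data sitting in the out-of-order queue) or whether it can be delayed.
 */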
4698static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4699{
4700 struct tcp_sock *tp = tcp_sk(sk);
4701
4702
4703 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4704
4705
4706
4707 __tcp_select_window(sk) >= tp->rcv_wnd) ||
4708
4709 tcp_in_quickack_mode(sk) ||
4710
4711 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4712
4713 tcp_send_ack(sk);
4714 } else {
4715
4716 tcp_send_delayed_ack(sk);
4717 }
4718}
4719
4720static inline void tcp_ack_snd_check(struct sock *sk)
4721{
4722 if (!inet_csk_ack_scheduled(sk)) {
4723
4724 return;
4725 }
4726 __tcp_ack_snd_check(sk, 1);
4727}
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
4740{
4741 struct tcp_sock *tp = tcp_sk(sk);
4742 u32 ptr = ntohs(th->urg_ptr);
4743
4744 if (ptr && !sysctl_tcp_stdurg)
4745 ptr--;
4746 ptr += ntohl(th->seq);
4747
4748
4749 if (after(tp->copied_seq, ptr))
4750 return;
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762 if (before(ptr, tp->rcv_nxt))
4763 return;
4764
4765
4766 if (tp->urg_data && !after(ptr, tp->urg_seq))
4767 return;
4768
4769
4770 sk_send_sigurg(sk);
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4788 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4789 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4790 tp->copied_seq++;
4791 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4792 __skb_unlink(skb, &sk->sk_receive_queue);
4793 __kfree_skb(skb);
4794 }
4795 }
4796
4797 tp->urg_data = TCP_URG_NOTYET;
4798 tp->urg_seq = ptr;
4799
4800
4801 tp->pred_flags = 0;
4802}
4803
4804
4805static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
4806{
4807 struct tcp_sock *tp = tcp_sk(sk);
4808
4809
4810 if (th->urg)
4811 tcp_check_urg(sk, th);
4812
4813
4814 if (tp->urg_data == TCP_URG_NOTYET) {
4815 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
4816 th->syn;
4817
4818
4819 if (ptr < skb->len) {
4820 u8 tmp;
4821 if (skb_copy_bits(skb, ptr, &tmp, 1))
4822 BUG();
4823 tp->urg_data = TCP_URG_VALID | tmp;
4824 if (!sock_flag(sk, SOCK_DEAD))
4825 sk->sk_data_ready(sk, 0);
4826 }
4827 }
4828}
4829
4830static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4831{
4832 struct tcp_sock *tp = tcp_sk(sk);
4833 int chunk = skb->len - hlen;
4834 int err;
4835
4836 local_bh_enable();
4837 if (skb_csum_unnecessary(skb))
4838 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
4839 else
4840 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
4841 tp->ucopy.iov);
4842
4843 if (!err) {
4844 tp->ucopy.len -= chunk;
4845 tp->copied_seq += chunk;
4846 tcp_rcv_space_adjust(sk);
4847 }
4848
4849 local_bh_disable();
4850 return err;
4851}
4852
4853static __sum16 __tcp_checksum_complete_user(struct sock *sk,
4854 struct sk_buff *skb)
4855{
4856 __sum16 result;
4857
4858 if (sock_owned_by_user(sk)) {
4859 local_bh_enable();
4860 result = __tcp_checksum_complete(skb);
4861 local_bh_disable();
4862 } else {
4863 result = __tcp_checksum_complete(skb);
4864 }
4865 return result;
4866}
4867
4868static inline bool tcp_checksum_complete_user(struct sock *sk,
4869 struct sk_buff *skb)
4870{
4871 return !skb_csum_unnecessary(skb) &&
4872 __tcp_checksum_complete_user(sk, skb);
4873}
4874
4875#ifdef CONFIG_NET_DMA
4876static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
4877 int hlen)
4878{
4879 struct tcp_sock *tp = tcp_sk(sk);
4880 int chunk = skb->len - hlen;
4881 int dma_cookie;
4882 bool copied_early = false;
4883
4884 if (tp->ucopy.wakeup)
4885 return false;
4886
4887 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
4888 tp->ucopy.dma_chan = net_dma_find_channel();
4889
4890 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
4891
4892 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
4893 skb, hlen,
4894 tp->ucopy.iov, chunk,
4895 tp->ucopy.pinned_list);
4896
4897 if (dma_cookie < 0)
4898 goto out;
4899
4900 tp->ucopy.dma_cookie = dma_cookie;
4901 copied_early = true;
4902
4903 tp->ucopy.len -= chunk;
4904 tp->copied_seq += chunk;
4905 tcp_rcv_space_adjust(sk);
4906
4907 if ((tp->ucopy.len == 0) ||
4908 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
4909 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
4910 tp->ucopy.wakeup = 1;
4911 sk->sk_data_ready(sk, 0);
4912 }
4913 } else if (chunk > 0) {
4914 tp->ucopy.wakeup = 1;
4915 sk->sk_data_ready(sk, 0);
4916 }
4917out:
4918 return copied_early;
4919}
4920#endif
4921
4922
4923
4924
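/* Does PAWS and seqno based validation of an incoming segment, flags will
 * play significant role here.
 */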
4925static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
4926 const struct tcphdr *th, int syn_inerr)
4927{
4928 struct tcp_sock *tp = tcp_sk(sk);
4929
4930
4931 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
4932 tcp_paws_discard(sk, skb)) {
4933 if (!th->rst) {
4934 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
4935 tcp_send_dupack(sk, skb);
4936 goto discard;
4937 }
4938
4939 }
4940
4941
4942 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
4943
4944
4945
4946
4947
4948
4949 if (!th->rst) {
4950 if (th->syn)
4951 goto syn_challenge;
4952 tcp_send_dupack(sk, skb);
4953 }
4954 goto discard;
4955 }
4956
4957
4958 if (th->rst) {
4959
4960
4961
4962
4963
4964
4965 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
4966 tcp_reset(sk);
4967 else
4968 tcp_send_challenge_ack(sk);
4969 goto discard;
4970 }
4971
4972
4973
4974
4975
4976
4977 if (th->syn) {
4978syn_challenge:
4979 if (syn_inerr)
4980 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
4981 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
4982 tcp_send_challenge_ack(sk);
4983 goto discard;
4984 }
4985
4986 return true;
4987
4988discard:
4989 __kfree_skb(skb);
4990 return false;
4991}
4992
5015
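/* TCP receive function for the ESTABLISHED state.
 *
 * It is split into a fast path and a slow path.  The fast path, keyed by
 * the pred_flags header prediction, is taken only for in-sequence pure
 * ACKs or pure data segments with no unusual flags, options, urgent data
 * or window changes; everything else falls through to the slow path.
 */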
5016int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5017 const struct tcphdr *th, unsigned int len)
5018{
5019 struct tcp_sock *tp = tcp_sk(sk);
5020
5021 if (unlikely(sk->sk_rx_dst == NULL))
5022 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038 tp->rx_opt.saw_tstamp = 0;
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5050 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5051 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5052 int tcp_header_len = tp->tcp_header_len;
5053
5054
5055
5056
5057
5058
5059
5060 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5061
5062 if (!tcp_parse_aligned_timestamp(tp, th))
5063 goto slow_path;
5064
5065
5066 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5067 goto slow_path;
5068
5069
5070
5071
5072
5073
5074 }
5075
5076 if (len <= tcp_header_len) {
5077
5078 if (len == tcp_header_len) {
5079
5080
5081
5082
5083 if (tcp_header_len ==
5084 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5085 tp->rcv_nxt == tp->rcv_wup)
5086 tcp_store_ts_recent(tp);
5087
5088
5089
5090
5091 tcp_ack(sk, skb, 0);
5092 __kfree_skb(skb);
5093 tcp_data_snd_check(sk);
5094 return 0;
5095 } else {
5096 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5097 goto discard;
5098 }
5099 } else {
5100 int eaten = 0;
5101 int copied_early = 0;
5102 bool fragstolen = false;
5103
5104 if (tp->copied_seq == tp->rcv_nxt &&
5105 len - tcp_header_len <= tp->ucopy.len) {
5106#ifdef CONFIG_NET_DMA
5107 if (tp->ucopy.task == current &&
5108 sock_owned_by_user(sk) &&
5109 tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5110 copied_early = 1;
5111 eaten = 1;
5112 }
5113#endif
5114 if (tp->ucopy.task == current &&
5115 sock_owned_by_user(sk) && !copied_early) {
5116 __set_current_state(TASK_RUNNING);
5117
5118 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
5119 eaten = 1;
5120 }
5121 if (eaten) {
5122
5123
5124
5125
5126 if (tcp_header_len ==
5127 (sizeof(struct tcphdr) +
5128 TCPOLEN_TSTAMP_ALIGNED) &&
5129 tp->rcv_nxt == tp->rcv_wup)
5130 tcp_store_ts_recent(tp);
5131
5132 tcp_rcv_rtt_measure_ts(sk, skb);
5133
5134 __skb_pull(skb, tcp_header_len);
5135 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5136 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5137 }
5138 if (copied_early)
5139 tcp_cleanup_rbuf(sk, skb->len);
5140 }
5141 if (!eaten) {
5142 if (tcp_checksum_complete_user(sk, skb))
5143 goto csum_error;
5144
5145 if ((int)skb->truesize > sk->sk_forward_alloc)
5146 goto step5;
5147
5148
5149
5150
5151
5152 if (tcp_header_len ==
5153 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5154 tp->rcv_nxt == tp->rcv_wup)
5155 tcp_store_ts_recent(tp);
5156
5157 tcp_rcv_rtt_measure_ts(sk, skb);
5158
5159 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5160
5161
5162 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5163 &fragstolen);
5164 }
5165
5166 tcp_event_data_recv(sk, skb);
5167
5168 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5169
5170 tcp_ack(sk, skb, FLAG_DATA);
5171 tcp_data_snd_check(sk);
5172 if (!inet_csk_ack_scheduled(sk))
5173 goto no_ack;
5174 }
5175
5176 if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
5177 __tcp_ack_snd_check(sk, 0);
5178no_ack:
5179#ifdef CONFIG_NET_DMA
5180 if (copied_early)
5181 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
5182 else
5183#endif
5184 if (eaten)
5185 kfree_skb_partial(skb, fragstolen);
5186 sk->sk_data_ready(sk, 0);
5187 return 0;
5188 }
5189 }
5190
5191slow_path:
5192 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5193 goto csum_error;
5194
5195 if (!th->ack && !th->rst)
5196 goto discard;
5197
5198
5199
5200
5201
5202 if (!tcp_validate_incoming(sk, skb, th, 1))
5203 return 0;
5204
5205step5:
5206 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5207 goto discard;
5208
5209 tcp_rcv_rtt_measure_ts(sk, skb);
5210
5211
5212 tcp_urg(sk, skb, th);
5213
5214
5215 tcp_data_queue(sk, skb);
5216
5217 tcp_data_snd_check(sk);
5218 tcp_ack_snd_check(sk);
5219 return 0;
5220
5221csum_error:
5222 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
5223 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5224
5225discard:
5226 __kfree_skb(skb);
5227 return 0;
5228}
5229EXPORT_SYMBOL(tcp_rcv_established);
5230
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_set_state(sk, TCP_ESTABLISHED);

	if (skb != NULL) {
		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
		security_inet_conn_established(sk, skb);
	}

	/* Make sure socket is routed, for correct metrics. */
	icsk->icsk_af_ops->rebuild_header(sk);

	tcp_init_metrics(sk);

	tcp_init_congestion_control(sk);

	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
	tp->lsndtime = tcp_time_stamp;

	tcp_init_buffer_space(sk);

	if (sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

	if (!tp->rx_opt.snd_wscale)
		__tcp_fast_path_on(tp, tp->snd_wnd);
	else
		tp->pred_flags = 0;

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);
		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
	}
}

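/* Handle the SYN-ACK of a Fast Open attempt: recover the announced MSS when a
 * user MSS is in force, record the cookie (or its absence) and a likely
 * SYN-data drop in the Fast Open cache, and retransmit any data sent with the
 * SYN that this SYN-ACK did not acknowledge.  Returns true if such data had
 * to be retransmitted.
 */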
static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
				    struct tcp_fastopen_cookie *cookie)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
	u16 mss = tp->rx_opt.mss_clamp;
	bool syn_drop;

	if (mss == tp->rx_opt.user_mss) {
		struct tcp_options_received opt;

		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
		tcp_clear_options(&opt);
		opt.user_mss = opt.mss_clamp = 0;
		tcp_parse_options(synack, &opt, 0, NULL);
		mss = opt.mss_clamp;
	}

	if (!tp->syn_fastopen)	/* Ignore an unsolicited cookie */
		cookie->len = -1;

	/* The SYN-ACK neither has a cookie nor acknowledges the data.
	 * Presumably the remote received only the retransmitted (regular)
	 * SYNs: either the original SYN-data or the corresponding SYN-ACK
	 * was lost.
	 */
	syn_drop = (cookie->len <= 0 && data && tp->total_retrans);

	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);

	if (data) {	/* Retransmit unacked data in SYN */
		tcp_for_write_queue_from(data, sk) {
			if (data == tcp_send_head(sk) ||
			    __tcp_retransmit_skb(sk, data))
				break;
		}
		tcp_rearm_rto(sk);
		return true;
	}
	tp->syn_data_acked = tp->syn_data;
	return false;
}

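/* Process an incoming segment while the socket is in SYN_SENT.  A valid
 * SYN-ACK completes the active open via tcp_finish_connect() (including the
 * Fast Open handling above); a SYN without an ACK is a simultaneous open and
 * moves the socket to SYN_RECV.  Returns 1 to make the caller send a reset,
 * 0 if the skb was consumed here, and -1 if the caller should finish steps
 * 6 and 7 (urgent data) itself.
 */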
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 const struct tcphdr *th, unsigned int len)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int saved_clamp = tp->rx_opt.mss_clamp;

	tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
		tp->rx_opt.rcv_tsecr -= tp->tsoffset;

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
		if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
		    after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
			goto reset_and_undo;

		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}

		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */
		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */
		TCP_ECN_rcv_synack(tp, th);

		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);

		if (!tp->rx_opt.wscale_ok) {
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized.
		 */
		tp->copied_seq = tp->rcv_nxt;

		smp_mb();

		tcp_finish_connect(sk, skb);

		if ((tp->syn_fastopen || tp->syn_data) &&
		    tcp_rcv_fastopen_synack(sk, skb, &foc))
			return -1;

		if (sk->sk_write_pending ||
		    icsk->icsk_accept_queue.rskq_defer_accept ||
		    icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not
			 * able to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);
			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
			tcp_enter_quickack_mode(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			__kfree_skb(skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment */

	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);
		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		TCP_ECN_rcv_syn(tp, th);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		tcp_send_synack(sk);
#if 0
		/* Returning -1 here would let the caller process data and
		 * URG carried by this ACKless SYN before the handshake
		 * completes.  That is deliberately disabled: such data is
		 * not accepted, so the segment is simply discarded below.
		 */
		return -1;
#else
		goto discard;
#endif
	}
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}

/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */

int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  const struct tcphdr *th, unsigned int len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock *req;
	int queued = 0;
	bool acceptable;

	tp->rx_opt.saw_tstamp = 0;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (th->fin)
				goto discard;
			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
				return 1;

			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed.
			 */
			kfree_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:
		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

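	/* A TCP Fast Open server socket keeps its request sock until the first
	 * ACK from the peer arrives.  Let tcp_check_req() validate this segment
	 * against it first; the segment is dropped if that validation fails.
	 */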
	req = tp->fastopen_rsk;
	if (req != NULL) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
		    sk->sk_state != TCP_FIN_WAIT1);

		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
			goto discard;
	}

	if (!th->ack && !th->rst)
		goto discard;

	if (!tcp_validate_incoming(sk, skb, th, 0))
		return 0;

	/* step 5: check the ACK field */
	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
				      FLAG_UPDATE_TS_RECENT) > 0;

	switch (sk->sk_state) {
	case TCP_SYN_RECV:
		if (!acceptable)
			return 1;

		/* Once we leave TCP_SYN_RECV, we no longer need req
		 * so release it.
		 */
		if (req) {
			tcp_synack_rtt_meas(sk, req);
			tp->total_retrans = req->num_retrans;

			reqsk_fastopen_remove(sk, req, false);
		} else {
			/* Make sure socket is routed, for correct metrics. */
			icsk->icsk_af_ops->rebuild_header(sk);
			tcp_init_congestion_control(sk);

			tcp_mtup_init(sk);
			tcp_init_buffer_space(sk);
			tp->copied_seq = tp->rcv_nxt;
		}
		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);
		sk->sk_state_change(sk);

		/* Note, that this wakeup is only for marginal crossed SYN
		 * case. Passively open sockets are not waked up, because
		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
		 */
		if (sk->sk_socket)
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);

		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

		if (tp->rx_opt.tstamp_ok)
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

		if (req) {
			/* Re-arm the timer because data may have been sent out.
			 * This is similar to the regular data transmission case
			 * when new data has just been ack'ed.
			 *
			 * (TFO) - we could try to be more aggressive and
			 * retransmit any data sooner based on when it was
			 * sent out.
			 */
			tcp_rearm_rto(sk);
		} else
			tcp_init_metrics(sk);

		/* Prevent spurious tcp_cwnd_restart() on first data packet */
		tp->lsndtime = tcp_time_stamp;

		tcp_initialize_rcv_mss(sk);
		tcp_fast_path_on(tp);
		break;

	case TCP_FIN_WAIT1: {
		struct dst_entry *dst;
		int tmo;

		/* A Fast Open server socket may reach FIN_WAIT1 while it
		 * still holds its request sock (it closed before the first
		 * ACK from the peer arrived), so handle req here much like
		 * TCP_SYN_RECV above.
		 */
		if (req != NULL) {
			/* Return RST if ack_seq is invalid.
			 * Note that RFC793 only says to generate a
			 * DUPACK for it but for TCP Fast Open it seems
			 * better to treat this case like TCP_SYN_RECV
			 * above.
			 */
			if (!acceptable)
				return 1;
			/* We no longer need the request sock. */
			reqsk_fastopen_remove(sk, req, false);
			tcp_rearm_rto(sk);
		}
		if (tp->snd_una != tp->write_seq)
			break;

		tcp_set_state(sk, TCP_FIN_WAIT2);
		sk->sk_shutdown |= SEND_SHUTDOWN;

		dst = __sk_dst_get(sk);
		if (dst)
			dst_confirm(dst);

		if (!sock_flag(sk, SOCK_DEAD)) {
			/* Wake up lingering close() */
			sk->sk_state_change(sk);
			break;
		}

		if (tp->linger2 < 0 ||
		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
		     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
			tcp_done(sk);
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
			return 1;
		}

		tmo = tcp_fin_time(sk);
		if (tmo > TCP_TIMEWAIT_LEN) {
			inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
		} else if (th->fin || sock_owned_by_user(sk)) {
			/* Bad case. We could lose such FIN otherwise.
			 * It is not a big problem, but it looks confusing
			 * and not so rare event. We still can lose it now,
			 * if it spins in bh_lock_sock(), but it is really
			 * marginal case.
			 */
			inet_csk_reset_keepalive_timer(sk, tmo);
		} else {
			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
			goto discard;
		}
		break;
	}

	case TCP_CLOSING:
		if (tp->snd_una == tp->write_seq) {
			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
			goto discard;
		}
		break;

	case TCP_LAST_ACK:
		if (tp->snd_una == tp->write_seq) {
			tcp_update_metrics(sk);
			tcp_done(sk);
			goto discard;
		}
		break;
	}

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		/* Fall through */
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data could move socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
