#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

int sysctl_tcp_challenge_ack_limit = 100;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = len;
	} else {
		/* Otherwise, we make more careful check taking into account,
		 * that SACKs block is variable.
		 *
		 * "len" is invariant segment length, including TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, packet should be full sized,
		     * provided peer TCP is not badly broken.  This
		     * observation allows us to handle super-low MTU
		     * links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract also invariant (if peer is RFC compliant)
			 * TCP header plus fixed timestamp option length.
			 * Resulting "len" is MSS free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

static void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	tcp_incr_quickack(sk);
	icsk->icsk_ack.pingpong = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive (pingpong).
 */
static inline bool tcp_in_quickack_mode(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}

static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr)
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* Funny extension: if ECT is not set on a segment,
		 * and we already have seen ECT on a previous segment,
		 * it is probably a retransmit.
		 */
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode((struct sock *)tp);
		break;
	case INET_ECN_CE:
		if (tcp_ca_needs_ecn((struct sock *)tp))
			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);

		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
			/* Better not delay acks, sender can have a very low cwnd */
			tcp_enter_quickack_mode((struct sock *)tp);
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		}
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	default:
		if (tcp_ca_needs_ecn((struct sock *)tp))
			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	}
}

static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		__tcp_ecn_check_ce(tp, skb);
}

static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return true;
	return false;
}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */
static void tcp_sndbuf_expand(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	int sndmem, per_mss;
	u32 nr_segs;

	/* Worst case is non GSO/TSO: each frame consumes one skb
	 * and skb->head is kmalloced using a power-of-two area of memory.
	 */
	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
		  MAX_TCP_HEADER +
		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	per_mss = roundup_pow_of_two(per_mss) +
		  SKB_DATA_ALIGN(sizeof(struct sk_buff));

	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);

	/* Fast Recovery (RFC 5681 3.2):
	 * factor 2 gives extra cushion (application might react slowly
	 * to POLLOUT).
	 */
	sndmem = 2 * nr_segs * per_mss;

	if (sk->sk_sndbuf < sndmem)
		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * The receiver keeps rcv_ssthresh, a "slow start threshold" for the
 * advertised window: it starts small and is grown toward window_clamp
 * only while received skbs show a reasonable overhead ratio
 * (skb->truesize vs. payload), so that a sender using pathological
 * framing cannot force us to commit excessive receive buffer memory.
 * The helpers below decide, per in-order data skb, whether the window
 * may be raised and by how much.
 */
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Check #1 */
	if (tp->rcv_ssthresh < tp->window_clamp &&
	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
	    !sk_under_memory_pressure(sk)) {
		int incr;

		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			incr = max_t(int, incr, 2 * skb->len);
			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
					       tp->window_clamp);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

/* 3. Tuning rcvbuf, when connection enters established state. */
static void tcp_fixup_rcvbuf(struct sock *sk)
{
	u32 mss = tcp_sk(sk)->advmss;
	int rcvmem;

	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
		 tcp_default_init_rwnd(mss);

	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency.
	 * Allow enough cushion so that the sender is not limited by our window.
	 */
	if (sysctl_tcp_moderate_rcvbuf)
		rcvmem <<= 2;

	if (sk->sk_rcvbuf < rcvmem)
		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
}

/* 4. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */
void tcp_init_buffer_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
		tcp_fixup_rcvbuf(sk);
	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_sndbuf_expand(sk);

	tp->rcvq_space.space = tp->rcv_wnd;
	tp->rcvq_space.time = tcp_time_stamp;
	tp->rcvq_space.seq = tp->copied_seq;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> sysctl_tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (sysctl_tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* 5. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !sk_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
				    sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer; we have no direct
 * information about it.  It is better to underestimate than overestimate:
 * overestimation makes us ACK less frequently than needed, while
 * underestimation is easy to detect and fix in tcp_measure_rcv_mss().
 */
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);
475
476
477
478
479
480
481
482
483
484
485
486
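/* Receiver-side RTT estimation (used only to size the receive buffer,
 * see tcp_rcv_space_adjust()).  With timestamps, each qualifying segment
 * yields a sample fed through a 1/8-gain EWMA; without timestamps we can
 * only observe how long rcv_nxt takes to advance by one full window
 * (win_dep), and such samples are noisy, so they are only allowed to
 * lower the estimate.  rcv_rtt_est.rtt is stored left-shifted by 3
 * (i.e. 8*RTT), like srtt.
 */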
487static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
488{
489 u32 new_sample = tp->rcv_rtt_est.rtt;
490 long m = sample;
491
492 if (m == 0)
493 m = 1;
494
495 if (new_sample != 0) {
496
497
498
499
500
501
502
503
504
505
506 if (!win_dep) {
507 m -= (new_sample >> 3);
508 new_sample += m;
509 } else {
510 m <<= 3;
511 if (m < new_sample)
512 new_sample = m;
513 }
514 } else {
515
516 new_sample = m << 3;
517 }
518
519 if (tp->rcv_rtt_est.rtt != new_sample)
520 tp->rcv_rtt_est.rtt = new_sample;
521}
522
523static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
524{
525 if (tp->rcv_rtt_est.time == 0)
526 goto new_measure;
527 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
528 return;
529 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
530
531new_measure:
532 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
533 tp->rcv_rtt_est.time = tcp_time_stamp;
534}
535
536static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
537 const struct sk_buff *skb)
538{
539 struct tcp_sock *tp = tcp_sk(sk);
540 if (tp->rx_opt.rcv_tsecr &&
541 (TCP_SKB_CB(skb)->end_seq -
542 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
543 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
544}
545
546
547
548
549
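/* Dynamic Right-Sizing (DRS) of the receive buffer.
 * Roughly once per receiver-estimated RTT, compare how much data the
 * application actually copied out against the previous estimate.  If the
 * consumption rate grew, scale window_clamp and sk_rcvbuf up (bounded by
 * sysctl_tcp_rmem[2]) so the advertised window does not become the
 * bottleneck for the sender.
 */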
550void tcp_rcv_space_adjust(struct sock *sk)
551{
552 struct tcp_sock *tp = tcp_sk(sk);
553 int time;
554 int copied;
555
556 time = tcp_time_stamp - tp->rcvq_space.time;
557 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
558 return;
559
560
561 copied = tp->copied_seq - tp->rcvq_space.seq;
562 if (copied <= tp->rcvq_space.space)
563 goto new_measure;
564
565
566
567
568
569
570
571
572
573
574 if (sysctl_tcp_moderate_rcvbuf &&
575 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
576 int rcvwin, rcvmem, rcvbuf;
577
578
579
580
581 rcvwin = (copied << 1) + 16 * tp->advmss;
582
583
584
585
586
587
588 if (copied >=
589 tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
590 if (copied >=
591 tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
592 rcvwin <<= 1;
593 else
594 rcvwin += (rcvwin >> 1);
595 }
596
597 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
598 while (tcp_win_from_space(rcvmem) < tp->advmss)
599 rcvmem += 128;
600
601 rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
602 if (rcvbuf > sk->sk_rcvbuf) {
603 sk->sk_rcvbuf = rcvbuf;
604
605
606 tp->window_clamp = rcvwin;
607 }
608 }
609 tp->rcvq_space.space = copied;
610
611new_measure:
612 tp->rcvq_space.seq = tp->copied_seq;
613 tp->rcvq_space.time = tcp_time_stamp;
614}
615
616
617
618
619
620
621
622
623
624
625
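/* Bookkeeping done for every in-order data segment received:
 * schedule an ACK, refresh rcv_mss and the receiver RTT estimate,
 * adapt the delayed-ACK timeout (ato) to the observed inter-arrival gap,
 * note CE marks for ECN, and possibly grow the advertised window.
 */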
626static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
627{
628 struct tcp_sock *tp = tcp_sk(sk);
629 struct inet_connection_sock *icsk = inet_csk(sk);
630 u32 now;
631
632 inet_csk_schedule_ack(sk);
633
634 tcp_measure_rcv_mss(sk, skb);
635
636 tcp_rcv_rtt_measure(tp);
637
638 now = tcp_time_stamp;
639
640 if (!icsk->icsk_ack.ato) {
641
642
643
644 tcp_incr_quickack(sk);
645 icsk->icsk_ack.ato = TCP_ATO_MIN;
646 } else {
647 int m = now - icsk->icsk_ack.lrcvtime;
648
649 if (m <= TCP_ATO_MIN / 2) {
650
651 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
652 } else if (m < icsk->icsk_ack.ato) {
653 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
654 if (icsk->icsk_ack.ato > icsk->icsk_rto)
655 icsk->icsk_ack.ato = icsk->icsk_rto;
656 } else if (m > icsk->icsk_rto) {
657
658
659
660 tcp_incr_quickack(sk);
661 sk_mem_reclaim(sk);
662 }
663 }
664 icsk->icsk_ack.lrcvtime = now;
665
666 tcp_ecn_check_ce(tp, skb);
667
668 if (skb->len >= 128)
669 tcp_grow_window(sk, skb);
670}
671
672
673
674
675
676
677
678
679
680
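/* RTT estimator in the style of Jacobson/Karels (see RFC 6298), in usec,
 * with srtt_us stored left-shifted by 3 and mdev_us by 2.  Conceptually:
 *
 *	srtt = (7/8) * srtt + (1/8) * sample
 *	mdev = (3/4) * mdev + (1/4) * |sample - srtt|
 *
 * with extra damping of mdev when the deviation is shrinking.
 * mdev_max_us/rttvar_us track the maximum mdev over roughly one RTT, so
 * rttvar decays slowly and the RTO does not collapse after one quiet sample.
 */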
681static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
682{
683 struct tcp_sock *tp = tcp_sk(sk);
684 long m = mrtt_us;
685 u32 srtt = tp->srtt_us;
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703 if (srtt != 0) {
704 m -= (srtt >> 3);
705 srtt += m;
706 if (m < 0) {
707 m = -m;
708 m -= (tp->mdev_us >> 2);
709
710
711
712
713
714
715
716
717 if (m > 0)
718 m >>= 3;
719 } else {
720 m -= (tp->mdev_us >> 2);
721 }
722 tp->mdev_us += m;
723 if (tp->mdev_us > tp->mdev_max_us) {
724 tp->mdev_max_us = tp->mdev_us;
725 if (tp->mdev_max_us > tp->rttvar_us)
726 tp->rttvar_us = tp->mdev_max_us;
727 }
728 if (after(tp->snd_una, tp->rtt_seq)) {
729 if (tp->mdev_max_us < tp->rttvar_us)
730 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
731 tp->rtt_seq = tp->snd_nxt;
732 tp->mdev_max_us = tcp_rto_min_us(sk);
733 }
734 } else {
735
736 srtt = m << 3;
737 tp->mdev_us = m << 1;
738 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
739 tp->mdev_max_us = tp->rttvar_us;
740 tp->rtt_seq = tp->snd_nxt;
741 }
742 tp->srtt_us = max(1U, srtt);
743}
744
745
746
747
748
749
750
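/* Set the pacing rate from cwnd and srtt: roughly
 * rate = 2 * cwnd * mss / srtt, i.e. enough to send one cwnd of data in
 * half an RTT, capped by sk_max_pacing_rate.
 */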
751static void tcp_update_pacing_rate(struct sock *sk)
752{
753 const struct tcp_sock *tp = tcp_sk(sk);
754 u64 rate;
755
756
757 rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
758
759 rate *= max(tp->snd_cwnd, tp->packets_out);
760
761 if (likely(tp->srtt_us))
762 do_div(rate, tp->srtt_us);
763
764
765
766
767
768 ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
769 sk->sk_max_pacing_rate);
770}
771
772
773
774
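/* Compute the retransmission timeout: RTO = srtt + 4 * rttvar (RFC 6298),
 * then cap it at TCP_RTO_MAX via tcp_bound_rto().  The lower bound comes
 * from rttvar never dropping below tcp_rto_min_us().
 */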
775static void tcp_set_rto(struct sock *sk)
776{
777 const struct tcp_sock *tp = tcp_sk(sk);
778
779
780
781
782
783
784
785
786
787
788 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
789
790
791
792
793
794
795
796
797
798
799 tcp_bound_rto(sk);
800}
801
802__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
803{
804 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
805
806 if (!cwnd)
807 cwnd = TCP_INIT_CWND;
808 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
809}
810
811
812
813
814
815void tcp_disable_fack(struct tcp_sock *tp)
816{
817
818 if (tcp_is_fack(tp))
819 tp->lost_skb_hint = NULL;
820 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
821}
822
823
824static void tcp_dsack_seen(struct tcp_sock *tp)
825{
826 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
827}
828
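/* Record a newly observed reordering distance.  If it exceeds the current
 * tp->reordering, raise the metric (capped at TCP_MAX_REORDERING), count
 * it in the matching MIB and disable FACK, which assumes in-order
 * delivery.  Any detected reordering also disables early retransmit.
 */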
829static void tcp_update_reordering(struct sock *sk, const int metric,
830 const int ts)
831{
832 struct tcp_sock *tp = tcp_sk(sk);
833 if (metric > tp->reordering) {
834 int mib_idx;
835
836 tp->reordering = min(TCP_MAX_REORDERING, metric);
837
838
839 if (ts)
840 mib_idx = LINUX_MIB_TCPTSREORDER;
841 else if (tcp_is_reno(tp))
842 mib_idx = LINUX_MIB_TCPRENOREORDER;
843 else if (tcp_is_fack(tp))
844 mib_idx = LINUX_MIB_TCPFACKREORDER;
845 else
846 mib_idx = LINUX_MIB_TCPSACKREORDER;
847
848 NET_INC_STATS_BH(sock_net(sk), mib_idx);
849#if FASTRETRANS_DEBUG > 1
850 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
851 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
852 tp->reordering,
853 tp->fackets_out,
854 tp->sacked_out,
855 tp->undo_marker ? tp->undo_retrans : 0);
856#endif
857 tcp_disable_fack(tp);
858 }
859
860 if (metric > 0)
861 tcp_disable_early_retrans(tp);
862}
863
864
865static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
866{
867 if ((tp->retransmit_skb_hint == NULL) ||
868 before(TCP_SKB_CB(skb)->seq,
869 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
870 tp->retransmit_skb_hint = skb;
871
872 if (!tp->lost_out ||
873 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
874 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
875}
876
877static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
878{
879 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
880 tcp_verify_retransmit_hint(tp, skb);
881
882 tp->lost_out += tcp_skb_pcount(skb);
883 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
884 }
885}
886
887static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
888 struct sk_buff *skb)
889{
890 tcp_verify_retransmit_hint(tp, skb);
891
892 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
893 tp->lost_out += tcp_skb_pcount(skb);
894 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
895 }
896}
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
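/* Validate an incoming SACK block against the send window (RFC 2018, and
 * RFC 2883 for D-SACK): a normal SACK block must lie within
 * (snd_una, snd_nxt], while a D-SACK block may also cover already-acked
 * data back to undo_marker (bounded by max_window), so that spurious
 * retransmissions can still be detected.
 */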
992static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
993 u32 start_seq, u32 end_seq)
994{
995
996 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
997 return false;
998
999
1000 if (!before(start_seq, tp->snd_nxt))
1001 return false;
1002
1003
1004
1005
1006 if (after(start_seq, tp->snd_una))
1007 return true;
1008
1009 if (!is_dsack || !tp->undo_marker)
1010 return false;
1011
1012
1013 if (after(end_seq, tp->snd_una))
1014 return false;
1015
1016 if (!before(start_seq, tp->undo_marker))
1017 return true;
1018
1019
1020 if (!after(end_seq, tp->undo_marker))
1021 return false;
1022
1023
1024
1025
1026 return !before(start_seq, end_seq - tp->max_window);
1027}
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
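/* FACK-based detection of lost retransmissions: if data sent after a
 * retransmitted segment has since been SACKed while the retransmission
 * itself is still unacknowledged, assume that retransmission was lost
 * too: clear its SACKED_RETRANS bit and mark it lost again.
 * Only used in CA_Recovery with FACK enabled.
 */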
1038static void tcp_mark_lost_retrans(struct sock *sk)
1039{
1040 const struct inet_connection_sock *icsk = inet_csk(sk);
1041 struct tcp_sock *tp = tcp_sk(sk);
1042 struct sk_buff *skb;
1043 int cnt = 0;
1044 u32 new_low_seq = tp->snd_nxt;
1045 u32 received_upto = tcp_highest_sack_seq(tp);
1046
1047 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1048 !after(received_upto, tp->lost_retrans_low) ||
1049 icsk->icsk_ca_state != TCP_CA_Recovery)
1050 return;
1051
1052 tcp_for_write_queue(skb, sk) {
1053 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1054
1055 if (skb == tcp_send_head(sk))
1056 break;
1057 if (cnt == tp->retrans_out)
1058 break;
1059 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1060 continue;
1061
1062 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1063 continue;
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076 if (after(received_upto, ack_seq)) {
1077 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1078 tp->retrans_out -= tcp_skb_pcount(skb);
1079
1080 tcp_skb_mark_lost_uncond_verify(tp, skb);
1081 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1082 } else {
1083 if (before(ack_seq, new_low_seq))
1084 new_low_seq = ack_seq;
1085 cnt += tcp_skb_pcount(skb);
1086 }
1087 }
1088
1089 if (tp->retrans_out)
1090 tp->lost_retrans_low = new_low_seq;
1091}
1092
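/* Detect D-SACK (RFC 2883): either the first SACK block lies below the
 * cumulative ACK, or it is fully contained in the second block.  A D-SACK
 * covering data retransmitted after undo_marker decrements undo_retrans,
 * which is how spurious recoveries eventually get undone.
 */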
1093static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1094 struct tcp_sack_block_wire *sp, int num_sacks,
1095 u32 prior_snd_una)
1096{
1097 struct tcp_sock *tp = tcp_sk(sk);
1098 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1099 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1100 bool dup_sack = false;
1101
1102 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1103 dup_sack = true;
1104 tcp_dsack_seen(tp);
1105 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1106 } else if (num_sacks > 1) {
1107 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1108 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1109
1110 if (!after(end_seq_0, end_seq_1) &&
1111 !before(start_seq_0, start_seq_1)) {
1112 dup_sack = true;
1113 tcp_dsack_seen(tp);
1114 NET_INC_STATS_BH(sock_net(sk),
1115 LINUX_MIB_TCPDSACKOFORECV);
1116 }
1117 }
1118
1119
1120 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1121 !after(end_seq_0, prior_snd_una) &&
1122 after(end_seq_0, tp->undo_marker))
1123 tp->undo_retrans--;
1124
1125 return dup_sack;
1126}
1127
1128struct tcp_sacktag_state {
1129 int reord;
1130 int fack_count;
1131 long rtt_us;
1132 int flag;
1133};
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1144 u32 start_seq, u32 end_seq)
1145{
1146 int err;
1147 bool in_sack;
1148 unsigned int pkt_len;
1149 unsigned int mss;
1150
1151 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1152 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1153
1154 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1155 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1156 mss = tcp_skb_mss(skb);
1157 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1158
1159 if (!in_sack) {
1160 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1161 if (pkt_len < mss)
1162 pkt_len = mss;
1163 } else {
1164 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1165 if (pkt_len < mss)
1166 return -EINVAL;
1167 }
1168
1169
1170
1171
1172 if (pkt_len > mss) {
1173 unsigned int new_len = (pkt_len / mss) * mss;
1174 if (!in_sack && new_len < pkt_len) {
1175 new_len += mss;
1176 if (new_len >= skb->len)
1177 return 0;
1178 }
1179 pkt_len = new_len;
1180 }
1181 err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
1182 if (err < 0)
1183 return err;
1184 }
1185
1186 return in_sack;
1187}
1188
1189
1190static u8 tcp_sacktag_one(struct sock *sk,
1191 struct tcp_sacktag_state *state, u8 sacked,
1192 u32 start_seq, u32 end_seq,
1193 int dup_sack, int pcount,
1194 const struct skb_mstamp *xmit_time)
1195{
1196 struct tcp_sock *tp = tcp_sk(sk);
1197 int fack_count = state->fack_count;
1198
1199
1200 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1201 if (tp->undo_marker && tp->undo_retrans > 0 &&
1202 after(end_seq, tp->undo_marker))
1203 tp->undo_retrans--;
1204 if (sacked & TCPCB_SACKED_ACKED)
1205 state->reord = min(fack_count, state->reord);
1206 }
1207
1208
1209 if (!after(end_seq, tp->snd_una))
1210 return sacked;
1211
1212 if (!(sacked & TCPCB_SACKED_ACKED)) {
1213 if (sacked & TCPCB_SACKED_RETRANS) {
1214
1215
1216
1217
1218 if (sacked & TCPCB_LOST) {
1219 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1220 tp->lost_out -= pcount;
1221 tp->retrans_out -= pcount;
1222 }
1223 } else {
1224 if (!(sacked & TCPCB_RETRANS)) {
1225
1226
1227
1228 if (before(start_seq,
1229 tcp_highest_sack_seq(tp)))
1230 state->reord = min(fack_count,
1231 state->reord);
1232 if (!after(end_seq, tp->high_seq))
1233 state->flag |= FLAG_ORIG_SACK_ACKED;
1234
1235 if (state->rtt_us < 0) {
1236 struct skb_mstamp now;
1237
1238 skb_mstamp_get(&now);
1239 state->rtt_us = skb_mstamp_us_delta(&now,
1240 xmit_time);
1241 }
1242 }
1243
1244 if (sacked & TCPCB_LOST) {
1245 sacked &= ~TCPCB_LOST;
1246 tp->lost_out -= pcount;
1247 }
1248 }
1249
1250 sacked |= TCPCB_SACKED_ACKED;
1251 state->flag |= FLAG_DATA_SACKED;
1252 tp->sacked_out += pcount;
1253
1254 fack_count += pcount;
1255
1256
1257 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1258 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1259 tp->lost_cnt_hint += pcount;
1260
1261 if (fack_count > tp->fackets_out)
1262 tp->fackets_out = fack_count;
1263 }
1264
1265
1266
1267
1268
1269 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1270 sacked &= ~TCPCB_SACKED_RETRANS;
1271 tp->retrans_out -= pcount;
1272 }
1273
1274 return sacked;
1275}
1276
1277
1278
1279
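/* Shift newly-SACKed bytes from skb into the previous (already SACKed)
 * skb on the write queue: account the moved pcount via tcp_sacktag_one(),
 * adjust sequence numbers and GSO segment counts, and if skb becomes
 * empty, merge its flags into prev and unlink/free it.
 */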
1280static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1281 struct tcp_sacktag_state *state,
1282 unsigned int pcount, int shifted, int mss,
1283 bool dup_sack)
1284{
1285 struct tcp_sock *tp = tcp_sk(sk);
1286 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1287 u32 start_seq = TCP_SKB_CB(skb)->seq;
1288 u32 end_seq = start_seq + shifted;
1289
1290 BUG_ON(!pcount);
1291
1292
1293
1294
1295
1296
1297
1298 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1299 start_seq, end_seq, dup_sack, pcount,
1300 &skb->skb_mstamp);
1301
1302 if (skb == tp->lost_skb_hint)
1303 tp->lost_cnt_hint += pcount;
1304
1305 TCP_SKB_CB(prev)->end_seq += shifted;
1306 TCP_SKB_CB(skb)->seq += shifted;
1307
1308 tcp_skb_pcount_add(prev, pcount);
1309 BUG_ON(tcp_skb_pcount(skb) < pcount);
1310 tcp_skb_pcount_add(skb, -pcount);
1311
1312
1313
1314
1315
1316
1317 if (!skb_shinfo(prev)->gso_size) {
1318 skb_shinfo(prev)->gso_size = mss;
1319 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1320 }
1321
1322
1323 if (tcp_skb_pcount(skb) <= 1) {
1324 skb_shinfo(skb)->gso_size = 0;
1325 skb_shinfo(skb)->gso_type = 0;
1326 }
1327
1328
1329 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1330
1331 if (skb->len > 0) {
1332 BUG_ON(!tcp_skb_pcount(skb));
1333 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1334 return false;
1335 }
1336
1337
1338
1339 if (skb == tp->retransmit_skb_hint)
1340 tp->retransmit_skb_hint = prev;
1341 if (skb == tp->lost_skb_hint) {
1342 tp->lost_skb_hint = prev;
1343 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1344 }
1345
1346 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1347 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1348 TCP_SKB_CB(prev)->end_seq++;
1349
1350 if (skb == tcp_highest_sack(sk))
1351 tcp_advance_highest_sack(sk, skb);
1352
1353 tcp_unlink_write_queue(skb, sk);
1354 sk_wmem_free_skb(sk, skb);
1355
1356 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1357
1358 return true;
1359}
1360
1361
1362
1363
1364static int tcp_skb_seglen(const struct sk_buff *skb)
1365{
1366 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1367}
1368
1369
1370static int skb_can_shift(const struct sk_buff *skb)
1371{
1372 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1373}
1374
1375
1376
1377
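/* Try to coalesce the newly SACKed range with the previous skb on the
 * write queue by shifting paged data into it, which keeps the write queue
 * short under heavy SACKing.  Returns the skb to continue the walk from,
 * the original skb if nothing needs doing, or NULL to fall back to
 * per-skb tagging.
 */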
1378static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1379 struct tcp_sacktag_state *state,
1380 u32 start_seq, u32 end_seq,
1381 bool dup_sack)
1382{
1383 struct tcp_sock *tp = tcp_sk(sk);
1384 struct sk_buff *prev;
1385 int mss;
1386 int pcount = 0;
1387 int len;
1388 int in_sack;
1389
1390 if (!sk_can_gso(sk))
1391 goto fallback;
1392
1393
1394 if (!dup_sack &&
1395 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1396 goto fallback;
1397 if (!skb_can_shift(skb))
1398 goto fallback;
1399
1400 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1401 goto fallback;
1402
1403
1404 if (unlikely(skb == tcp_write_queue_head(sk)))
1405 goto fallback;
1406 prev = tcp_write_queue_prev(sk, skb);
1407
1408 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1409 goto fallback;
1410
1411 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1412 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1413
1414 if (in_sack) {
1415 len = skb->len;
1416 pcount = tcp_skb_pcount(skb);
1417 mss = tcp_skb_seglen(skb);
1418
1419
1420
1421
1422 if (mss != tcp_skb_seglen(prev))
1423 goto fallback;
1424 } else {
1425 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1426 goto noop;
1427
1428
1429
1430
1431 if (tcp_skb_pcount(skb) <= 1)
1432 goto noop;
1433
1434 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1435 if (!in_sack) {
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447 goto fallback;
1448 }
1449
1450 len = end_seq - TCP_SKB_CB(skb)->seq;
1451 BUG_ON(len < 0);
1452 BUG_ON(len > skb->len);
1453
1454
1455
1456
1457
1458 mss = tcp_skb_mss(skb);
1459
1460
1461
1462
1463 if (mss != tcp_skb_seglen(prev))
1464 goto fallback;
1465
1466 if (len == mss) {
1467 pcount = 1;
1468 } else if (len < mss) {
1469 goto noop;
1470 } else {
1471 pcount = len / mss;
1472 len = pcount * mss;
1473 }
1474 }
1475
1476
1477 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1478 goto fallback;
1479
1480 if (!skb_shift(prev, skb, len))
1481 goto fallback;
1482 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1483 goto out;
1484
1485
1486
1487
1488 if (prev == tcp_write_queue_tail(sk))
1489 goto out;
1490 skb = tcp_write_queue_next(sk, prev);
1491
1492 if (!skb_can_shift(skb) ||
1493 (skb == tcp_send_head(sk)) ||
1494 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1495 (mss != tcp_skb_seglen(skb)))
1496 goto out;
1497
1498 len = skb->len;
1499 if (skb_shift(prev, skb, len)) {
1500 pcount += tcp_skb_pcount(skb);
1501 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1502 }
1503
1504out:
1505 state->fack_count += pcount;
1506 return prev;
1507
1508noop:
1509 return skb;
1510
1511fallback:
1512 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1513 return NULL;
1514}
1515
1516static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1517 struct tcp_sack_block *next_dup,
1518 struct tcp_sacktag_state *state,
1519 u32 start_seq, u32 end_seq,
1520 bool dup_sack_in)
1521{
1522 struct tcp_sock *tp = tcp_sk(sk);
1523 struct sk_buff *tmp;
1524
1525 tcp_for_write_queue_from(skb, sk) {
1526 int in_sack = 0;
1527 bool dup_sack = dup_sack_in;
1528
1529 if (skb == tcp_send_head(sk))
1530 break;
1531
1532
1533 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1534 break;
1535
1536 if ((next_dup != NULL) &&
1537 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1538 in_sack = tcp_match_skb_to_sack(sk, skb,
1539 next_dup->start_seq,
1540 next_dup->end_seq);
1541 if (in_sack > 0)
1542 dup_sack = true;
1543 }
1544
1545
1546
1547
1548
1549 if (in_sack <= 0) {
1550 tmp = tcp_shift_skb_data(sk, skb, state,
1551 start_seq, end_seq, dup_sack);
1552 if (tmp != NULL) {
1553 if (tmp != skb) {
1554 skb = tmp;
1555 continue;
1556 }
1557
1558 in_sack = 0;
1559 } else {
1560 in_sack = tcp_match_skb_to_sack(sk, skb,
1561 start_seq,
1562 end_seq);
1563 }
1564 }
1565
1566 if (unlikely(in_sack < 0))
1567 break;
1568
1569 if (in_sack) {
1570 TCP_SKB_CB(skb)->sacked =
1571 tcp_sacktag_one(sk,
1572 state,
1573 TCP_SKB_CB(skb)->sacked,
1574 TCP_SKB_CB(skb)->seq,
1575 TCP_SKB_CB(skb)->end_seq,
1576 dup_sack,
1577 tcp_skb_pcount(skb),
1578 &skb->skb_mstamp);
1579
1580 if (!before(TCP_SKB_CB(skb)->seq,
1581 tcp_highest_sack_seq(tp)))
1582 tcp_advance_highest_sack(sk, skb);
1583 }
1584
1585 state->fack_count += tcp_skb_pcount(skb);
1586 }
1587 return skb;
1588}
1589
1590
1591
1592
1593static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1594 struct tcp_sacktag_state *state,
1595 u32 skip_to_seq)
1596{
1597 tcp_for_write_queue_from(skb, sk) {
1598 if (skb == tcp_send_head(sk))
1599 break;
1600
1601 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1602 break;
1603
1604 state->fack_count += tcp_skb_pcount(skb);
1605 }
1606 return skb;
1607}
1608
1609static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1610 struct sock *sk,
1611 struct tcp_sack_block *next_dup,
1612 struct tcp_sacktag_state *state,
1613 u32 skip_to_seq)
1614{
1615 if (next_dup == NULL)
1616 return skb;
1617
1618 if (before(next_dup->start_seq, skip_to_seq)) {
1619 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1620 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1621 next_dup->start_seq, next_dup->end_seq,
1622 1);
1623 }
1624
1625 return skb;
1626}
1627
1628static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1629{
1630 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1631}
1632
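/* Main SACK processing: parse up to TCP_NUM_SACKS blocks from the ACK,
 * validate and sort them, then walk the write queue tagging skbs as
 * SACKed (using recv_sack_cache to skip ranges already processed),
 * update reordering/fackets accounting and return the FLAG_* bits.
 */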
1633static int
1634tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1635 u32 prior_snd_una, long *sack_rtt_us)
1636{
1637 struct tcp_sock *tp = tcp_sk(sk);
1638 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1639 TCP_SKB_CB(ack_skb)->sacked);
1640 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1641 struct tcp_sack_block sp[TCP_NUM_SACKS];
1642 struct tcp_sack_block *cache;
1643 struct tcp_sacktag_state state;
1644 struct sk_buff *skb;
1645 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1646 int used_sacks;
1647 bool found_dup_sack = false;
1648 int i, j;
1649 int first_sack_index;
1650
1651 state.flag = 0;
1652 state.reord = tp->packets_out;
1653 state.rtt_us = -1L;
1654
1655 if (!tp->sacked_out) {
1656 if (WARN_ON(tp->fackets_out))
1657 tp->fackets_out = 0;
1658 tcp_highest_sack_reset(sk);
1659 }
1660
1661 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1662 num_sacks, prior_snd_una);
1663 if (found_dup_sack)
1664 state.flag |= FLAG_DSACKING_ACK;
1665
1666
1667
1668
1669
1670 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1671 return 0;
1672
1673 if (!tp->packets_out)
1674 goto out;
1675
1676 used_sacks = 0;
1677 first_sack_index = 0;
1678 for (i = 0; i < num_sacks; i++) {
1679 bool dup_sack = !i && found_dup_sack;
1680
1681 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1682 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1683
1684 if (!tcp_is_sackblock_valid(tp, dup_sack,
1685 sp[used_sacks].start_seq,
1686 sp[used_sacks].end_seq)) {
1687 int mib_idx;
1688
1689 if (dup_sack) {
1690 if (!tp->undo_marker)
1691 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1692 else
1693 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1694 } else {
1695
1696 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1697 !after(sp[used_sacks].end_seq, tp->snd_una))
1698 continue;
1699 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1700 }
1701
1702 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1703 if (i == 0)
1704 first_sack_index = -1;
1705 continue;
1706 }
1707
1708
1709 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1710 continue;
1711
1712 used_sacks++;
1713 }
1714
1715
1716 for (i = used_sacks - 1; i > 0; i--) {
1717 for (j = 0; j < i; j++) {
1718 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1719 swap(sp[j], sp[j + 1]);
1720
1721
1722 if (j == first_sack_index)
1723 first_sack_index = j + 1;
1724 }
1725 }
1726 }
1727
1728 skb = tcp_write_queue_head(sk);
1729 state.fack_count = 0;
1730 i = 0;
1731
1732 if (!tp->sacked_out) {
1733
1734 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1735 } else {
1736 cache = tp->recv_sack_cache;
1737
1738 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1739 !cache->end_seq)
1740 cache++;
1741 }
1742
1743 while (i < used_sacks) {
1744 u32 start_seq = sp[i].start_seq;
1745 u32 end_seq = sp[i].end_seq;
1746 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1747 struct tcp_sack_block *next_dup = NULL;
1748
1749 if (found_dup_sack && ((i + 1) == first_sack_index))
1750 next_dup = &sp[i + 1];
1751
1752
1753 while (tcp_sack_cache_ok(tp, cache) &&
1754 !before(start_seq, cache->end_seq))
1755 cache++;
1756
1757
1758 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1759 after(end_seq, cache->start_seq)) {
1760
1761
1762 if (before(start_seq, cache->start_seq)) {
1763 skb = tcp_sacktag_skip(skb, sk, &state,
1764 start_seq);
1765 skb = tcp_sacktag_walk(skb, sk, next_dup,
1766 &state,
1767 start_seq,
1768 cache->start_seq,
1769 dup_sack);
1770 }
1771
1772
1773 if (!after(end_seq, cache->end_seq))
1774 goto advance_sp;
1775
1776 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1777 &state,
1778 cache->end_seq);
1779
1780
1781 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1782
1783 skb = tcp_highest_sack(sk);
1784 if (skb == NULL)
1785 break;
1786 state.fack_count = tp->fackets_out;
1787 cache++;
1788 goto walk;
1789 }
1790
1791 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1792
1793 cache++;
1794 continue;
1795 }
1796
1797 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1798 skb = tcp_highest_sack(sk);
1799 if (skb == NULL)
1800 break;
1801 state.fack_count = tp->fackets_out;
1802 }
1803 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1804
1805walk:
1806 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1807 start_seq, end_seq, dup_sack);
1808
1809advance_sp:
1810 i++;
1811 }
1812
1813
1814 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1815 tp->recv_sack_cache[i].start_seq = 0;
1816 tp->recv_sack_cache[i].end_seq = 0;
1817 }
1818 for (j = 0; j < used_sacks; j++)
1819 tp->recv_sack_cache[i++] = sp[j];
1820
1821 tcp_mark_lost_retrans(sk);
1822
1823 tcp_verify_left_out(tp);
1824
1825 if ((state.reord < tp->fackets_out) &&
1826 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1827 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1828
1829out:
1830
1831#if FASTRETRANS_DEBUG > 0
1832 WARN_ON((int)tp->sacked_out < 0);
1833 WARN_ON((int)tp->lost_out < 0);
1834 WARN_ON((int)tp->retrans_out < 0);
1835 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1836#endif
1837 *sack_rtt_us = state.rtt_us;
1838 return state.flag;
1839}
1840
1841
1842
1843
1844static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1845{
1846 u32 holes;
1847
1848 holes = max(tp->lost_out, 1U);
1849 holes = min(holes, tp->packets_out);
1850
1851 if ((tp->sacked_out + holes) > tp->packets_out) {
1852 tp->sacked_out = tp->packets_out - holes;
1853 return true;
1854 }
1855 return false;
1856}
1857
1858
1859
1860
1861
1862static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1863{
1864 struct tcp_sock *tp = tcp_sk(sk);
1865 if (tcp_limit_reno_sacked(tp))
1866 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1867}
1868
1869
1870
1871static void tcp_add_reno_sack(struct sock *sk)
1872{
1873 struct tcp_sock *tp = tcp_sk(sk);
1874 tp->sacked_out++;
1875 tcp_check_reno_reordering(sk, 0);
1876 tcp_verify_left_out(tp);
1877}
1878
1879
1880
1881static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1882{
1883 struct tcp_sock *tp = tcp_sk(sk);
1884
1885 if (acked > 0) {
1886
1887 if (acked - 1 >= tp->sacked_out)
1888 tp->sacked_out = 0;
1889 else
1890 tp->sacked_out -= acked - 1;
1891 }
1892 tcp_check_reno_reordering(sk, acked);
1893 tcp_verify_left_out(tp);
1894}
1895
1896static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1897{
1898 tp->sacked_out = 0;
1899}
1900
1901void tcp_clear_retrans(struct tcp_sock *tp)
1902{
1903 tp->retrans_out = 0;
1904 tp->lost_out = 0;
1905 tp->undo_marker = 0;
1906 tp->undo_retrans = -1;
1907 tp->fackets_out = 0;
1908 tp->sacked_out = 0;
1909}
1910
1911static inline void tcp_init_undo(struct tcp_sock *tp)
1912{
1913 tp->undo_marker = tp->snd_una;
1914
1915 tp->undo_retrans = tp->retrans_out ? : -1;
1916}
1917
1918
1919
1920
1921
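/* Enter the Loss state (called on RTO): remember ssthresh for a possible
 * later undo, reduce cwnd to 1, mark un-SACKed segments lost (all of them
 * if the receiver appears to have reneged on its SACKs), and arm F-RTO
 * detection of spurious timeouts when sysctl_tcp_frto allows it.
 */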
1922void tcp_enter_loss(struct sock *sk)
1923{
1924 const struct inet_connection_sock *icsk = inet_csk(sk);
1925 struct tcp_sock *tp = tcp_sk(sk);
1926 struct sk_buff *skb;
1927 bool new_recovery = false;
1928 bool is_reneg;
1929
1930
1931 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1932 !after(tp->high_seq, tp->snd_una) ||
1933 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1934 new_recovery = true;
1935 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1936 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1937 tcp_ca_event(sk, CA_EVENT_LOSS);
1938 tcp_init_undo(tp);
1939 }
1940 tp->snd_cwnd = 1;
1941 tp->snd_cwnd_cnt = 0;
1942 tp->snd_cwnd_stamp = tcp_time_stamp;
1943
1944 tp->retrans_out = 0;
1945 tp->lost_out = 0;
1946
1947 if (tcp_is_reno(tp))
1948 tcp_reset_reno_sack(tp);
1949
1950 skb = tcp_write_queue_head(sk);
1951 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
1952 if (is_reneg) {
1953 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1954 tp->sacked_out = 0;
1955 tp->fackets_out = 0;
1956 }
1957 tcp_clear_all_retrans_hints(tp);
1958
1959 tcp_for_write_queue(skb, sk) {
1960 if (skb == tcp_send_head(sk))
1961 break;
1962
1963 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1964 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
1965 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1966 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1967 tp->lost_out += tcp_skb_pcount(skb);
1968 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
1969 }
1970 }
1971 tcp_verify_left_out(tp);
1972
1973
1974
1975
1976 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
1977 tp->sacked_out >= sysctl_tcp_reordering)
1978 tp->reordering = min_t(unsigned int, tp->reordering,
1979 sysctl_tcp_reordering);
1980 tcp_set_ca_state(sk, TCP_CA_Loss);
1981 tp->high_seq = tp->snd_nxt;
1982 tcp_ecn_queue_cwr(tp);
1983
1984
1985
1986
1987
1988 tp->frto = sysctl_tcp_frto &&
1989 (new_recovery || icsk->icsk_retransmits) &&
1990 !inet_csk(sk)->icsk_mtup.probe_size;
1991}
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2004{
2005 if (flag & FLAG_SACK_RENEGING) {
2006 struct tcp_sock *tp = tcp_sk(sk);
2007 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2008 msecs_to_jiffies(10));
2009
2010 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2011 delay, TCP_RTO_MAX);
2012 return true;
2013 }
2014 return false;
2015}
2016
2017static inline int tcp_fackets_out(const struct tcp_sock *tp)
2018{
2019 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2020}
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2038{
2039 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2040}
2041
2042static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2043{
2044 struct tcp_sock *tp = tcp_sk(sk);
2045 unsigned long delay;
2046
2047
2048
2049
2050
2051 if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
2052 (flag & FLAG_ECE) || !tp->srtt_us)
2053 return false;
2054
2055 delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
2056 msecs_to_jiffies(2));
2057
2058 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2059 return false;
2060
2061 inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
2062 TCP_RTO_MAX);
2063 return true;
2064}
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
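/* Decide whether to enter fast retransmit/Recovery.  Triggers: some
 * segment already marked lost; the dupACK/SACK heuristic exceeding the
 * reordering metric; a stall where at least half of the small amount of
 * outstanding data is SACKed and nothing new can be sent; a thin stream
 * with SACK and an empty send queue; or the early-retransmit condition
 * (RFC 5827) when there are too few segments in flight for three dupACKs.
 */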
2159static bool tcp_time_to_recover(struct sock *sk, int flag)
2160{
2161 struct tcp_sock *tp = tcp_sk(sk);
2162 __u32 packets_out;
2163
2164
2165 if (tp->lost_out)
2166 return true;
2167
2168
2169 if (tcp_dupack_heuristics(tp) > tp->reordering)
2170 return true;
2171
2172
2173
2174
2175 packets_out = tp->packets_out;
2176 if (packets_out <= tp->reordering &&
2177 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2178 !tcp_may_send_now(sk)) {
2179
2180
2181
2182 return true;
2183 }
2184
2185
2186
2187
2188
2189
2190 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2191 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2192 tcp_is_sack(tp) && !tcp_send_head(sk))
2193 return true;
2194
2195
2196
2197
2198
2199
2200 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2201 (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
2202 !tcp_may_send_now(sk))
2203 return !tcp_pause_early_retransmit(sk, flag);
2204
2205 return false;
2206}
2207
2208
2209
2210
2211
2212
2213
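/* Mark segments at the head of the write queue as lost, starting from
 * lost_skb_hint when available, until "packets" segments have been
 * counted (all segments for FACK/Reno, only SACKed ones for plain SACK).
 * With mark_head, only the first segment is marked.  A segment is
 * fragmented if the loss boundary falls inside it.
 */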
2214static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2215{
2216 struct tcp_sock *tp = tcp_sk(sk);
2217 struct sk_buff *skb;
2218 int cnt, oldcnt;
2219 int err;
2220 unsigned int mss;
2221
2222 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2223
2224 WARN_ON(packets > tp->packets_out);
2225 if (tp->lost_skb_hint) {
2226 skb = tp->lost_skb_hint;
2227 cnt = tp->lost_cnt_hint;
2228
2229 if (mark_head && skb != tcp_write_queue_head(sk))
2230 return;
2231 } else {
2232 skb = tcp_write_queue_head(sk);
2233 cnt = 0;
2234 }
2235
2236 tcp_for_write_queue_from(skb, sk) {
2237 if (skb == tcp_send_head(sk))
2238 break;
2239
2240
2241 tp->lost_skb_hint = skb;
2242 tp->lost_cnt_hint = cnt;
2243
2244 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2245 break;
2246
2247 oldcnt = cnt;
2248 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2249 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2250 cnt += tcp_skb_pcount(skb);
2251
2252 if (cnt > packets) {
2253 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2254 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2255 (oldcnt >= packets))
2256 break;
2257
2258 mss = skb_shinfo(skb)->gso_size;
2259 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
2260 mss, GFP_ATOMIC);
2261 if (err < 0)
2262 break;
2263 cnt = packets;
2264 }
2265
2266 tcp_skb_mark_lost(tp, skb);
2267
2268 if (mark_head)
2269 break;
2270 }
2271 tcp_verify_left_out(tp);
2272}
2273
2274
2275
2276static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2277{
2278 struct tcp_sock *tp = tcp_sk(sk);
2279
2280 if (tcp_is_reno(tp)) {
2281 tcp_mark_head_lost(sk, 1, 1);
2282 } else if (tcp_is_fack(tp)) {
2283 int lost = tp->fackets_out - tp->reordering;
2284 if (lost <= 0)
2285 lost = 1;
2286 tcp_mark_head_lost(sk, lost, 0);
2287 } else {
2288 int sacked_upto = tp->sacked_out - tp->reordering;
2289 if (sacked_upto >= 0)
2290 tcp_mark_head_lost(sk, sacked_upto, 0);
2291 else if (fast_rexmit)
2292 tcp_mark_head_lost(sk, 1, 1);
2293 }
2294}
2295
2296
2297
2298
2299static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2300{
2301 tp->snd_cwnd = min(tp->snd_cwnd,
2302 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2303 tp->snd_cwnd_stamp = tcp_time_stamp;
2304}
2305
2306
2307
2308
2309static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2310{
2311 return !tp->retrans_stamp ||
2312 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2313 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2314}
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
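/* Undo procedures.  A cwnd reduction can later prove spurious (detected
 * via TCP timestamps or D-SACK); the helpers below test the undo
 * conditions and roll cwnd/ssthresh back when an undo is justified.
 */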
2332static bool tcp_any_retrans_done(const struct sock *sk)
2333{
2334 const struct tcp_sock *tp = tcp_sk(sk);
2335 struct sk_buff *skb;
2336
2337 if (tp->retrans_out)
2338 return true;
2339
2340 skb = tcp_write_queue_head(sk);
2341 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2342 return true;
2343
2344 return false;
2345}
2346
2347#if FASTRETRANS_DEBUG > 1
2348static void DBGUNDO(struct sock *sk, const char *msg)
2349{
2350 struct tcp_sock *tp = tcp_sk(sk);
2351 struct inet_sock *inet = inet_sk(sk);
2352
2353 if (sk->sk_family == AF_INET) {
2354 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2355 msg,
2356 &inet->inet_daddr, ntohs(inet->inet_dport),
2357 tp->snd_cwnd, tcp_left_out(tp),
2358 tp->snd_ssthresh, tp->prior_ssthresh,
2359 tp->packets_out);
2360 }
2361#if IS_ENABLED(CONFIG_IPV6)
2362 else if (sk->sk_family == AF_INET6) {
2363 struct ipv6_pinfo *np = inet6_sk(sk);
2364 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2365 msg,
2366 &np->daddr, ntohs(inet->inet_dport),
2367 tp->snd_cwnd, tcp_left_out(tp),
2368 tp->snd_ssthresh, tp->prior_ssthresh,
2369 tp->packets_out);
2370 }
2371#endif
2372}
2373#else
2374#define DBGUNDO(x...) do { } while (0)
2375#endif
2376
2377static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2378{
2379 struct tcp_sock *tp = tcp_sk(sk);
2380
2381 if (unmark_loss) {
2382 struct sk_buff *skb;
2383
2384 tcp_for_write_queue(skb, sk) {
2385 if (skb == tcp_send_head(sk))
2386 break;
2387 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2388 }
2389 tp->lost_out = 0;
2390 tcp_clear_all_retrans_hints(tp);
2391 }
2392
2393 if (tp->prior_ssthresh) {
2394 const struct inet_connection_sock *icsk = inet_csk(sk);
2395
2396 if (icsk->icsk_ca_ops->undo_cwnd)
2397 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2398 else
2399 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2400
2401 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2402 tp->snd_ssthresh = tp->prior_ssthresh;
2403 tcp_ecn_withdraw_cwr(tp);
2404 }
2405 } else {
2406 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2407 }
2408 tp->snd_cwnd_stamp = tcp_time_stamp;
2409 tp->undo_marker = 0;
2410}
2411
2412static inline bool tcp_may_undo(const struct tcp_sock *tp)
2413{
2414 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2415}
2416
2417
2418static bool tcp_try_undo_recovery(struct sock *sk)
2419{
2420 struct tcp_sock *tp = tcp_sk(sk);
2421
2422 if (tcp_may_undo(tp)) {
2423 int mib_idx;
2424
2425
2426
2427
2428 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2429 tcp_undo_cwnd_reduction(sk, false);
2430 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2431 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2432 else
2433 mib_idx = LINUX_MIB_TCPFULLUNDO;
2434
2435 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2436 }
2437 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2438
2439
2440
2441 tcp_moderate_cwnd(tp);
2442 if (!tcp_any_retrans_done(sk))
2443 tp->retrans_stamp = 0;
2444 return true;
2445 }
2446 tcp_set_ca_state(sk, TCP_CA_Open);
2447 return false;
2448}
2449
2450
2451static bool tcp_try_undo_dsack(struct sock *sk)
2452{
2453 struct tcp_sock *tp = tcp_sk(sk);
2454
2455 if (tp->undo_marker && !tp->undo_retrans) {
2456 DBGUNDO(sk, "D-SACK");
2457 tcp_undo_cwnd_reduction(sk, false);
2458 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2459 return true;
2460 }
2461 return false;
2462}
2463
2464
2465static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2466{
2467 struct tcp_sock *tp = tcp_sk(sk);
2468
2469 if (frto_undo || tcp_may_undo(tp)) {
2470 tcp_undo_cwnd_reduction(sk, true);
2471
2472 DBGUNDO(sk, "partial loss");
2473 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2474 if (frto_undo)
2475 NET_INC_STATS_BH(sock_net(sk),
2476 LINUX_MIB_TCPSPURIOUSRTOS);
2477 inet_csk(sk)->icsk_retransmits = 0;
2478 if (frto_undo || tcp_is_sack(tp))
2479 tcp_set_ca_state(sk, TCP_CA_Open);
2480 return true;
2481 }
2482 return false;
2483}
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495static void tcp_init_cwnd_reduction(struct sock *sk)
2496{
2497 struct tcp_sock *tp = tcp_sk(sk);
2498
2499 tp->high_seq = tp->snd_nxt;
2500 tp->tlp_high_seq = 0;
2501 tp->snd_cwnd_cnt = 0;
2502 tp->prior_cwnd = tp->snd_cwnd;
2503 tp->prr_delivered = 0;
2504 tp->prr_out = 0;
2505 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2506 tcp_ecn_queue_cwr(tp);
2507}
2508
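/* Proportional Rate Reduction (RFC 6937).  While packets in flight exceed
 * ssthresh, release segments in proportion to the delivery rate:
 *
 *	sndcnt = CEIL(prr_delivered * ssthresh / prior_cwnd) - prr_out
 *
 * otherwise use the slow-start reduction bound, capped by the remaining
 * gap to ssthresh, so that cwnd converges to ssthresh by the end of
 * recovery.
 */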
2509static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
2510 int fast_rexmit)
2511{
2512 struct tcp_sock *tp = tcp_sk(sk);
2513 int sndcnt = 0;
2514 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2515 int newly_acked_sacked = prior_unsacked -
2516 (tp->packets_out - tp->sacked_out);
2517
2518 tp->prr_delivered += newly_acked_sacked;
2519 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2520 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2521 tp->prior_cwnd - 1;
2522 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2523 } else {
2524 sndcnt = min_t(int, delta,
2525 max_t(int, tp->prr_delivered - tp->prr_out,
2526 newly_acked_sacked) + 1);
2527 }
2528
2529 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2530 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2531}
2532
2533static inline void tcp_end_cwnd_reduction(struct sock *sk)
2534{
2535 struct tcp_sock *tp = tcp_sk(sk);
2536
2537
2538 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2539 (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
2540 tp->snd_cwnd = tp->snd_ssthresh;
2541 tp->snd_cwnd_stamp = tcp_time_stamp;
2542 }
2543 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2544}
2545
2546
2547void tcp_enter_cwr(struct sock *sk)
2548{
2549 struct tcp_sock *tp = tcp_sk(sk);
2550
2551 tp->prior_ssthresh = 0;
2552 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2553 tp->undo_marker = 0;
2554 tcp_init_cwnd_reduction(sk);
2555 tcp_set_ca_state(sk, TCP_CA_CWR);
2556 }
2557}
2558
2559static void tcp_try_keep_open(struct sock *sk)
2560{
2561 struct tcp_sock *tp = tcp_sk(sk);
2562 int state = TCP_CA_Open;
2563
2564 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2565 state = TCP_CA_Disorder;
2566
2567 if (inet_csk(sk)->icsk_ca_state != state) {
2568 tcp_set_ca_state(sk, state);
2569 tp->high_seq = tp->snd_nxt;
2570 }
2571}
2572
2573static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2574{
2575 struct tcp_sock *tp = tcp_sk(sk);
2576
2577 tcp_verify_left_out(tp);
2578
2579 if (!tcp_any_retrans_done(sk))
2580 tp->retrans_stamp = 0;
2581
2582 if (flag & FLAG_ECE)
2583 tcp_enter_cwr(sk);
2584
2585 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2586 tcp_try_keep_open(sk);
2587 } else {
2588 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2589 }
2590}
2591
2592static void tcp_mtup_probe_failed(struct sock *sk)
2593{
2594 struct inet_connection_sock *icsk = inet_csk(sk);
2595
2596 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2597 icsk->icsk_mtup.probe_size = 0;
2598}
2599
2600static void tcp_mtup_probe_success(struct sock *sk)
2601{
2602 struct tcp_sock *tp = tcp_sk(sk);
2603 struct inet_connection_sock *icsk = inet_csk(sk);
2604
2605
2606 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2607 tp->snd_cwnd = tp->snd_cwnd *
2608 tcp_mss_to_mtu(sk, tp->mss_cache) /
2609 icsk->icsk_mtup.probe_size;
2610 tp->snd_cwnd_cnt = 0;
2611 tp->snd_cwnd_stamp = tcp_time_stamp;
2612 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2613
2614 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2615 icsk->icsk_mtup.probe_size = 0;
2616 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2617}
2618
2619
2620
2621
2622
2623void tcp_simple_retransmit(struct sock *sk)
2624{
2625 const struct inet_connection_sock *icsk = inet_csk(sk);
2626 struct tcp_sock *tp = tcp_sk(sk);
2627 struct sk_buff *skb;
2628 unsigned int mss = tcp_current_mss(sk);
2629 u32 prior_lost = tp->lost_out;
2630
2631 tcp_for_write_queue(skb, sk) {
2632 if (skb == tcp_send_head(sk))
2633 break;
2634 if (tcp_skb_seglen(skb) > mss &&
2635 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2636 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2637 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2638 tp->retrans_out -= tcp_skb_pcount(skb);
2639 }
2640 tcp_skb_mark_lost_uncond_verify(tp, skb);
2641 }
2642 }
2643
2644 tcp_clear_retrans_hints_partial(tp);
2645
2646 if (prior_lost == tp->lost_out)
2647 return;
2648
2649 if (tcp_is_reno(tp))
2650 tcp_limit_reno_sacked(tp);
2651
2652 tcp_verify_left_out(tp);
2653
2654
2655
2656
2657
2658
2659 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2660 tp->high_seq = tp->snd_nxt;
2661 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2662 tp->prior_ssthresh = 0;
2663 tp->undo_marker = 0;
2664 tcp_set_ca_state(sk, TCP_CA_Loss);
2665 }
2666 tcp_xmit_retransmit_queue(sk);
2667}
2668EXPORT_SYMBOL(tcp_simple_retransmit);
2669
2670static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2671{
2672 struct tcp_sock *tp = tcp_sk(sk);
2673 int mib_idx;
2674
2675 if (tcp_is_reno(tp))
2676 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2677 else
2678 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2679
2680 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2681
2682 tp->prior_ssthresh = 0;
2683 tcp_init_undo(tp);
2684
2685 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2686 if (!ece_ack)
2687 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2688 tcp_init_cwnd_reduction(sk);
2689 }
2690 tcp_set_ca_state(sk, TCP_CA_Recovery);
2691}
2692
2693
2694
2695
2696static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2697{
2698 struct tcp_sock *tp = tcp_sk(sk);
2699 bool recovered = !before(tp->snd_una, tp->high_seq);
2700
2701 if (tp->frto) {
		/* F-RTO (RFC 5682): the timeout was spurious if data that was
		 * never retransmitted has been (s)acked.
		 */
2705 if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
2706 return;
2707
2708 if (after(tp->snd_nxt, tp->high_seq) &&
2709 (flag & FLAG_DATA_SACKED || is_dupack)) {
2710 tp->frto = 0;
2711 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2712 tp->high_seq = tp->snd_nxt;
2713 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
2714 TCP_NAGLE_OFF);
2715 if (after(tp->snd_nxt, tp->high_seq))
2716 return;
2717 tp->frto = 0;
2718 }
2719 }
2720
2721 if (recovered) {
2722
2723 tcp_try_undo_recovery(sk);
2724 return;
2725 }
2726 if (tcp_is_reno(tp)) {
2727
2728
2729
2730 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2731 tcp_add_reno_sack(sk);
2732 else if (flag & FLAG_SND_UNA_ADVANCED)
2733 tcp_reset_reno_sack(tp);
2734 }
2735 if (tcp_try_undo_loss(sk, false))
2736 return;
2737 tcp_xmit_retransmit_queue(sk);
2738}
2739
2740
2741static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2742 const int prior_unsacked)
2743{
2744 struct tcp_sock *tp = tcp_sk(sk);
2745
2746 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2747
2748
2749
2750 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2751
2752
2753
2754
2755
2756
2757 if (tp->retrans_out) {
2758 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2759 return true;
2760 }
2761
2762 if (!tcp_any_retrans_done(sk))
2763 tp->retrans_stamp = 0;
2764
2765 DBGUNDO(sk, "partial recovery");
2766 tcp_undo_cwnd_reduction(sk, true);
2767 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2768 tcp_try_keep_open(sk);
2769 return true;
2770 }
2771 return false;
2772}
2773
/* Process an event which can update packets-in-flight not trivially.
 * The main goal of this function is to calculate a new estimate for
 * left_out, taking into account both packets sitting in the receiver's
 * buffer and packets lost by the network.
 *
 * Besides that, it handles the congestion state machine: checking for
 * SACK reneging, undoing recoveries, deciding when to enter Recovery
 * (fast retransmit), reducing cwnd, and retransmitting what is lost.
 */
2785static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2786 const int prior_unsacked,
2787 bool is_dupack, int flag)
2788{
2789 struct inet_connection_sock *icsk = inet_csk(sk);
2790 struct tcp_sock *tp = tcp_sk(sk);
2791 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2792 (tcp_fackets_out(tp) > tp->reordering));
2793 int fast_rexmit = 0;
2794
2795 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2796 tp->sacked_out = 0;
2797 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2798 tp->fackets_out = 0;
2799
2800
2801
2802 if (flag & FLAG_ECE)
2803 tp->prior_ssthresh = 0;
2804
2805
2806 if (tcp_check_sack_reneging(sk, flag))
2807 return;
2808
2809
2810 tcp_verify_left_out(tp);
2811
2812
2813
2814 if (icsk->icsk_ca_state == TCP_CA_Open) {
2815 WARN_ON(tp->retrans_out != 0);
2816 tp->retrans_stamp = 0;
2817 } else if (!before(tp->snd_una, tp->high_seq)) {
2818 switch (icsk->icsk_ca_state) {
2819 case TCP_CA_CWR:
2820
2821
2822 if (tp->snd_una != tp->high_seq) {
2823 tcp_end_cwnd_reduction(sk);
2824 tcp_set_ca_state(sk, TCP_CA_Open);
2825 }
2826 break;
2827
2828 case TCP_CA_Recovery:
2829 if (tcp_is_reno(tp))
2830 tcp_reset_reno_sack(tp);
2831 if (tcp_try_undo_recovery(sk))
2832 return;
2833 tcp_end_cwnd_reduction(sk);
2834 break;
2835 }
2836 }
2837
2838
2839 switch (icsk->icsk_ca_state) {
2840 case TCP_CA_Recovery:
2841 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2842 if (tcp_is_reno(tp) && is_dupack)
2843 tcp_add_reno_sack(sk);
2844 } else {
2845 if (tcp_try_undo_partial(sk, acked, prior_unsacked))
2846 return;
2847
2848 do_lost = tcp_is_reno(tp) ||
2849 tcp_fackets_out(tp) > tp->reordering;
2850 }
2851 if (tcp_try_undo_dsack(sk)) {
2852 tcp_try_keep_open(sk);
2853 return;
2854 }
2855 break;
2856 case TCP_CA_Loss:
2857 tcp_process_loss(sk, flag, is_dupack);
2858 if (icsk->icsk_ca_state != TCP_CA_Open)
2859 return;
		/* Loss state was undone; fall through to Open-state processing. */
2861 default:
2862 if (tcp_is_reno(tp)) {
2863 if (flag & FLAG_SND_UNA_ADVANCED)
2864 tcp_reset_reno_sack(tp);
2865 if (is_dupack)
2866 tcp_add_reno_sack(sk);
2867 }
2868
2869 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2870 tcp_try_undo_dsack(sk);
2871
2872 if (!tcp_time_to_recover(sk, flag)) {
2873 tcp_try_to_open(sk, flag, prior_unsacked);
2874 return;
2875 }
2876
2877
2878 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2879 icsk->icsk_mtup.probe_size &&
2880 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2881 tcp_mtup_probe_failed(sk);
2882
2883 tp->snd_cwnd++;
2884 tcp_simple_retransmit(sk);
2885 return;
2886 }
2887
2888
2889 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2890 fast_rexmit = 1;
2891 }
2892
2893 if (do_lost)
2894 tcp_update_scoreboard(sk, fast_rexmit);
2895 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
2896 tcp_xmit_retransmit_queue(sk);
2897}
2898
2899static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2900 long seq_rtt_us, long sack_rtt_us)
2901{
2902 const struct tcp_sock *tp = tcp_sk(sk);
2903
	/* Prefer the RTT measured from the ACK's timing over TS-ECR, because
	 * broken middleboxes or peers may corrupt TS-ECR fields. But Karn's
	 * algorithm forbids taking an RTT sample if retransmitted data was
	 * acked (RFC 6298).
	 */
2909 if (flag & FLAG_RETRANS_DATA_ACKED)
2910 seq_rtt_us = -1L;
2911
2912 if (seq_rtt_us < 0)
2913 seq_rtt_us = sack_rtt_us;
2914
2915
2916
2917
2918
2919
2920
2921 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2922 flag & FLAG_ACKED)
2923 seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
2924
2925 if (seq_rtt_us < 0)
2926 return false;
2927
2928 tcp_rtt_estimator(sk, seq_rtt_us);
2929 tcp_set_rto(sk);
2930
2931
2932 inet_csk(sk)->icsk_backoff = 0;
2933 return true;
2934}
2935
2936
2937static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
2938{
2939 struct tcp_sock *tp = tcp_sk(sk);
2940 long seq_rtt_us = -1L;
2941
2942 if (synack_stamp && !tp->total_retrans)
2943 seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
2944
2945
2946
2947
2948 if (!tp->srtt_us)
2949 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
2950}
2951
2952static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
2953{
2954 const struct inet_connection_sock *icsk = inet_csk(sk);
2955
2956 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
2957 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2958}
2959
/* Restart the retransmission timer after forward progress on the
 * connection (rearm to now + RTO, per RFC 6298). If an early-retransmit
 * or loss-probe timer is pending, offset the RTO by the age of the
 * oldest packet in the write queue.
 */
2963void tcp_rearm_rto(struct sock *sk)
2964{
2965 const struct inet_connection_sock *icsk = inet_csk(sk);
2966 struct tcp_sock *tp = tcp_sk(sk);
2967
	/* If the retransmit timer is currently being used by Fast Open for
	 * SYN-ACK retransmission purposes, leave it alone.
	 */
2971 if (tp->fastopen_rsk)
2972 return;
2973
2974 if (!tp->packets_out) {
2975 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2976 } else {
2977 u32 rto = inet_csk(sk)->icsk_rto;
2978
2979 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2980 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2981 struct sk_buff *skb = tcp_write_queue_head(sk);
2982 const u32 rto_time_stamp =
2983 tcp_skb_timestamp(skb) + rto;
2984 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
2985
2986
2987
2988 if (delta > 0)
2989 rto = delta;
2990 }
2991 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
2992 TCP_RTO_MAX);
2993 }
2994}
2995
2996
2997
2998
2999void tcp_resume_early_retransmit(struct sock *sk)
3000{
3001 struct tcp_sock *tp = tcp_sk(sk);
3002
3003 tcp_rearm_rto(sk);
3004
3005
3006 if (!tp->do_early_retrans)
3007 return;
3008
3009 tcp_enter_recovery(sk, false);
3010 tcp_update_scoreboard(sk, 1);
3011 tcp_xmit_retransmit_queue(sk);
3012}
3013
3014
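/* An skb was only partially covered by the new snd_una: trim the acked
 * head and return how many packets of it were acked.
 */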
3015static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3016{
3017 struct tcp_sock *tp = tcp_sk(sk);
3018 u32 packets_acked;
3019
3020 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3021
3022 packets_acked = tcp_skb_pcount(skb);
3023 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3024 return 0;
3025 packets_acked -= tcp_skb_pcount(skb);
3026
3027 if (packets_acked) {
3028 BUG_ON(tcp_skb_pcount(skb) == 0);
3029 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3030 }
3031
3032 return packets_acked;
3033}
3034
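/* If the socket asked for SOF_TIMESTAMPING_TX_ACK and this skb's timestamp
 * key falls inside the newly acked range, emit an ACK timestamp.
 */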
3035static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3036 u32 prior_snd_una)
3037{
3038 const struct skb_shared_info *shinfo;
3039
3040
3041 if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
3042 return;
3043
3044 shinfo = skb_shinfo(skb);
3045 if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
3046 between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
3047 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3048}
3049
/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 */
3054static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3055 u32 prior_snd_una, long sack_rtt_us)
3056{
3057 const struct inet_connection_sock *icsk = inet_csk(sk);
3058 struct skb_mstamp first_ackt, last_ackt, now;
3059 struct tcp_sock *tp = tcp_sk(sk);
3060 u32 prior_sacked = tp->sacked_out;
3061 u32 reord = tp->packets_out;
3062 bool fully_acked = true;
3063 long ca_seq_rtt_us = -1L;
3064 long seq_rtt_us = -1L;
3065 struct sk_buff *skb;
3066 u32 pkts_acked = 0;
3067 bool rtt_update;
3068 int flag = 0;
3069
3070 first_ackt.v64 = 0;
3071
3072 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3073 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3074 u8 sacked = scb->sacked;
3075 u32 acked_pcount;
3076
3077 tcp_ack_tstamp(sk, skb, prior_snd_una);
3078
3079
3080 if (after(scb->end_seq, tp->snd_una)) {
3081 if (tcp_skb_pcount(skb) == 1 ||
3082 !after(tp->snd_una, scb->seq))
3083 break;
3084
3085 acked_pcount = tcp_tso_acked(sk, skb);
3086 if (!acked_pcount)
3087 break;
3088
3089 fully_acked = false;
3090 } else {
3091
3092 prefetchw(skb->next);
3093 acked_pcount = tcp_skb_pcount(skb);
3094 }
3095
3096 if (unlikely(sacked & TCPCB_RETRANS)) {
3097 if (sacked & TCPCB_SACKED_RETRANS)
3098 tp->retrans_out -= acked_pcount;
3099 flag |= FLAG_RETRANS_DATA_ACKED;
3100 } else {
3101 last_ackt = skb->skb_mstamp;
3102 WARN_ON_ONCE(last_ackt.v64 == 0);
3103 if (!first_ackt.v64)
3104 first_ackt = last_ackt;
3105
3106 if (!(sacked & TCPCB_SACKED_ACKED))
3107 reord = min(pkts_acked, reord);
3108 if (!after(scb->end_seq, tp->high_seq))
3109 flag |= FLAG_ORIG_SACK_ACKED;
3110 }
3111
3112 if (sacked & TCPCB_SACKED_ACKED)
3113 tp->sacked_out -= acked_pcount;
3114 if (sacked & TCPCB_LOST)
3115 tp->lost_out -= acked_pcount;
3116
3117 tp->packets_out -= acked_pcount;
3118 pkts_acked += acked_pcount;
3119
3120
3121
3122
3123
3124
3125
3126
3127 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3128 flag |= FLAG_DATA_ACKED;
3129 } else {
3130 flag |= FLAG_SYN_ACKED;
3131 tp->retrans_stamp = 0;
3132 }
3133
3134 if (!fully_acked)
3135 break;
3136
3137 tcp_unlink_write_queue(skb, sk);
3138 sk_wmem_free_skb(sk, skb);
3139 if (unlikely(skb == tp->retransmit_skb_hint))
3140 tp->retransmit_skb_hint = NULL;
3141 if (unlikely(skb == tp->lost_skb_hint))
3142 tp->lost_skb_hint = NULL;
3143 }
3144
3145 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3146 tp->snd_up = tp->snd_una;
3147
3148 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3149 flag |= FLAG_SACK_RENEGING;
3150
3151 skb_mstamp_get(&now);
3152 if (likely(first_ackt.v64)) {
3153 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
3154 ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
3155 }
3156
3157 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
3158
3159 if (flag & FLAG_ACKED) {
3160 const struct tcp_congestion_ops *ca_ops
3161 = inet_csk(sk)->icsk_ca_ops;
3162
3163 tcp_rearm_rto(sk);
3164 if (unlikely(icsk->icsk_mtup.probe_size &&
3165 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3166 tcp_mtup_probe_success(sk);
3167 }
3168
3169 if (tcp_is_reno(tp)) {
3170 tcp_remove_reno_sacks(sk, pkts_acked);
3171 } else {
3172 int delta;
3173
3174
3175 if (reord < prior_fackets)
3176 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3177
3178 delta = tcp_is_fack(tp) ? pkts_acked :
3179 prior_sacked - tp->sacked_out;
3180 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3181 }
3182
3183 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3184
3185 if (ca_ops->pkts_acked)
3186 ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
3187
3188 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3189 sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
3190
3191
3192
3193
3194 tcp_rearm_rto(sk);
3195 }
3196
3197#if FASTRETRANS_DEBUG > 0
3198 WARN_ON((int)tp->sacked_out < 0);
3199 WARN_ON((int)tp->lost_out < 0);
3200 WARN_ON((int)tp->retrans_out < 0);
3201 if (!tp->packets_out && tcp_is_sack(tp)) {
3202 icsk = inet_csk(sk);
3203 if (tp->lost_out) {
3204 pr_debug("Leak l=%u %d\n",
3205 tp->lost_out, icsk->icsk_ca_state);
3206 tp->lost_out = 0;
3207 }
3208 if (tp->sacked_out) {
3209 pr_debug("Leak s=%u %d\n",
3210 tp->sacked_out, icsk->icsk_ca_state);
3211 tp->sacked_out = 0;
3212 }
3213 if (tp->retrans_out) {
3214 pr_debug("Leak r=%u %d\n",
3215 tp->retrans_out, icsk->icsk_ca_state);
3216 tp->retrans_out = 0;
3217 }
3218 }
3219#endif
3220 return flag;
3221}
3222
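/* The peer ACKed our zero-window probe. Stop probing if the window now has
 * room for the next segment, otherwise keep the probe timer running with
 * exponential backoff.
 */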
3223static void tcp_ack_probe(struct sock *sk)
3224{
3225 const struct tcp_sock *tp = tcp_sk(sk);
3226 struct inet_connection_sock *icsk = inet_csk(sk);
3227
3228
3229
3230 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3231 icsk->icsk_backoff = 0;
3232 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3233
3234
3235
3236 } else {
3237 unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
3238
3239 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3240 when, TCP_RTO_MAX);
3241 }
3242}
3243
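/* An ACK is "dubious" if it is a duplicate, carries a congestion signal,
 * or arrives while we are not in the Open state.
 */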
3244static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3245{
3246 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3247 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3248}
3249
3250
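/* Decide whether this ACK may grow the congestion window. Never raise cwnd
 * during cwnd reduction, and be more conservative when reordering is high.
 */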
3251static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3252{
3253 if (tcp_in_cwnd_reduction(sk))
3254 return false;
3255
3256
3257
3258
3259
3260
3261
3262 if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
3263 return flag & FLAG_FORWARD_PROGRESS;
3264
3265 return flag & FLAG_DATA_ACKED;
3266}
3267
3268
3269
3270
3271static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3272 const u32 ack, const u32 ack_seq,
3273 const u32 nwin)
3274{
3275 return after(ack, tp->snd_una) ||
3276 after(ack_seq, tp->snd_wl1) ||
3277 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3278}
3279
/* Update our send window.
 *
 * Window update algorithm, described in RFC 793 and RFC 1122: accept the
 * advertised window only if the segment is not older than what we have
 * already seen (see tcp_may_update_window() above).
 */
3285static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3286 u32 ack_seq)
3287{
3288 struct tcp_sock *tp = tcp_sk(sk);
3289 int flag = 0;
3290 u32 nwin = ntohs(tcp_hdr(skb)->window);
3291
3292 if (likely(!tcp_hdr(skb)->syn))
3293 nwin <<= tp->rx_opt.snd_wscale;
3294
3295 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3296 flag |= FLAG_WIN_UPDATE;
3297 tcp_update_wl(tp, ack_seq);
3298
3299 if (tp->snd_wnd != nwin) {
3300 tp->snd_wnd = nwin;
3301
3302
3303
3304
3305 tp->pred_flags = 0;
3306 tcp_fast_path_check(sk);
3307
3308 if (nwin > tp->max_window) {
3309 tp->max_window = nwin;
3310 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3311 }
3312 }
3313 }
3314
3315 tp->snd_una = ack;
3316
3317 return flag;
3318}
3319
/* RFC 5961 7 [ACK Throttling]: rate-limit challenge ACKs. */
3321static void tcp_send_challenge_ack(struct sock *sk)
3322{
3323
3324 static u32 challenge_timestamp;
3325 static unsigned int challenge_count;
3326 u32 now = jiffies / HZ;
3327
3328 if (now != challenge_timestamp) {
3329 challenge_timestamp = now;
3330 challenge_count = 0;
3331 }
3332 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3333 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3334 tcp_send_ack(sk);
3335 }
3336}
3337
3338static void tcp_store_ts_recent(struct tcp_sock *tp)
3339{
3340 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3341 tp->rx_opt.ts_recent_stamp = get_seconds();
3342}
3343
3344static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3345{
3346 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3347
3348
3349
3350
3351
3352
3353
3354 if (tcp_paws_check(&tp->rx_opt, 0))
3355 tcp_store_ts_recent(tp);
3356 }
3357}
3358
/* This routine deals with ACKs during a tail loss probe (TLP) episode.
 * A pure duplicate ACK for the probe simply ends the episode; an ACK
 * beyond tlp_high_seq means the probe repaired a real loss, so perform a
 * CWR-style cwnd reduction unless the ACK only D-SACKs the probe.
 */
3362static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3363{
3364 struct tcp_sock *tp = tcp_sk(sk);
3365 bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
3366 !(flag & (FLAG_SND_UNA_ADVANCED |
3367 FLAG_NOT_DUP | FLAG_DATA_SACKED));
3368
3369
3370
3371
3372 if (is_tlp_dupack) {
3373 tp->tlp_high_seq = 0;
3374 return;
3375 }
3376
3377 if (after(ack, tp->tlp_high_seq)) {
3378 tp->tlp_high_seq = 0;
3379
3380 if (!(flag & FLAG_DSACKING_ACK)) {
3381 tcp_init_cwnd_reduction(sk);
3382 tcp_set_ca_state(sk, TCP_CA_CWR);
3383 tcp_end_cwnd_reduction(sk);
3384 tcp_try_keep_open(sk);
3385 NET_INC_STATS_BH(sock_net(sk),
3386 LINUX_MIB_TCPLOSSPROBERECOVERY);
3387 }
3388 }
3389}
3390
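/* Let the congestion control module know about this ACK, if it cares. */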
3391static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3392{
3393 const struct inet_connection_sock *icsk = inet_csk(sk);
3394
3395 if (icsk->icsk_ca_ops->in_ack_event)
3396 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3397}
3398
3399
3400static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3401{
3402 struct inet_connection_sock *icsk = inet_csk(sk);
3403 struct tcp_sock *tp = tcp_sk(sk);
3404 u32 prior_snd_una = tp->snd_una;
3405 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3406 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3407 bool is_dupack = false;
3408 u32 prior_fackets;
3409 int prior_packets = tp->packets_out;
3410 const int prior_unsacked = tp->packets_out - tp->sacked_out;
3411 int acked = 0;
3412 long sack_rtt_us = -1L;
3413
3414
3415 prefetchw(sk->sk_write_queue.next);
3416
	/* If the ack is older than previous acks
	 * then we can probably ignore it.
	 */
3420 if (before(ack, prior_snd_una)) {
3421
3422 if (before(ack, prior_snd_una - tp->max_window)) {
3423 tcp_send_challenge_ack(sk);
3424 return -1;
3425 }
3426 goto old_ack;
3427 }
3428
	/* If the ack includes data we haven't sent yet, discard
	 * this segment (RFC 793 Section 3.9).
	 */
3432 if (after(ack, tp->snd_nxt))
3433 goto invalid_ack;
3434
3435 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3436 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3437 tcp_rearm_rto(sk);
3438
3439 if (after(ack, prior_snd_una)) {
3440 flag |= FLAG_SND_UNA_ADVANCED;
3441 icsk->icsk_retransmits = 0;
3442 }
3443
3444 prior_fackets = tp->fackets_out;
3445
3446
3447
3448
3449 if (flag & FLAG_UPDATE_TS_RECENT)
3450 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3451
3452 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3453
3454
3455
3456
3457 tcp_update_wl(tp, ack_seq);
3458 tp->snd_una = ack;
3459 flag |= FLAG_WIN_UPDATE;
3460
3461 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3462
3463 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3464 } else {
3465 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3466
3467 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3468 flag |= FLAG_DATA;
3469 else
3470 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3471
3472 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3473
3474 if (TCP_SKB_CB(skb)->sacked)
3475 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3476 &sack_rtt_us);
3477
3478 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3479 flag |= FLAG_ECE;
3480 ack_ev_flags |= CA_ACK_ECE;
3481 }
3482
3483 if (flag & FLAG_WIN_UPDATE)
3484 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3485
3486 tcp_in_ack_event(sk, ack_ev_flags);
3487 }
3488
3489
3490
3491
3492 sk->sk_err_soft = 0;
3493 icsk->icsk_probes_out = 0;
3494 tp->rcv_tstamp = tcp_time_stamp;
3495 if (!prior_packets)
3496 goto no_queue;
3497
3498
3499 acked = tp->packets_out;
3500 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3501 sack_rtt_us);
3502 acked -= tp->packets_out;
3503
3504
3505 if (tcp_may_raise_cwnd(sk, flag))
3506 tcp_cong_avoid(sk, ack, acked);
3507
3508 if (tcp_ack_is_dubious(sk, flag)) {
3509 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3510 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3511 is_dupack, flag);
3512 }
3513 if (tp->tlp_high_seq)
3514 tcp_process_tlp_ack(sk, ack, flag);
3515
3516 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3517 struct dst_entry *dst = __sk_dst_get(sk);
3518 if (dst)
3519 dst_confirm(dst);
3520 }
3521
3522 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3523 tcp_schedule_loss_probe(sk);
3524 tcp_update_pacing_rate(sk);
3525 return 1;
3526
3527no_queue:
3528
3529 if (flag & FLAG_DSACKING_ACK)
3530 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3531 is_dupack, flag);
3532
3533
3534
3535
3536 if (tcp_send_head(sk))
3537 tcp_ack_probe(sk);
3538
3539 if (tp->tlp_high_seq)
3540 tcp_process_tlp_ack(sk, ack, flag);
3541 return 1;
3542
3543invalid_ack:
3544 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3545 return -1;
3546
3547old_ack:
3548
3549
3550
3551 if (TCP_SKB_CB(skb)->sacked) {
3552 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3553 &sack_rtt_us);
3554 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3555 is_dupack, flag);
3556 }
3557
3558 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3559 return 0;
3560}
3561
/* Look for TCP options. Normally only called on SYN and SYN-ACK packets,
 * but also called on packets in the established flow when the fast
 * version below fails.
 */
3566void tcp_parse_options(const struct sk_buff *skb,
3567 struct tcp_options_received *opt_rx, int estab,
3568 struct tcp_fastopen_cookie *foc)
3569{
3570 const unsigned char *ptr;
3571 const struct tcphdr *th = tcp_hdr(skb);
3572 int length = (th->doff * 4) - sizeof(struct tcphdr);
3573
3574 ptr = (const unsigned char *)(th + 1);
3575 opt_rx->saw_tstamp = 0;
3576
3577 while (length > 0) {
3578 int opcode = *ptr++;
3579 int opsize;
3580
3581 switch (opcode) {
3582 case TCPOPT_EOL:
3583 return;
3584 case TCPOPT_NOP:
3585 length--;
3586 continue;
3587 default:
3588 opsize = *ptr++;
3589 if (opsize < 2)
3590 return;
3591 if (opsize > length)
3592 return;
3593 switch (opcode) {
3594 case TCPOPT_MSS:
3595 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3596 u16 in_mss = get_unaligned_be16(ptr);
3597 if (in_mss) {
3598 if (opt_rx->user_mss &&
3599 opt_rx->user_mss < in_mss)
3600 in_mss = opt_rx->user_mss;
3601 opt_rx->mss_clamp = in_mss;
3602 }
3603 }
3604 break;
3605 case TCPOPT_WINDOW:
3606 if (opsize == TCPOLEN_WINDOW && th->syn &&
3607 !estab && sysctl_tcp_window_scaling) {
3608 __u8 snd_wscale = *(__u8 *)ptr;
3609 opt_rx->wscale_ok = 1;
3610 if (snd_wscale > 14) {
3611 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3612 __func__,
3613 snd_wscale);
3614 snd_wscale = 14;
3615 }
3616 opt_rx->snd_wscale = snd_wscale;
3617 }
3618 break;
3619 case TCPOPT_TIMESTAMP:
3620 if ((opsize == TCPOLEN_TIMESTAMP) &&
3621 ((estab && opt_rx->tstamp_ok) ||
3622 (!estab && sysctl_tcp_timestamps))) {
3623 opt_rx->saw_tstamp = 1;
3624 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3625 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3626 }
3627 break;
3628 case TCPOPT_SACK_PERM:
3629 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3630 !estab && sysctl_tcp_sack) {
3631 opt_rx->sack_ok = TCP_SACK_SEEN;
3632 tcp_sack_reset(opt_rx);
3633 }
3634 break;
3635
3636 case TCPOPT_SACK:
3637 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3638 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3639 opt_rx->sack_ok) {
3640 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3641 }
3642 break;
3643#ifdef CONFIG_TCP_MD5SIG
3644 case TCPOPT_MD5SIG:
3645
3646
3647
3648
3649 break;
3650#endif
3651 case TCPOPT_EXP:
3652
3653
3654
3655
3656 if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
3657 get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
3658 foc == NULL || !th->syn || (opsize & 1))
3659 break;
3660 foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
3661 if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
3662 foc->len <= TCP_FASTOPEN_COOKIE_MAX)
3663 memcpy(foc->val, ptr + 2, foc->len);
3664 else if (foc->len != 0)
3665 foc->len = -1;
3666 break;
3667
3668 }
3669 ptr += opsize-2;
3670 length -= opsize;
3671 }
3672 }
3673}
3674EXPORT_SYMBOL(tcp_parse_options);
3675
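/* Fast path: recognize the common aligned NOP-NOP-TIMESTAMP option layout
 * without walking the full option list.
 */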
3676static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3677{
3678 const __be32 *ptr = (const __be32 *)(th + 1);
3679
3680 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3681 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3682 tp->rx_opt.saw_tstamp = 1;
3683 ++ptr;
3684 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3685 ++ptr;
3686 if (*ptr)
3687 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3688 else
3689 tp->rx_opt.rcv_tsecr = 0;
3690 return true;
3691 }
3692 return false;
3693}
3694
/* Fast parse options. This hopes to only see timestamps.
 * If that is the case, the aligned fast path avoids a full option walk;
 * otherwise fall back to tcp_parse_options().
 */
3698static bool tcp_fast_parse_options(const struct sk_buff *skb,
3699 const struct tcphdr *th, struct tcp_sock *tp)
3700{
3701
3702
3703
3704 if (th->doff == (sizeof(*th) / 4)) {
3705 tp->rx_opt.saw_tstamp = 0;
3706 return false;
3707 } else if (tp->rx_opt.tstamp_ok &&
3708 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3709 if (tcp_parse_aligned_timestamp(tp, th))
3710 return true;
3711 }
3712
3713 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3714 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3715 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3716
3717 return true;
3718}
3719
3720#ifdef CONFIG_TCP_MD5SIG
3721
3722
3723
3724const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3725{
3726 int length = (th->doff << 2) - sizeof(*th);
3727 const u8 *ptr = (const u8 *)(th + 1);
3728
3729
3730 if (length < TCPOLEN_MD5SIG)
3731 return NULL;
3732
3733 while (length > 0) {
3734 int opcode = *ptr++;
3735 int opsize;
3736
3737 switch (opcode) {
3738 case TCPOPT_EOL:
3739 return NULL;
3740 case TCPOPT_NOP:
3741 length--;
3742 continue;
3743 default:
3744 opsize = *ptr++;
3745 if (opsize < 2 || opsize > length)
3746 return NULL;
3747 if (opcode == TCPOPT_MD5SIG)
3748 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3749 }
3750 ptr += opsize - 2;
3751 length -= opsize;
3752 }
3753 return NULL;
3754}
3755EXPORT_SYMBOL(tcp_parse_md5sig_option);
3756#endif
3757
/* PAWS as specified is problematic for pure ACKs: a segment whose
 * timestamp fails the PAWS check may still be a harmless, possibly
 * reordered duplicate ACK (e.g. one used only for congestion feedback).
 * The predicate below accepts such a segment anyway if it carries no
 * data, does not acknowledge anything new, cannot update the window,
 * and its timestamp is not older than roughly one RTO.
 */
3781static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3782{
3783 const struct tcp_sock *tp = tcp_sk(sk);
3784 const struct tcphdr *th = tcp_hdr(skb);
3785 u32 seq = TCP_SKB_CB(skb)->seq;
3786 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3787
3788 return (
3789 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3790
3791
3792 ack == tp->snd_una &&
3793
3794
3795 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3796
3797
3798 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3799}
3800
3801static inline bool tcp_paws_discard(const struct sock *sk,
3802 const struct sk_buff *skb)
3803{
3804 const struct tcp_sock *tp = tcp_sk(sk);
3805
3806 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3807 !tcp_disordered_ack(sk, skb);
3808}
3809
/* Check segment sequence number for validity.
 *
 * A segment is considered acceptable if it overlaps the receive window
 * after truncation to the window. Acceptability of the data itself (and
 * of SYN, FIN) is checked separately; see tcp_data_queue(), for example.
 *
 * Note that the lower bound is RCV.WUP rather than RCV.NXT, so segments
 * sent before our last window update are still accepted.
 */
3823static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
3824{
3825 return !before(end_seq, tp->rcv_wup) &&
3826 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3827}
3828
3829
3830void tcp_reset(struct sock *sk)
3831{
3832
3833 switch (sk->sk_state) {
3834 case TCP_SYN_SENT:
3835 sk->sk_err = ECONNREFUSED;
3836 break;
3837 case TCP_CLOSE_WAIT:
3838 sk->sk_err = EPIPE;
3839 break;
3840 case TCP_CLOSE:
3841 return;
3842 default:
3843 sk->sk_err = ECONNRESET;
3844 }
3845
3846 smp_wmb();
3847
3848 if (!sock_flag(sk, SOCK_DEAD))
3849 sk->sk_error_report(sk);
3850
3851 tcp_done(sk);
3852}
3853
/*
 *	Process the FIN bit. The FIN takes effect only when it is validly
 *	part of the sequence space, not while there are still holes before it.
 *
 *	If we are ESTABLISHED, a received FIN moves us to CLOSE-WAIT
 *	(and thence onto LAST-ACK and finally CLOSE; we never enter
 *	TIME-WAIT from there).
 *
 *	If we are in FIN-WAIT-1, a received FIN indicates a simultaneous
 *	close and we go into CLOSING (and later onto TIME-WAIT).
 *
 *	If we are in FIN-WAIT-2, a received FIN moves us to TIME-WAIT.
 */
3868static void tcp_fin(struct sock *sk)
3869{
3870 struct tcp_sock *tp = tcp_sk(sk);
3871 const struct dst_entry *dst;
3872
3873 inet_csk_schedule_ack(sk);
3874
3875 sk->sk_shutdown |= RCV_SHUTDOWN;
3876 sock_set_flag(sk, SOCK_DONE);
3877
3878 switch (sk->sk_state) {
3879 case TCP_SYN_RECV:
3880 case TCP_ESTABLISHED:
3881
3882 tcp_set_state(sk, TCP_CLOSE_WAIT);
3883 dst = __sk_dst_get(sk);
3884 if (!dst || !dst_metric(dst, RTAX_QUICKACK))
3885 inet_csk(sk)->icsk_ack.pingpong = 1;
3886 break;
3887
3888 case TCP_CLOSE_WAIT:
3889 case TCP_CLOSING:
3890
3891
3892
3893 break;
3894 case TCP_LAST_ACK:
3895
3896 break;
3897
3898 case TCP_FIN_WAIT1:
3899
3900
3901
3902
3903 tcp_send_ack(sk);
3904 tcp_set_state(sk, TCP_CLOSING);
3905 break;
3906 case TCP_FIN_WAIT2:
3907
3908 tcp_send_ack(sk);
3909 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3910 break;
3911 default:
3912
3913
3914
3915 pr_err("%s: Impossible, sk->sk_state=%d\n",
3916 __func__, sk->sk_state);
3917 break;
3918 }
3919
3920
3921
3922
3923 __skb_queue_purge(&tp->out_of_order_queue);
3924 if (tcp_is_sack(tp))
3925 tcp_sack_reset(&tp->rx_opt);
3926 sk_mem_reclaim(sk);
3927
3928 if (!sock_flag(sk, SOCK_DEAD)) {
3929 sk->sk_state_change(sk);
3930
3931
3932 if (sk->sk_shutdown == SHUTDOWN_MASK ||
3933 sk->sk_state == TCP_CLOSE)
3934 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
3935 else
3936 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3937 }
3938}
3939
3940static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
3941 u32 end_seq)
3942{
3943 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
3944 if (before(seq, sp->start_seq))
3945 sp->start_seq = seq;
3946 if (after(end_seq, sp->end_seq))
3947 sp->end_seq = end_seq;
3948 return true;
3949 }
3950 return false;
3951}
3952
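/* Remember a duplicate segment so that the next ACK can carry a D-SACK
 * block (RFC 2883), if SACK/D-SACK are enabled.
 */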
3953static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
3954{
3955 struct tcp_sock *tp = tcp_sk(sk);
3956
3957 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3958 int mib_idx;
3959
3960 if (before(seq, tp->rcv_nxt))
3961 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
3962 else
3963 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
3964
3965 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3966
3967 tp->rx_opt.dsack = 1;
3968 tp->duplicate_sack[0].start_seq = seq;
3969 tp->duplicate_sack[0].end_seq = end_seq;
3970 }
3971}
3972
3973static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
3974{
3975 struct tcp_sock *tp = tcp_sk(sk);
3976
3977 if (!tp->rx_opt.dsack)
3978 tcp_dsack_set(sk, seq, end_seq);
3979 else
3980 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
3981}
3982
3983static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
3984{
3985 struct tcp_sock *tp = tcp_sk(sk);
3986
3987 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
3988 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
3989 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
3990 tcp_enter_quickack_mode(sk);
3991
3992 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3993 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
3994
3995 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
3996 end_seq = tp->rcv_nxt;
3997 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
3998 }
3999 }
4000
4001 tcp_send_ack(sk);
4002}
4003
4004
4005
4006
4007static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4008{
4009 int this_sack;
4010 struct tcp_sack_block *sp = &tp->selective_acks[0];
4011 struct tcp_sack_block *swalk = sp + 1;
4012
4013
4014
4015
4016 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4017 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4018 int i;
4019
4020
4021
4022
4023 tp->rx_opt.num_sacks--;
4024 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4025 sp[i] = sp[i + 1];
4026 continue;
4027 }
4028 this_sack++, swalk++;
4029 }
4030}
4031
4032static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4033{
4034 struct tcp_sock *tp = tcp_sk(sk);
4035 struct tcp_sack_block *sp = &tp->selective_acks[0];
4036 int cur_sacks = tp->rx_opt.num_sacks;
4037 int this_sack;
4038
4039 if (!cur_sacks)
4040 goto new_sack;
4041
4042 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4043 if (tcp_sack_extend(sp, seq, end_seq)) {
4044
4045 for (; this_sack > 0; this_sack--, sp--)
4046 swap(*sp, *(sp - 1));
4047 if (cur_sacks > 1)
4048 tcp_sack_maybe_coalesce(tp);
4049 return;
4050 }
4051 }
4052
4053
4054
4055
4056
4057
4058
4059 if (this_sack >= TCP_NUM_SACKS) {
4060 this_sack--;
4061 tp->rx_opt.num_sacks--;
4062 sp--;
4063 }
4064 for (; this_sack > 0; this_sack--, sp--)
4065 *sp = *(sp - 1);
4066
4067new_sack:
4068
4069 sp->start_seq = seq;
4070 sp->end_seq = end_seq;
4071 tp->rx_opt.num_sacks++;
4072}
4073
4074
4075
4076static void tcp_sack_remove(struct tcp_sock *tp)
4077{
4078 struct tcp_sack_block *sp = &tp->selective_acks[0];
4079 int num_sacks = tp->rx_opt.num_sacks;
4080 int this_sack;
4081
4082
4083 if (skb_queue_empty(&tp->out_of_order_queue)) {
4084 tp->rx_opt.num_sacks = 0;
4085 return;
4086 }
4087
4088 for (this_sack = 0; this_sack < num_sacks;) {
4089
4090 if (!before(tp->rcv_nxt, sp->start_seq)) {
4091 int i;
4092
4093
4094 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4095
4096
4097 for (i = this_sack+1; i < num_sacks; i++)
4098 tp->selective_acks[i-1] = tp->selective_acks[i];
4099 num_sacks--;
4100 continue;
4101 }
4102 this_sack++;
4103 sp++;
4104 }
4105 tp->rx_opt.num_sacks = num_sacks;
4106}
4107
/**
 * tcp_try_coalesce - try to merge skb to prior one
 * @sk: socket
 * @to: prior buffer
 * @from: buffer to add in queue
 * @fragstolen: pointer to boolean
 *
 * Before queueing skb @from after @to, try to merge them
 * to reduce overall memory use and queue lengths, if cost is small.
 * Packets in ofo or receive queues can stay a long time.
 * Better try to coalesce them right now to avoid future collapses.
 * Returns true if caller should free @from instead of queueing it.
 */
4121static bool tcp_try_coalesce(struct sock *sk,
4122 struct sk_buff *to,
4123 struct sk_buff *from,
4124 bool *fragstolen)
4125{
4126 int delta;
4127
4128 *fragstolen = false;
4129
4130
4131 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4132 return false;
4133
4134 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4135 return false;
4136
4137 atomic_add(delta, &sk->sk_rmem_alloc);
4138 sk_mem_charge(sk, delta);
4139 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4140 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4141 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4142 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4143 return true;
4144}
4145
4146
4147
4148
4149static void tcp_ofo_queue(struct sock *sk)
4150{
4151 struct tcp_sock *tp = tcp_sk(sk);
4152 __u32 dsack_high = tp->rcv_nxt;
4153 struct sk_buff *skb, *tail;
4154 bool fragstolen, eaten;
4155
4156 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4157 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4158 break;
4159
4160 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4161 __u32 dsack = dsack_high;
4162 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4163 dsack_high = TCP_SKB_CB(skb)->end_seq;
4164 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4165 }
4166
4167 __skb_unlink(skb, &tp->out_of_order_queue);
4168 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4169 SOCK_DEBUG(sk, "ofo packet was already received\n");
4170 __kfree_skb(skb);
4171 continue;
4172 }
4173 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4174 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4175 TCP_SKB_CB(skb)->end_seq);
4176
4177 tail = skb_peek_tail(&sk->sk_receive_queue);
4178 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4179 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4180 if (!eaten)
4181 __skb_queue_tail(&sk->sk_receive_queue, skb);
4182 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4183 tcp_fin(sk);
4184 if (eaten)
4185 kfree_skb_partial(skb, fragstolen);
4186 }
4187}
4188
4189static bool tcp_prune_ofo_queue(struct sock *sk);
4190static int tcp_prune_queue(struct sock *sk);
4191
4192static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4193 unsigned int size)
4194{
4195 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4196 !sk_rmem_schedule(sk, skb, size)) {
4197
4198 if (tcp_prune_queue(sk) < 0)
4199 return -1;
4200
4201 if (!sk_rmem_schedule(sk, skb, size)) {
4202 if (!tcp_prune_ofo_queue(sk))
4203 return -1;
4204
4205 if (!sk_rmem_schedule(sk, skb, size))
4206 return -1;
4207 }
4208 }
4209 return 0;
4210}
4211
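/* Queue an out-of-order segment: try to coalesce it with its neighbours,
 * keep the ofo queue sorted by sequence number, drop or trim overlaps
 * (reporting them via D-SACK), and update the SACK blocks to advertise.
 */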
4212static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4213{
4214 struct tcp_sock *tp = tcp_sk(sk);
4215 struct sk_buff *skb1;
4216 u32 seq, end_seq;
4217
4218 tcp_ecn_check_ce(tp, skb);
4219
4220 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4221 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
4222 __kfree_skb(skb);
4223 return;
4224 }
4225
4226
4227 tp->pred_flags = 0;
4228 inet_csk_schedule_ack(sk);
4229
4230 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4231 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4232 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4233
4234 skb1 = skb_peek_tail(&tp->out_of_order_queue);
4235 if (!skb1) {
4236
4237 if (tcp_is_sack(tp)) {
4238 tp->rx_opt.num_sacks = 1;
4239 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4240 tp->selective_acks[0].end_seq =
4241 TCP_SKB_CB(skb)->end_seq;
4242 }
4243 __skb_queue_head(&tp->out_of_order_queue, skb);
4244 goto end;
4245 }
4246
4247 seq = TCP_SKB_CB(skb)->seq;
4248 end_seq = TCP_SKB_CB(skb)->end_seq;
4249
4250 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4251 bool fragstolen;
4252
4253 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4254 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4255 } else {
4256 tcp_grow_window(sk, skb);
4257 kfree_skb_partial(skb, fragstolen);
4258 skb = NULL;
4259 }
4260
4261 if (!tp->rx_opt.num_sacks ||
4262 tp->selective_acks[0].end_seq != seq)
4263 goto add_sack;
4264
4265
4266 tp->selective_acks[0].end_seq = end_seq;
4267 goto end;
4268 }
4269
4270
4271 while (1) {
4272 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4273 break;
4274 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4275 skb1 = NULL;
4276 break;
4277 }
4278 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4279 }
4280
4281
4282 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4283 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4284
4285 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4286 __kfree_skb(skb);
4287 skb = NULL;
4288 tcp_dsack_set(sk, seq, end_seq);
4289 goto add_sack;
4290 }
4291 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4292
4293 tcp_dsack_set(sk, seq,
4294 TCP_SKB_CB(skb1)->end_seq);
4295 } else {
4296 if (skb_queue_is_first(&tp->out_of_order_queue,
4297 skb1))
4298 skb1 = NULL;
4299 else
4300 skb1 = skb_queue_prev(
4301 &tp->out_of_order_queue,
4302 skb1);
4303 }
4304 }
4305 if (!skb1)
4306 __skb_queue_head(&tp->out_of_order_queue, skb);
4307 else
4308 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4309
4310
4311 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4312 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4313
4314 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4315 break;
4316 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4317 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4318 end_seq);
4319 break;
4320 }
4321 __skb_unlink(skb1, &tp->out_of_order_queue);
4322 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4323 TCP_SKB_CB(skb1)->end_seq);
4324 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4325 __kfree_skb(skb1);
4326 }
4327
4328add_sack:
4329 if (tcp_is_sack(tp))
4330 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4331end:
4332 if (skb) {
4333 tcp_grow_window(sk, skb);
4334 skb_set_owner_r(skb, sk);
4335 }
4336}
4337
4338static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4339 bool *fragstolen)
4340{
4341 int eaten;
4342 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4343
4344 __skb_pull(skb, hdrlen);
4345 eaten = (tail &&
4346 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4347 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4348 if (!eaten) {
4349 __skb_queue_tail(&sk->sk_receive_queue, skb);
4350 skb_set_owner_r(skb, sk);
4351 }
4352 return eaten;
4353}
4354
4355int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4356{
4357 struct sk_buff *skb;
4358 bool fragstolen;
4359
4360 if (size == 0)
4361 return 0;
4362
4363 skb = alloc_skb(size, sk->sk_allocation);
4364 if (!skb)
4365 goto err;
4366
4367 if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4368 goto err_free;
4369
4370 if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
4371 goto err_free;
4372
4373 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4374 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4375 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4376
4377 if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
4378 WARN_ON_ONCE(fragstolen);
4379 __kfree_skb(skb);
4380 }
4381 return size;
4382
4383err_free:
4384 kfree_skb(skb);
4385err:
4386 return -ENOMEM;
4387}
4388
4389static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4390{
4391 struct tcp_sock *tp = tcp_sk(sk);
4392 int eaten = -1;
4393 bool fragstolen = false;
4394
4395 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4396 goto drop;
4397
4398 skb_dst_drop(skb);
4399 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4400
4401 tcp_ecn_accept_cwr(tp, skb);
4402
4403 tp->rx_opt.dsack = 0;
4404
4405
4406
4407
4408
4409 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4410 if (tcp_receive_window(tp) == 0)
4411 goto out_of_window;
4412
4413
4414 if (tp->ucopy.task == current &&
4415 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4416 sock_owned_by_user(sk) && !tp->urg_data) {
4417 int chunk = min_t(unsigned int, skb->len,
4418 tp->ucopy.len);
4419
4420 __set_current_state(TASK_RUNNING);
4421
4422 local_bh_enable();
4423 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4424 tp->ucopy.len -= chunk;
4425 tp->copied_seq += chunk;
4426 eaten = (chunk == skb->len);
4427 tcp_rcv_space_adjust(sk);
4428 }
4429 local_bh_disable();
4430 }
4431
4432 if (eaten <= 0) {
4433queue_and_out:
4434 if (eaten < 0 &&
4435 tcp_try_rmem_schedule(sk, skb, skb->truesize))
4436 goto drop;
4437
4438 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4439 }
4440 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4441 if (skb->len)
4442 tcp_event_data_recv(sk, skb);
4443 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4444 tcp_fin(sk);
4445
4446 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4447 tcp_ofo_queue(sk);
4448
4449
4450
4451
4452 if (skb_queue_empty(&tp->out_of_order_queue))
4453 inet_csk(sk)->icsk_ack.pingpong = 0;
4454 }
4455
4456 if (tp->rx_opt.num_sacks)
4457 tcp_sack_remove(tp);
4458
4459 tcp_fast_path_check(sk);
4460
4461 if (eaten > 0)
4462 kfree_skb_partial(skb, fragstolen);
4463 if (!sock_flag(sk, SOCK_DEAD))
4464 sk->sk_data_ready(sk);
4465 return;
4466 }
4467
4468 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4469
4470 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4471 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4472
4473out_of_window:
4474 tcp_enter_quickack_mode(sk);
4475 inet_csk_schedule_ack(sk);
4476drop:
4477 __kfree_skb(skb);
4478 return;
4479 }
4480
4481
4482 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4483 goto out_of_window;
4484
4485 tcp_enter_quickack_mode(sk);
4486
4487 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4488
4489 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4490 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4491 TCP_SKB_CB(skb)->end_seq);
4492
4493 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4494
4495
4496
4497
4498 if (!tcp_receive_window(tp))
4499 goto out_of_window;
4500 goto queue_and_out;
4501 }
4502
4503 tcp_data_queue_ofo(sk, skb);
4504}
4505
4506static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4507 struct sk_buff_head *list)
4508{
4509 struct sk_buff *next = NULL;
4510
4511 if (!skb_queue_is_last(list, skb))
4512 next = skb_queue_next(list, skb);
4513
4514 __skb_unlink(skb, list);
4515 __kfree_skb(skb);
4516 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4517
4518 return next;
4519}
4520
/* Collapse contiguous sequence of skbs head..tail with
 * sequence numbers start..end.
 *
 * First, check that the queue is collapsible and find the point where
 * collapsing can be useful. Then collapse by copying small skbs into
 * larger ones.
 *
 * Segments with FIN/SYN are not collapsed (only because this simplifies
 * the code).
 */
4529static void
4530tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4531 struct sk_buff *head, struct sk_buff *tail,
4532 u32 start, u32 end)
4533{
4534 struct sk_buff *skb, *n;
4535 bool end_of_skbs;
4536
4537
4538
4539 skb = head;
4540restart:
4541 end_of_skbs = true;
4542 skb_queue_walk_from_safe(list, skb, n) {
4543 if (skb == tail)
4544 break;
4545
4546 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4547 skb = tcp_collapse_one(sk, skb, list);
4548 if (!skb)
4549 break;
4550 goto restart;
4551 }
4552
4553
4554
4555
4556
4557
4558 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4559 (tcp_win_from_space(skb->truesize) > skb->len ||
4560 before(TCP_SKB_CB(skb)->seq, start))) {
4561 end_of_skbs = false;
4562 break;
4563 }
4564
4565 if (!skb_queue_is_last(list, skb)) {
4566 struct sk_buff *next = skb_queue_next(list, skb);
4567 if (next != tail &&
4568 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4569 end_of_skbs = false;
4570 break;
4571 }
4572 }
4573
4574
4575 start = TCP_SKB_CB(skb)->end_seq;
4576 }
4577 if (end_of_skbs ||
4578 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4579 return;
4580
4581 while (before(start, end)) {
4582 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4583 struct sk_buff *nskb;
4584
4585 nskb = alloc_skb(copy, GFP_ATOMIC);
4586 if (!nskb)
4587 return;
4588
4589 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4590 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4591 __skb_queue_before(list, skb, nskb);
4592 skb_set_owner_r(nskb, sk);
4593
4594
4595 while (copy > 0) {
4596 int offset = start - TCP_SKB_CB(skb)->seq;
4597 int size = TCP_SKB_CB(skb)->end_seq - start;
4598
4599 BUG_ON(offset < 0);
4600 if (size > 0) {
4601 size = min(copy, size);
4602 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4603 BUG();
4604 TCP_SKB_CB(nskb)->end_seq += size;
4605 copy -= size;
4606 start += size;
4607 }
4608 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4609 skb = tcp_collapse_one(sk, skb, list);
4610 if (!skb ||
4611 skb == tail ||
4612 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4613 return;
4614 }
4615 }
4616 }
4617}
4618
4619
4620
4621
4622static void tcp_collapse_ofo_queue(struct sock *sk)
4623{
4624 struct tcp_sock *tp = tcp_sk(sk);
4625 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4626 struct sk_buff *head;
4627 u32 start, end;
4628
4629 if (skb == NULL)
4630 return;
4631
4632 start = TCP_SKB_CB(skb)->seq;
4633 end = TCP_SKB_CB(skb)->end_seq;
4634 head = skb;
4635
4636 for (;;) {
4637 struct sk_buff *next = NULL;
4638
4639 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4640 next = skb_queue_next(&tp->out_of_order_queue, skb);
4641 skb = next;
4642
4643
4644
4645 if (!skb ||
4646 after(TCP_SKB_CB(skb)->seq, end) ||
4647 before(TCP_SKB_CB(skb)->end_seq, start)) {
4648 tcp_collapse(sk, &tp->out_of_order_queue,
4649 head, skb, start, end);
4650 head = skb;
4651 if (!skb)
4652 break;
4653
4654 start = TCP_SKB_CB(skb)->seq;
4655 end = TCP_SKB_CB(skb)->end_seq;
4656 } else {
4657 if (before(TCP_SKB_CB(skb)->seq, start))
4658 start = TCP_SKB_CB(skb)->seq;
4659 if (after(TCP_SKB_CB(skb)->end_seq, end))
4660 end = TCP_SKB_CB(skb)->end_seq;
4661 }
4662 }
4663}
4664
4665
4666
4667
4668
4669static bool tcp_prune_ofo_queue(struct sock *sk)
4670{
4671 struct tcp_sock *tp = tcp_sk(sk);
4672 bool res = false;
4673
4674 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4675 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4676 __skb_queue_purge(&tp->out_of_order_queue);
4677
4678
4679
4680
4681
4682
4683 if (tp->rx_opt.sack_ok)
4684 tcp_sack_reset(&tp->rx_opt);
4685 sk_mem_reclaim(sk);
4686 res = true;
4687 }
4688 return res;
4689}
4690
/* Reduce allocated memory if we can, trying to get
 * the socket within its memory limits again.
 *
 * Return less than zero if we should start dropping frames
 * until the socket owner gets it right.
 */
4698static int tcp_prune_queue(struct sock *sk)
4699{
4700 struct tcp_sock *tp = tcp_sk(sk);
4701
4702 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4703
4704 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4705
4706 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4707 tcp_clamp_window(sk);
4708 else if (sk_under_memory_pressure(sk))
4709 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4710
4711 tcp_collapse_ofo_queue(sk);
4712 if (!skb_queue_empty(&sk->sk_receive_queue))
4713 tcp_collapse(sk, &sk->sk_receive_queue,
4714 skb_peek(&sk->sk_receive_queue),
4715 NULL,
4716 tp->copied_seq, tp->rcv_nxt);
4717 sk_mem_reclaim(sk);
4718
4719 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4720 return 0;
4721
4722
4723
4724
4725 tcp_prune_ofo_queue(sk);
4726
4727 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4728 return 0;
4729
4730
4731
4732
4733
4734 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4735
4736
4737 tp->pred_flags = 0;
4738 return -1;
4739}
4740
4741static bool tcp_should_expand_sndbuf(const struct sock *sk)
4742{
4743 const struct tcp_sock *tp = tcp_sk(sk);
4744
4745
4746
4747
4748 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4749 return false;
4750
4751
4752 if (sk_under_memory_pressure(sk))
4753 return false;
4754
4755
4756 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4757 return false;
4758
4759
4760 if (tp->packets_out >= tp->snd_cwnd)
4761 return false;
4762
4763 return true;
4764}
4765
4766
4767
4768
4769
4770
4771
4772static void tcp_new_space(struct sock *sk)
4773{
4774 struct tcp_sock *tp = tcp_sk(sk);
4775
4776 if (tcp_should_expand_sndbuf(sk)) {
4777 tcp_sndbuf_expand(sk);
4778 tp->snd_cwnd_stamp = tcp_time_stamp;
4779 }
4780
4781 sk->sk_write_space(sk);
4782}
4783
4784static void tcp_check_space(struct sock *sk)
4785{
4786 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4787 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4788 if (sk->sk_socket &&
4789 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4790 tcp_new_space(sk);
4791 }
4792}
4793
4794static inline void tcp_data_snd_check(struct sock *sk)
4795{
4796 tcp_push_pending_frames(sk);
4797 tcp_check_space(sk);
4798}
4799
4800
4801
4802
4803static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4804{
4805 struct tcp_sock *tp = tcp_sk(sk);
4806
4807
4808 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4809
4810
4811
4812 __tcp_select_window(sk) >= tp->rcv_wnd) ||
4813
4814 tcp_in_quickack_mode(sk) ||
4815
4816 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4817
4818 tcp_send_ack(sk);
4819 } else {
4820
4821 tcp_send_delayed_ack(sk);
4822 }
4823}
4824
4825static inline void tcp_ack_snd_check(struct sock *sk)
4826{
4827 if (!inet_csk_ack_scheduled(sk)) {
4828
4829 return;
4830 }
4831 __tcp_ack_snd_check(sk, 1);
4832}
4833
/*
 *	This routine is only called when we have urgent data signalled.
 *	It validates the urgent pointer, delivers SIGURG to the socket
 *	owner and records where the urgent byte will be. Note that, unless
 *	sysctl_tcp_stdurg is set, the urgent pointer is interpreted the BSD
 *	way, i.e. as pointing just past the urgent byte.
 */
4844static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
4845{
4846 struct tcp_sock *tp = tcp_sk(sk);
4847 u32 ptr = ntohs(th->urg_ptr);
4848
4849 if (ptr && !sysctl_tcp_stdurg)
4850 ptr--;
4851 ptr += ntohl(th->seq);
4852
4853
4854 if (after(tp->copied_seq, ptr))
4855 return;
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867 if (before(ptr, tp->rcv_nxt))
4868 return;
4869
4870
4871 if (tp->urg_data && !after(ptr, tp->urg_seq))
4872 return;
4873
4874
4875 sk_send_sigurg(sk);
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4893 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4894 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4895 tp->copied_seq++;
4896 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4897 __skb_unlink(skb, &sk->sk_receive_queue);
4898 __kfree_skb(skb);
4899 }
4900 }
4901
4902 tp->urg_data = TCP_URG_NOTYET;
4903 tp->urg_seq = ptr;
4904
4905
4906 tp->pred_flags = 0;
4907}
4908
4909
4910static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
4911{
4912 struct tcp_sock *tp = tcp_sk(sk);
4913
4914
4915 if (th->urg)
4916 tcp_check_urg(sk, th);
4917
4918
4919 if (tp->urg_data == TCP_URG_NOTYET) {
4920 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
4921 th->syn;
4922
4923
4924 if (ptr < skb->len) {
4925 u8 tmp;
4926 if (skb_copy_bits(skb, ptr, &tmp, 1))
4927 BUG();
4928 tp->urg_data = TCP_URG_VALID | tmp;
4929 if (!sock_flag(sk, SOCK_DEAD))
4930 sk->sk_data_ready(sk);
4931 }
4932 }
4933}
4934
4935static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4936{
4937 struct tcp_sock *tp = tcp_sk(sk);
4938 int chunk = skb->len - hlen;
4939 int err;
4940
4941 local_bh_enable();
4942 if (skb_csum_unnecessary(skb))
4943 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
4944 else
4945 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
4946 tp->ucopy.iov);
4947
4948 if (!err) {
4949 tp->ucopy.len -= chunk;
4950 tp->copied_seq += chunk;
4951 tcp_rcv_space_adjust(sk);
4952 }
4953
4954 local_bh_disable();
4955 return err;
4956}
4957
4958static __sum16 __tcp_checksum_complete_user(struct sock *sk,
4959 struct sk_buff *skb)
4960{
4961 __sum16 result;
4962
4963 if (sock_owned_by_user(sk)) {
4964 local_bh_enable();
4965 result = __tcp_checksum_complete(skb);
4966 local_bh_disable();
4967 } else {
4968 result = __tcp_checksum_complete(skb);
4969 }
4970 return result;
4971}
4972
4973static inline bool tcp_checksum_complete_user(struct sock *sk,
4974 struct sk_buff *skb)
4975{
4976 return !skb_csum_unnecessary(skb) &&
4977 __tcp_checksum_complete_user(sk, skb);
4978}
4979
/* Does PAWS and seqno based validation of an incoming segment; also handles
 * RST and SYN challenge-ACK processing (RFC 5961). Returns true if the
 * segment should be processed further, false if it was consumed or dropped.
 */
4983static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
4984 const struct tcphdr *th, int syn_inerr)
4985{
4986 struct tcp_sock *tp = tcp_sk(sk);
4987
4988
4989 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
4990 tcp_paws_discard(sk, skb)) {
4991 if (!th->rst) {
4992 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
4993 tcp_send_dupack(sk, skb);
4994 goto discard;
4995 }
4996
4997 }
4998
4999
5000 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5001
5002
5003
5004
5005
5006
5007 if (!th->rst) {
5008 if (th->syn)
5009 goto syn_challenge;
5010 tcp_send_dupack(sk, skb);
5011 }
5012 goto discard;
5013 }
5014
5015
5016 if (th->rst) {
5017
5018
5019
5020
5021
5022
5023 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
5024 tcp_reset(sk);
5025 else
5026 tcp_send_challenge_ack(sk);
5027 goto discard;
5028 }
5029
5030
5031
5032
5033
5034
5035 if (th->syn) {
5036syn_challenge:
5037 if (syn_inerr)
5038 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5039 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5040 tcp_send_challenge_ack(sk);
5041 goto discard;
5042 }
5043
5044 return true;
5045
5046discard:
5047 __kfree_skb(skb);
5048 return false;
5049}
5050
/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path. The fast path is
 *	disabled when:
 *	- A zero window was announced from us - zero window probing
 *	  is only handled properly on the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left.
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags).
 *	- Data is sent in both directions. The fast path only supports pure
 *	  senders or pure receivers (this means either the sequence number
 *	  or the ack value must stay constant).
 *	- An unexpected TCP option appears.
 *
 *	When these conditions are satisfied, the fast path is taken and the
 *	header-prediction checks below are enough to accept the segment;
 *	otherwise the segment goes through the full slow path.
 */
5074void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5075 const struct tcphdr *th, unsigned int len)
5076{
5077 struct tcp_sock *tp = tcp_sk(sk);
5078
5079 if (unlikely(sk->sk_rx_dst == NULL))
5080 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096 tp->rx_opt.saw_tstamp = 0;
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5108 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5109 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5110 int tcp_header_len = tp->tcp_header_len;
5111
5112
5113
5114
5115
5116
5117
5118 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5119
5120 if (!tcp_parse_aligned_timestamp(tp, th))
5121 goto slow_path;
5122
5123
5124 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5125 goto slow_path;
5126
5127
5128
5129
5130
5131
5132 }
5133
5134 if (len <= tcp_header_len) {
5135
5136 if (len == tcp_header_len) {
5137
5138
5139
5140
5141 if (tcp_header_len ==
5142 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5143 tp->rcv_nxt == tp->rcv_wup)
5144 tcp_store_ts_recent(tp);
5145
5146
5147
5148
5149 tcp_ack(sk, skb, 0);
5150 __kfree_skb(skb);
5151 tcp_data_snd_check(sk);
5152 return;
5153 } else {
5154 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5155 goto discard;
5156 }
5157 } else {
5158 int eaten = 0;
5159 bool fragstolen = false;
5160
5161 if (tp->ucopy.task == current &&
5162 tp->copied_seq == tp->rcv_nxt &&
5163 len - tcp_header_len <= tp->ucopy.len &&
5164 sock_owned_by_user(sk)) {
5165 __set_current_state(TASK_RUNNING);
5166
5167 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
5168
5169
5170
5171
5172 if (tcp_header_len ==
5173 (sizeof(struct tcphdr) +
5174 TCPOLEN_TSTAMP_ALIGNED) &&
5175 tp->rcv_nxt == tp->rcv_wup)
5176 tcp_store_ts_recent(tp);
5177
5178 tcp_rcv_rtt_measure_ts(sk, skb);
5179
5180 __skb_pull(skb, tcp_header_len);
5181 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5182 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5183 eaten = 1;
5184 }
5185 }
5186 if (!eaten) {
5187 if (tcp_checksum_complete_user(sk, skb))
5188 goto csum_error;
5189
5190 if ((int)skb->truesize > sk->sk_forward_alloc)
5191 goto step5;
5192
5193
5194
5195
5196
5197 if (tcp_header_len ==
5198 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5199 tp->rcv_nxt == tp->rcv_wup)
5200 tcp_store_ts_recent(tp);
5201
5202 tcp_rcv_rtt_measure_ts(sk, skb);
5203
5204 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5205
5206
5207 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5208 &fragstolen);
5209 }
5210
5211 tcp_event_data_recv(sk, skb);
5212
5213 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5214
5215 tcp_ack(sk, skb, FLAG_DATA);
5216 tcp_data_snd_check(sk);
5217 if (!inet_csk_ack_scheduled(sk))
5218 goto no_ack;
5219 }
5220
5221 __tcp_ack_snd_check(sk, 0);
5222no_ack:
5223 if (eaten)
5224 kfree_skb_partial(skb, fragstolen);
5225 sk->sk_data_ready(sk);
5226 return;
5227 }
5228 }

slow_path:
	if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
		goto csum_error;

	if (!th->ack && !th->rst && !th->syn)
		goto discard;

	/*
	 *	Standard slow path.
	 */
	if (!tcp_validate_incoming(sk, skb, th, 1))
		return;

step5:
	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
		goto discard;

	tcp_rcv_rtt_measure_ts(sk, skb);

	/* Process urgent data. */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	tcp_data_queue(sk, skb);

	tcp_data_snd_check(sk);
	tcp_ack_snd_check(sk);
	return;

csum_error:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);

discard:
	__kfree_skb(skb);
}
EXPORT_SYMBOL(tcp_rcv_established);

void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_set_state(sk, TCP_ESTABLISHED);

	if (skb != NULL) {
		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
		security_inet_conn_established(sk, skb);
	}

	/* Make sure socket is routed, for correct metrics. */
	icsk->icsk_af_ops->rebuild_header(sk);

	tcp_init_metrics(sk);

	tcp_init_congestion_control(sk);

	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
	tp->lsndtime = tcp_time_stamp;

	tcp_init_buffer_space(sk);

	if (sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

	if (!tp->rx_opt.snd_wscale)
		__tcp_fast_path_on(tp, tp->snd_wnd);
	else
		tp->pred_flags = 0;
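
	/* A note on the branch above, as a sketch of how the fast path comes
	 * back when window scaling is in use: the window carried by the
	 * SYN/SYN-ACK is unscaled (RFC 1323/7323), so snd_wnd cannot yet be
	 * trusted for header prediction and pred_flags stays 0 here.  It is
	 * rebuilt later, e.g. via tcp_fast_path_check()/tcp_fast_path_on()
	 * in include/net/tcp.h, roughly
	 *
	 *	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
	 *
	 * once a properly scaled window update has been processed.
	 */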

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);
		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
	}
}

static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
				    struct tcp_fastopen_cookie *cookie)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
	u16 mss = tp->rx_opt.mss_clamp;
	bool syn_drop;

	if (mss == tp->rx_opt.user_mss) {
		struct tcp_options_received opt;

		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
		tcp_clear_options(&opt);
		opt.user_mss = opt.mss_clamp = 0;
		tcp_parse_options(synack, &opt, 0, NULL);
		mss = opt.mss_clamp;
	}

	if (!tp->syn_fastopen)	/* Ignore an unsolicited cookie */
		cookie->len = -1;

	/* The SYN-ACK neither carries the cookie nor acknowledges the data.
	 * Presumably the remote received only the retransmitted (regular)
	 * SYNs: either the original SYN-data or the corresponding SYN-ACK
	 * was lost.
	 */
	syn_drop = (cookie->len <= 0 && data && tp->total_retrans);

	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);

	if (data) { /* Retransmit unacked data in SYN */
		tcp_for_write_queue_from(data, sk) {
			if (data == tcp_send_head(sk) ||
			    __tcp_retransmit_skb(sk, data))
				break;
		}
		tcp_rearm_rto(sk);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
		return true;
	}
	tp->syn_data_acked = tp->syn_data;
	if (tp->syn_data_acked)
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
	return false;
}
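
/* Rough summary of the outcomes of tcp_rcv_fastopen_synack() above for an
 * active Fast Open attempt: the (cookie, MSS, syn_drop) result is always
 * cached for the next connection; syn_drop is set when the SYN had to be
 * retransmitted and the SYN-ACK neither carries a cookie nor ACKs the SYN
 * data.  If any SYN data is still unacknowledged, it is pushed back for
 * retransmission and the function returns true, in which case the caller
 * does not send a pure ACK (the retransmitted data segments acknowledge
 * the SYN-ACK instead).
 */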

static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 const struct tcphdr *th, unsigned int len)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int saved_clamp = tp->rx_opt.mss_clamp;

	tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
		tp->rx_opt.rcv_tsecr -= tp->tsoffset;

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
		if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
		    after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
			goto reset_and_undo;

		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}
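
		/* Example of the tsecr sanity check above, with jiffies-based
		 * timestamps: if the SYN was sent at retrans_stamp = 1000 and
		 * tcp_time_stamp is now 1005, an ACK echoing tsecr = 1003 is
		 * plausible and accepted, while tsecr = 900 (older than our
		 * SYN) or tsecr = 2000 (from the future) cannot be an echo of
		 * this connection attempt and the ACK is rejected
		 * (LINUX_MIB_PAWSACTIVEREJECTED) before any state is touched.
		 */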

		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */
		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 *
		 *    See note below!
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */
		tcp_ecn_rcv_synack(tp, th);

		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);

		if (!tp->rx_opt.wscale_ok) {
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}
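
		/* Example: without a successful wscale negotiation both scale
		 * factors are forced to 0 and window_clamp is capped at 65535,
		 * the largest window representable in the 16-bit header field.
		 * Even with wscale_ok, the SYN-ACK window above is taken
		 * literally (never scaled, per RFC 1323/7323); scaling only
		 * applies to later segments, e.g. a subsequent ACK carrying
		 * window = 1000 with snd_wscale = 7 means 1000 << 7 = 128000
		 * bytes.
		 */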

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized.
		 */
		tp->copied_seq = tp->rcv_nxt;

		smp_mb();

		tcp_finish_connect(sk, skb);

		if ((tp->syn_fastopen || tp->syn_data) &&
		    tcp_rcv_fastopen_synack(sk, skb, &foc))
			return -1;

		if (sk->sk_write_pending ||
		    icsk->icsk_accept_queue.rskq_defer_accept ||
		    icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);
			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
			tcp_enter_quickack_mode(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			__kfree_skb(skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment */

	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);
		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		tcp_ecn_rcv_syn(tp, th);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		tcp_send_synack(sk);
#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this (except that we must
		 * either change tcp_recvmsg() to prevent it from returning data
		 * before 3WHS completes per RFC793, or employ TCP Fast Open).
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
		 * is not multihomed capable anymore.
		 */
		return -1;
#else
		goto discard;
#endif
	}
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}

/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It is called for both IPv4 and IPv6 and must therefore be
 *	address family independent.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  const struct tcphdr *th, unsigned int len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock *req;
	int queued = 0;
	bool acceptable;
	u32 synack_stamp;

	tp->rx_opt.saw_tstamp = 0;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (th->fin)
				goto discard;
			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
				return 1;

			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed unless
			 * it's still in use.
			 */
			kfree_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:
		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	req = tp->fastopen_rsk;
	if (req != NULL) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
		    sk->sk_state != TCP_FIN_WAIT1);

		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
			goto discard;
	}

	if (!th->ack && !th->rst && !th->syn)
		goto discard;

	if (!tcp_validate_incoming(sk, skb, th, 0))
		return 0;

	/* step 5: check the ACK field */
	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
				      FLAG_UPDATE_TS_RECENT) > 0;

	switch (sk->sk_state) {
	case TCP_SYN_RECV:
		if (!acceptable)
			return 1;

		/* Once we leave TCP_SYN_RECV, we no longer need req,
		 * so release it.
		 */
		if (req) {
			synack_stamp = tcp_rsk(req)->snt_synack;
			tp->total_retrans = req->num_retrans;
			reqsk_fastopen_remove(sk, req, false);
		} else {
			synack_stamp = tp->lsndtime;
			/* Make sure socket is routed, for correct metrics. */
			icsk->icsk_af_ops->rebuild_header(sk);
			tcp_init_congestion_control(sk);

			tcp_mtup_init(sk);
			tp->copied_seq = tp->rcv_nxt;
			tcp_init_buffer_space(sk);
		}
		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);
		sk->sk_state_change(sk);

		/* Note that this wakeup is only for the marginal crossed-SYN
		 * case. Passively opened sockets are not woken up, because
		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
		 */
		if (sk->sk_socket)
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);

		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
		tcp_synack_rtt_meas(sk, synack_stamp);

		if (tp->rx_opt.tstamp_ok)
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

		if (req) {
			/* Re-arm the timer because data may have been sent out.
			 * This is similar to the regular data transmission case
			 * when new data has just been ack'ed.
			 *
			 * (TFO) - we could try to be more aggressive and
			 * retransmit any data sooner based on when it
			 * was sent out.
			 */
			tcp_rearm_rto(sk);
		} else
			tcp_init_metrics(sk);

		tcp_update_pacing_rate(sk);

		/* Prevent spurious tcp_cwnd_restart() on first data packet. */
		tp->lsndtime = tcp_time_stamp;

		tcp_initialize_rcv_mss(sk);
		tcp_fast_path_on(tp);
		break;

	case TCP_FIN_WAIT1: {
		struct dst_entry *dst;
		int tmo;

		/* A passive Fast Open socket can reach FIN_WAIT1 with the
		 * request sock still attached (it sent data + FIN before the
		 * handshake-completing ACK arrived); the first acceptable ACK
		 * finishes the handshake, so drop the request below.
		 */
		if (req != NULL) {
			/* Return 1 (send a reset) if the ACK is not
			 * acceptable. RFC 793 would only generate a duplicate
			 * ACK here, but for a Fast Open socket it is safer to
			 * treat this like the TCP_SYN_RECV case above.
			 */
			if (!acceptable)
				return 1;

			reqsk_fastopen_remove(sk, req, false);
			tcp_rearm_rto(sk);
		}
		if (tp->snd_una != tp->write_seq)
			break;

		tcp_set_state(sk, TCP_FIN_WAIT2);
		sk->sk_shutdown |= SEND_SHUTDOWN;

		dst = __sk_dst_get(sk);
		if (dst)
			dst_confirm(dst);

		if (!sock_flag(sk, SOCK_DEAD)) {
			/* Wake up lingering close() */
			sk->sk_state_change(sk);
			break;
		}

		if (tp->linger2 < 0 ||
		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
		     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
			tcp_done(sk);
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
			return 1;
		}

		tmo = tcp_fin_time(sk);
		if (tmo > TCP_TIMEWAIT_LEN) {
			inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
		} else if (th->fin || sock_owned_by_user(sk)) {
			/* Bad case. We could lose such FIN otherwise.
			 * It is not a big problem, but it looks confusing
			 * and not so rare event. We still can lose it now,
			 * if it spins in bh_lock_sock(), but it is really
			 * marginal case.
			 */
			inet_csk_reset_keepalive_timer(sk, tmo);
		} else {
			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
			goto discard;
		}
		break;
	}
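
	/* Example of the FIN_WAIT2 timeout split above, assuming the default
	 * settings (tcp_fin_timeout = 60 s, TCP_TIMEWAIT_LEN = 60 s) and a
	 * small RTO: tcp_fin_time() returns 60 s, which is not greater than
	 * TCP_TIMEWAIT_LEN, so an orphaned socket with no FIN pending goes
	 * straight to a timewait-style FIN_WAIT2 via
	 * tcp_time_wait(sk, TCP_FIN_WAIT2, tmo).  Only with tcp_fin_timeout
	 * raised above 60 s (or a very large RTO) does the socket first sit
	 * in real FIN_WAIT2 under the keepalive timer for the excess time.
	 */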

	case TCP_CLOSING:
		if (tp->snd_una == tp->write_seq) {
			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
			goto discard;
		}
		break;

	case TCP_LAST_ACK:
		if (tp->snd_una == tp->write_seq) {
			tcp_update_metrics(sk);
			tcp_done(sk);
			goto discard;
		}
		break;
	}

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
		/* fall through: old data is still acknowledged below */
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		/* Fall through */
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data_queue() could have moved the socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);

static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	if (family == AF_INET)
		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
			       &ireq->ir_rmt_addr, port);
#if IS_ENABLED(CONFIG_IPV6)
	else if (family == AF_INET6)
		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"),
			       &ireq->ir_v6_rmt_addr, port);
#endif
}

/* RFC 3168, 6.1.1: SYN packets must not have ECT/ECN bits set.
 *
 * If we receive a SYN packet with these bits set, it means a
 * network is playing bad games with TOS bits. In order to
 * avoid possible false congestion notifications, we disable
 * TCP ECN negotiation.
 *
 * Exception: the congestion control algorithm wants ECN. This is
 * required for DCTCP-style congestion control, which sets ECT on all
 * packets, including the SYN. We invert the test in this case: if our
 * local socket wants ECN but the peer only set ece/cwr (and not ECT in
 * the IP header), it is probably a non-DCTCP-aware sender.
 */
static void tcp_ecn_create_request(struct request_sock *req,
				   const struct sk_buff *skb,
				   const struct sock *listen_sk)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct net *net = sock_net(listen_sk);
	bool th_ecn = th->ece && th->cwr;
	bool ect, need_ecn;

	if (!th_ecn)
		return;

	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
	need_ecn = tcp_ca_needs_ecn(listen_sk);

	if (!ect && !need_ecn && net->ipv4.sysctl_tcp_ecn)
		inet_rsk(req)->ecn_ok = 1;
	else if (ect && need_ecn)
		inet_rsk(req)->ecn_ok = 1;
}
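
/* The negotiation implemented by tcp_ecn_create_request() above, in table
 * form (th_ecn, i.e. ECE+CWR on the SYN, is already required):
 *
 *   ECT on SYN   ca needs ECN   sysctl_tcp_ecn   ecn_ok
 *       no            no             on            1   (RFC 3168 peer)
 *       no            no             off           0
 *       no            yes            -             0   (likely non-DCTCP peer)
 *       yes           no             -             0   (bogus ECT on SYN)
 *       yes           yes            -             1   (DCTCP-style peer)
 */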

int tcp_conn_request(struct request_sock_ops *rsk_ops,
		     const struct tcp_request_sock_ops *af_ops,
		     struct sock *sk, struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
	bool want_cookie = false, fastopen;
	struct flowi fl;
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int err;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if ((sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet_reqsk_alloc(rsk_ops);
	if (!req)
		goto drop;

	tcp_rsk(req)->af_specific = af_ops;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = af_ops->mss_clamp;
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb, sk);

	af_ops->init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		tcp_ecn_create_request(req, skb, sk);

	if (want_cookie) {
		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tcp_death_row.sysctl_tw_recycle) {
			bool strict;

			dst = af_ops->route_req(sk, &fl, req, &strict);

			if (dst && strict &&
			    !tcp_peer_is_proven(req, dst, true,
						tmp_opt.saw_tstamp)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false,
					     tmp_opt.saw_tstamp)) {
			/* Without syncookies the last quarter of the
			 * backlog is reserved for destinations proven
			 * to be alive, so that we keep communicating
			 * with destinations already on our "known good"
			 * list.  No further checks are necessary.
			 */
			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
				    rsk_ops->family);
			goto drop_and_release;
		}

		isn = af_ops->init_seq(skb);
	}
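
	/* Example of the "last quarter" heuristic above with
	 * sysctl_max_syn_backlog = 1024 and syncookies disabled: once fewer
	 * than 256 request slots remain (queue length > 768), new requests
	 * are only accepted from peers previously proven alive via
	 * tcp_peer_is_proven() (cached per-destination metrics); everyone
	 * else is dropped and logged by pr_drop_req().  With syncookies
	 * enabled, that clause is skipped entirely.
	 */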
	if (!dst) {
		dst = af_ops->route_req(sk, &fl, req, NULL);
		if (!dst)
			goto drop_and_free;
	}

	tcp_rsk(req)->snt_isn = isn;
	tcp_openreq_init_rwin(req, sk, dst);
	fastopen = !want_cookie &&
		   tcp_try_fastopen(sk, skb, req, &foc, dst);
	err = af_ops->send_synack(sk, dst, &fl, req,
				  skb_get_queue_mapping(skb), &foc);
	if (!fastopen) {
		if (err || want_cookie)
			goto drop_and_free;

		tcp_rsk(req)->listener = NULL;
		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}

	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}
EXPORT_SYMBOL(tcp_conn_request);