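/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP):
 *		the input path (incoming segment and ACK processing).
 */
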
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_max_reordering __read_mostly = 300;
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

/* rate limit for sending challenge ACKs (RFC 5961) */
int sysctl_tcp_challenge_ack_limit = 1000;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_min_rtt_wlen __read_mostly = 300;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;
int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;

#define FLAG_DATA		0x01	/* Incoming frame contained data.	*/
#define FLAG_WIN_UPDATE		0x02	/* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04	/* This ACK acknowledged new data.	*/
#define FLAG_RETRANS_DATA_ACKED	0x08	/* Some of the acked data was retransmitted. */
#define FLAG_SYN_ACKED		0x10	/* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20	/* New SACK.				*/
#define FLAG_ECE		0x40	/* ECE in this ACK.			*/
#define FLAG_LOST_RETRANS	0x80	/* This ACK marks some retransmission lost. */
#define FLAG_SLOWPATH		0x100	/* Do not skip RFC checks for window update. */
#define FLAG_ORIG_SACK_ACKED	0x200	/* Never-retransmitted data were (s)acked. */
#define FLAG_SND_UNA_ADVANCED	0x400	/* Snd_una was changed (!= FLAG_DATA_ACKED). */
#define FLAG_DSACKING_ACK	0x800	/* SACK blocks contained D-SACK info.	*/
#define FLAG_SACK_RENEGING	0x2000	/* snd_una advanced to a sacked seq.	*/
#define FLAG_UPDATE_TS_RECENT	0x4000	/* tcp_replace_ts_recent()		*/

#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0 /* no loss recovery to do */
#define REXMIT_LOST	1 /* retransmit packets marked lost */
#define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */
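
/* Adapt the MSS value used to make delayed ack decisions to the
 * real world: incoming segments may have been coalesced (GSO/GRO),
 * so estimate the sender's effective MSS from observed segment sizes.
 */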
134static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
135{
136 struct inet_connection_sock *icsk = inet_csk(sk);
137 const unsigned int lss = icsk->icsk_ack.last_seg_size;
138 unsigned int len;
139
140 icsk->icsk_ack.last_seg_size = 0;
141
142
143
144
145 len = skb_shinfo(skb)->gso_size ? : skb->len;
146 if (len >= icsk->icsk_ack.rcv_mss) {
147 icsk->icsk_ack.rcv_mss = len;
148 } else {
149
150
151
152
153
154 len += skb->data - skb_transport_header(skb);
155 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
156
157
158
159
160
161 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
162 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
163
164
165
166
167 len -= tcp_sk(sk)->tcp_header_len;
168 icsk->icsk_ack.last_seg_size = len;
169 if (len == lss) {
170 icsk->icsk_ack.rcv_mss = len;
171 return;
172 }
173 }
174 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
175 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
176 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
177 }
178}
179
180static void tcp_incr_quickack(struct sock *sk)
181{
182 struct inet_connection_sock *icsk = inet_csk(sk);
183 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
184
185 if (quickacks == 0)
186 quickacks = 2;
187 if (quickacks > icsk->icsk_ack.quick)
188 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
189}
190
191static void tcp_enter_quickack_mode(struct sock *sk)
192{
193 struct inet_connection_sock *icsk = inet_csk(sk);
194 tcp_incr_quickack(sk);
195 icsk->icsk_ack.pingpong = 0;
196 icsk->icsk_ack.ato = TCP_ATO_MIN;
197}
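
/* Send ACKs quickly, if the "quick" count is not exhausted
 * and the session is not interactive (or the route demands it).
 */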
203static bool tcp_in_quickack_mode(struct sock *sk)
204{
205 const struct inet_connection_sock *icsk = inet_csk(sk);
206 const struct dst_entry *dst = __sk_dst_get(sk);
207
208 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
209 (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
210}
211
212static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
213{
214 if (tp->ecn_flags & TCP_ECN_OK)
215 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
216}
217
218static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
219{
220 if (tcp_hdr(skb)->cwr)
221 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
222}
223
224static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
225{
226 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
227}
228
229static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
230{
231 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
232 case INET_ECN_NOT_ECT:
233
234
235
236
237 if (tp->ecn_flags & TCP_ECN_SEEN)
238 tcp_enter_quickack_mode((struct sock *)tp);
239 break;
240 case INET_ECN_CE:
241 if (tcp_ca_needs_ecn((struct sock *)tp))
242 tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
243
244 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
245
246 tcp_enter_quickack_mode((struct sock *)tp);
247 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
248 }
249 tp->ecn_flags |= TCP_ECN_SEEN;
250 break;
251 default:
252 if (tcp_ca_needs_ecn((struct sock *)tp))
253 tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
254 tp->ecn_flags |= TCP_ECN_SEEN;
255 break;
256 }
257}
258
259static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
260{
261 if (tp->ecn_flags & TCP_ECN_OK)
262 __tcp_ecn_check_ce(tp, skb);
263}
264
265static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
266{
267 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
268 tp->ecn_flags &= ~TCP_ECN_OK;
269}
270
271static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
272{
273 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
274 tp->ecn_flags &= ~TCP_ECN_OK;
275}
276
277static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
278{
279 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
280 return true;
281 return false;
282}
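
/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */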
289static void tcp_sndbuf_expand(struct sock *sk)
290{
291 const struct tcp_sock *tp = tcp_sk(sk);
292 int sndmem, per_mss;
293 u32 nr_segs;
294
295
296
297
298 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
299 MAX_TCP_HEADER +
300 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
301
302 per_mss = roundup_pow_of_two(per_mss) +
303 SKB_DATA_ALIGN(sizeof(struct sk_buff));
304
305 nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
306 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
307
308
309
310
311
312 sndmem = 2 * nr_segs * per_mss;
313
314 if (sk->sk_sndbuf < sndmem)
315 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
316}
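
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * The advertised window must stay in sync with what we can actually
 * queue: every skb carries overhead (skb->truesize > payload), so a
 * window based on payload alone could overcommit receive memory.
 * This is the slow-path check that decides whether rcv_ssthresh may
 * still be raised for the current skb, given tcp_rmem[2].
 */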
344static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
345{
346 struct tcp_sock *tp = tcp_sk(sk);
347
348 int truesize = tcp_win_from_space(skb->truesize) >> 1;
349 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
350
351 while (tp->rcv_ssthresh <= window) {
352 if (truesize <= skb->len)
353 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
354
355 truesize >>= 1;
356 window >>= 1;
357 }
358 return 0;
359}
360
361static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
362{
363 struct tcp_sock *tp = tcp_sk(sk);
364
365
366 if (tp->rcv_ssthresh < tp->window_clamp &&
367 (int)tp->rcv_ssthresh < tcp_space(sk) &&
368 !tcp_under_memory_pressure(sk)) {
369 int incr;
370
371
372
373
374 if (tcp_win_from_space(skb->truesize) <= skb->len)
375 incr = 2 * tp->advmss;
376 else
377 incr = __tcp_grow_window(sk, skb);
378
379 if (incr) {
380 incr = max_t(int, incr, 2 * skb->len);
381 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
382 tp->window_clamp);
383 inet_csk(sk)->icsk_ack.quick |= 1;
384 }
385 }
386}
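
/* 3. Tuning rcvbuf, when connection enters established state. */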
389static void tcp_fixup_rcvbuf(struct sock *sk)
390{
391 u32 mss = tcp_sk(sk)->advmss;
392 int rcvmem;
393
394 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
395 tcp_default_init_rwnd(mss);
396
397
398
399
400 if (sysctl_tcp_moderate_rcvbuf)
401 rcvmem <<= 2;
402
403 if (sk->sk_rcvbuf < rcvmem)
404 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
405}
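
/* 4. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */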
410void tcp_init_buffer_space(struct sock *sk)
411{
412 struct tcp_sock *tp = tcp_sk(sk);
413 int maxwin;
414
415 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
416 tcp_fixup_rcvbuf(sk);
417 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
418 tcp_sndbuf_expand(sk);
419
420 tp->rcvq_space.space = tp->rcv_wnd;
421 tp->rcvq_space.time = tcp_time_stamp;
422 tp->rcvq_space.seq = tp->copied_seq;
423
424 maxwin = tcp_full_space(sk);
425
426 if (tp->window_clamp >= maxwin) {
427 tp->window_clamp = maxwin;
428
429 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
430 tp->window_clamp = max(maxwin -
431 (maxwin >> sysctl_tcp_app_win),
432 4 * tp->advmss);
433 }
434
435
436 if (sysctl_tcp_app_win &&
437 tp->window_clamp > 2 * tp->advmss &&
438 tp->window_clamp + tp->advmss > maxwin)
439 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
440
441 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
442 tp->snd_cwnd_stamp = tcp_time_stamp;
443}
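
/* 5. Recalculate window clamp after socket hit its memory bounds. */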
446static void tcp_clamp_window(struct sock *sk)
447{
448 struct tcp_sock *tp = tcp_sk(sk);
449 struct inet_connection_sock *icsk = inet_csk(sk);
450
451 icsk->icsk_ack.quick = 0;
452
453 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
454 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
455 !tcp_under_memory_pressure(sk) &&
456 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
457 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
458 sysctl_tcp_rmem[2]);
459 }
460 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
461 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
462}
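
/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer; we have no direct
 * information about it, so bound the guess by advmss, rcv_wnd and the
 * usual MSS limits.  It is later refined by tcp_measure_rcv_mss() and is
 * used for (delayed) ACK decisions.
 */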
471void tcp_initialize_rcv_mss(struct sock *sk)
472{
473 const struct tcp_sock *tp = tcp_sk(sk);
474 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
475
476 hint = min(hint, tp->rcv_wnd / 2);
477 hint = min(hint, TCP_MSS_DEFAULT);
478 hint = max(hint, TCP_MIN_MSS);
479
480 inet_csk(sk)->icsk_ack.rcv_mss = hint;
481}
482EXPORT_SYMBOL(tcp_initialize_rcv_mss);
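
/* Receiver "autotuning" code.
 *
 * RTT estimation without timestamps follows the Dynamic Right-Sizing
 * (DRS) approach: window-dependent samples (win_dep) may only shrink
 * the estimate, while regular samples are folded in with an EWMA much
 * like the sender-side srtt.
 */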
495static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
496{
497 u32 new_sample = tp->rcv_rtt_est.rtt;
498 long m = sample;
499
500 if (m == 0)
501 m = 1;
502
503 if (new_sample != 0) {
504
505
506
507
508
509
510
511
512
513
514 if (!win_dep) {
515 m -= (new_sample >> 3);
516 new_sample += m;
517 } else {
518 m <<= 3;
519 if (m < new_sample)
520 new_sample = m;
521 }
522 } else {
523
524 new_sample = m << 3;
525 }
526
527 if (tp->rcv_rtt_est.rtt != new_sample)
528 tp->rcv_rtt_est.rtt = new_sample;
529}
530
531static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
532{
533 if (tp->rcv_rtt_est.time == 0)
534 goto new_measure;
535 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
536 return;
537 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
538
539new_measure:
540 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
541 tp->rcv_rtt_est.time = tcp_time_stamp;
542}
543
544static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
545 const struct sk_buff *skb)
546{
547 struct tcp_sock *tp = tcp_sk(sk);
548 if (tp->rx_opt.rcv_tsecr &&
549 (TCP_SKB_CB(skb)->end_seq -
550 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
551 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
552}
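
/*
 * This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */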
558void tcp_rcv_space_adjust(struct sock *sk)
559{
560 struct tcp_sock *tp = tcp_sk(sk);
561 int time;
562 int copied;
563
564 time = tcp_time_stamp - tp->rcvq_space.time;
565 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
566 return;
567
568
569 copied = tp->copied_seq - tp->rcvq_space.seq;
570 if (copied <= tp->rcvq_space.space)
571 goto new_measure;
572
573
574
575
576
577
578
579
580
581
582 if (sysctl_tcp_moderate_rcvbuf &&
583 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
584 int rcvwin, rcvmem, rcvbuf;
585
586
587
588
589 rcvwin = (copied << 1) + 16 * tp->advmss;
590
591
592
593
594
595
596 if (copied >=
597 tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
598 if (copied >=
599 tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
600 rcvwin <<= 1;
601 else
602 rcvwin += (rcvwin >> 1);
603 }
604
605 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
606 while (tcp_win_from_space(rcvmem) < tp->advmss)
607 rcvmem += 128;
608
609 rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
610 if (rcvbuf > sk->sk_rcvbuf) {
611 sk->sk_rcvbuf = rcvbuf;
612
613
614 tp->window_clamp = rcvwin;
615 }
616 }
617 tp->rcvq_space.space = copied;
618
619new_measure:
620 tp->rcvq_space.seq = tp->copied_seq;
621 tp->rcvq_space.time = tcp_time_stamp;
622}
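
/* Called for every data segment we queue: schedules an ACK, updates the
 * receive-side MSS estimate and receiver RTT, maintains the delayed-ACK
 * timeout (ato) estimator, checks for ECN CE marks and, for sufficiently
 * large segments, tries to grow the advertised receive window.
 */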
634static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
635{
636 struct tcp_sock *tp = tcp_sk(sk);
637 struct inet_connection_sock *icsk = inet_csk(sk);
638 u32 now;
639
640 inet_csk_schedule_ack(sk);
641
642 tcp_measure_rcv_mss(sk, skb);
643
644 tcp_rcv_rtt_measure(tp);
645
646 now = tcp_time_stamp;
647
648 if (!icsk->icsk_ack.ato) {
649
650
651
652 tcp_incr_quickack(sk);
653 icsk->icsk_ack.ato = TCP_ATO_MIN;
654 } else {
655 int m = now - icsk->icsk_ack.lrcvtime;
656
657 if (m <= TCP_ATO_MIN / 2) {
658
659 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
660 } else if (m < icsk->icsk_ack.ato) {
661 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
662 if (icsk->icsk_ack.ato > icsk->icsk_rto)
663 icsk->icsk_ack.ato = icsk->icsk_rto;
664 } else if (m > icsk->icsk_rto) {
665
666
667
668 tcp_incr_quickack(sk);
669 sk_mem_reclaim(sk);
670 }
671 }
672 icsk->icsk_ack.lrcvtime = now;
673
674 tcp_ecn_check_ce(tp, skb);
675
676 if (skb->len >= 128)
677 tcp_grow_window(sk, skb);
678}
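
/* Called to compute a smoothed rtt estimate.  The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87].  The algorithm is from Van Jacobson's
 * "Congestion Avoidance and Control".
 */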
689static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
690{
691 struct tcp_sock *tp = tcp_sk(sk);
692 long m = mrtt_us;
693 u32 srtt = tp->srtt_us;
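
	/* srtt and mdev are kept scaled (by 8 and 4 respectively) so the
	 * Jacobson SIGCOMM '88 smoothing below can be done with shifts;
	 * mdev_max/rttvar track the maximum deviation seen over the last
	 * round trip so rttvar does not collapse too quickly.
	 */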
711 if (srtt != 0) {
712 m -= (srtt >> 3);
713 srtt += m;
714 if (m < 0) {
715 m = -m;
716 m -= (tp->mdev_us >> 2);
717
718
719
720
721
722
723
724
725 if (m > 0)
726 m >>= 3;
727 } else {
728 m -= (tp->mdev_us >> 2);
729 }
730 tp->mdev_us += m;
731 if (tp->mdev_us > tp->mdev_max_us) {
732 tp->mdev_max_us = tp->mdev_us;
733 if (tp->mdev_max_us > tp->rttvar_us)
734 tp->rttvar_us = tp->mdev_max_us;
735 }
736 if (after(tp->snd_una, tp->rtt_seq)) {
737 if (tp->mdev_max_us < tp->rttvar_us)
738 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
739 tp->rtt_seq = tp->snd_nxt;
740 tp->mdev_max_us = tcp_rto_min_us(sk);
741 }
742 } else {
743
744 srtt = m << 3;
745 tp->mdev_us = m << 1;
746 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
747 tp->mdev_max_us = tp->rttvar_us;
748 tp->rtt_seq = tp->snd_nxt;
749 }
750 tp->srtt_us = max(1U, srtt);
751}
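
/* Pacing-rate scaling, as a percentage of cwnd/srtt: ss_ratio (default
 * 200%) applies while in slow start, ca_ratio (default 120%) in
 * congestion avoidance.  The headroom lets cwnd growth be probed
 * without pacing itself becoming the bottleneck.
 */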
759int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
760int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
761
762static void tcp_update_pacing_rate(struct sock *sk)
763{
764 const struct tcp_sock *tp = tcp_sk(sk);
765 u64 rate;
766
767
768 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
769
770
771
772
773
774
775
776
777
778 if (tp->snd_cwnd < tp->snd_ssthresh / 2)
779 rate *= sysctl_tcp_pacing_ss_ratio;
780 else
781 rate *= sysctl_tcp_pacing_ca_ratio;
782
783 rate *= max(tp->snd_cwnd, tp->packets_out);
784
785 if (likely(tp->srtt_us))
786 do_div(rate, tp->srtt_us);
787
788
789
790
791
792 ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
793 sk->sk_max_pacing_rate);
794}
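
/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */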
799static void tcp_set_rto(struct sock *sk)
800{
801 const struct tcp_sock *tp = tcp_sk(sk);
802
803
804
805
806
807
808
809
810
811
812 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
813
814
815
816
817
818
819
820
821
822
823 tcp_bound_rto(sk);
824}
825
826__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
827{
828 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
829
830 if (!cwnd)
831 cwnd = TCP_INIT_CWND;
832 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
833}
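
/*
 * Packet counting of FACK is based on in-order assumptions, therefore TCP
 * disables it when reordering is detected.
 */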
839void tcp_disable_fack(struct tcp_sock *tp)
840{
841
842 if (tcp_is_fack(tp))
843 tp->lost_skb_hint = NULL;
844 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
845}
846
847
848static void tcp_dsack_seen(struct tcp_sock *tp)
849{
850 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
851}
852
853static void tcp_update_reordering(struct sock *sk, const int metric,
854 const int ts)
855{
856 struct tcp_sock *tp = tcp_sk(sk);
857 if (metric > tp->reordering) {
858 int mib_idx;
859
860 tp->reordering = min(sysctl_tcp_max_reordering, metric);
861
862
863 if (ts)
864 mib_idx = LINUX_MIB_TCPTSREORDER;
865 else if (tcp_is_reno(tp))
866 mib_idx = LINUX_MIB_TCPRENOREORDER;
867 else if (tcp_is_fack(tp))
868 mib_idx = LINUX_MIB_TCPFACKREORDER;
869 else
870 mib_idx = LINUX_MIB_TCPSACKREORDER;
871
872 NET_INC_STATS(sock_net(sk), mib_idx);
873#if FASTRETRANS_DEBUG > 1
874 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
875 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
876 tp->reordering,
877 tp->fackets_out,
878 tp->sacked_out,
879 tp->undo_marker ? tp->undo_retrans : 0);
880#endif
881 tcp_disable_fack(tp);
882 }
883
884 if (metric > 0)
885 tcp_disable_early_retrans(tp);
886 tp->rack.reord = 1;
887}
888
889
890static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
891{
892 if (!tp->retransmit_skb_hint ||
893 before(TCP_SKB_CB(skb)->seq,
894 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
895 tp->retransmit_skb_hint = skb;
896
897 if (!tp->lost_out ||
898 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
899 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
900}
901
902static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
903{
904 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
905 tcp_verify_retransmit_hint(tp, skb);
906
907 tp->lost_out += tcp_skb_pcount(skb);
908 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
909 }
910}
911
912void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
913{
914 tcp_verify_retransmit_hint(tp, skb);
915
916 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
917 tp->lost_out += tcp_skb_pcount(skb);
918 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
919 }
920}
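
/* SACK processing starts here.
 *
 * Incoming SACK blocks are validated before use: a block must lie within
 * the current send window (between snd_una and snd_nxt, taking sequence
 * wrap-around into account), and D-SACK blocks below snd_una are only
 * accepted while an undo is in progress (bounded by undo_marker and
 * tp->max_window), so stale or forged blocks cannot corrupt the
 * retransmission scoreboard.
 */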
1016static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1017 u32 start_seq, u32 end_seq)
1018{
1019
1020 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1021 return false;
1022
1023
1024 if (!before(start_seq, tp->snd_nxt))
1025 return false;
1026
1027
1028
1029
1030 if (after(start_seq, tp->snd_una))
1031 return true;
1032
1033 if (!is_dsack || !tp->undo_marker)
1034 return false;
1035
1036
1037 if (after(end_seq, tp->snd_una))
1038 return false;
1039
1040 if (!before(start_seq, tp->undo_marker))
1041 return true;
1042
1043
1044 if (!after(end_seq, tp->undo_marker))
1045 return false;
1046
1047
1048
1049
1050 return !before(start_seq, end_seq - tp->max_window);
1051}
1052
1053static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1054 struct tcp_sack_block_wire *sp, int num_sacks,
1055 u32 prior_snd_una)
1056{
1057 struct tcp_sock *tp = tcp_sk(sk);
1058 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1059 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1060 bool dup_sack = false;
1061
1062 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1063 dup_sack = true;
1064 tcp_dsack_seen(tp);
1065 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1066 } else if (num_sacks > 1) {
1067 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1068 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1069
1070 if (!after(end_seq_0, end_seq_1) &&
1071 !before(start_seq_0, start_seq_1)) {
1072 dup_sack = true;
1073 tcp_dsack_seen(tp);
1074 NET_INC_STATS(sock_net(sk),
1075 LINUX_MIB_TCPDSACKOFORECV);
1076 }
1077 }
1078
1079
1080 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1081 !after(end_seq_0, prior_snd_una) &&
1082 after(end_seq_0, tp->undo_marker))
1083 tp->undo_retrans--;
1084
1085 return dup_sack;
1086}
1087
1088struct tcp_sacktag_state {
1089 int reord;
1090 int fack_count;
1091
1092
1093
1094
1095 struct skb_mstamp first_sackt;
1096 struct skb_mstamp last_sackt;
1097 int flag;
1098};
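
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
 * the incoming SACK may not exactly match but we can find a smaller,
 * MSS-aligned portion of it that matches. Therefore we might need to
 * fragment the skb, which may fail (callers must handle the error return).
 */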
1108static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1109 u32 start_seq, u32 end_seq)
1110{
1111 int err;
1112 bool in_sack;
1113 unsigned int pkt_len;
1114 unsigned int mss;
1115
1116 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1117 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1118
1119 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1120 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1121 mss = tcp_skb_mss(skb);
1122 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1123
1124 if (!in_sack) {
1125 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1126 if (pkt_len < mss)
1127 pkt_len = mss;
1128 } else {
1129 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1130 if (pkt_len < mss)
1131 return -EINVAL;
1132 }
1133
1134
1135
1136
1137 if (pkt_len > mss) {
1138 unsigned int new_len = (pkt_len / mss) * mss;
1139 if (!in_sack && new_len < pkt_len) {
1140 new_len += mss;
1141 if (new_len >= skb->len)
1142 return 0;
1143 }
1144 pkt_len = new_len;
1145 }
1146 err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
1147 if (err < 0)
1148 return err;
1149 }
1150
1151 return in_sack;
1152}
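
/* Mark the given newly-SACKed range as such, adjusting counters and hints. */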
1155static u8 tcp_sacktag_one(struct sock *sk,
1156 struct tcp_sacktag_state *state, u8 sacked,
1157 u32 start_seq, u32 end_seq,
1158 int dup_sack, int pcount,
1159 const struct skb_mstamp *xmit_time)
1160{
1161 struct tcp_sock *tp = tcp_sk(sk);
1162 int fack_count = state->fack_count;
1163
1164
1165 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1166 if (tp->undo_marker && tp->undo_retrans > 0 &&
1167 after(end_seq, tp->undo_marker))
1168 tp->undo_retrans--;
1169 if (sacked & TCPCB_SACKED_ACKED)
1170 state->reord = min(fack_count, state->reord);
1171 }
1172
1173
1174 if (!after(end_seq, tp->snd_una))
1175 return sacked;
1176
1177 if (!(sacked & TCPCB_SACKED_ACKED)) {
1178 tcp_rack_advance(tp, xmit_time, sacked);
1179
1180 if (sacked & TCPCB_SACKED_RETRANS) {
1181
1182
1183
1184
1185 if (sacked & TCPCB_LOST) {
1186 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1187 tp->lost_out -= pcount;
1188 tp->retrans_out -= pcount;
1189 }
1190 } else {
1191 if (!(sacked & TCPCB_RETRANS)) {
1192
1193
1194
1195 if (before(start_seq,
1196 tcp_highest_sack_seq(tp)))
1197 state->reord = min(fack_count,
1198 state->reord);
1199 if (!after(end_seq, tp->high_seq))
1200 state->flag |= FLAG_ORIG_SACK_ACKED;
1201 if (state->first_sackt.v64 == 0)
1202 state->first_sackt = *xmit_time;
1203 state->last_sackt = *xmit_time;
1204 }
1205
1206 if (sacked & TCPCB_LOST) {
1207 sacked &= ~TCPCB_LOST;
1208 tp->lost_out -= pcount;
1209 }
1210 }
1211
1212 sacked |= TCPCB_SACKED_ACKED;
1213 state->flag |= FLAG_DATA_SACKED;
1214 tp->sacked_out += pcount;
1215 tp->delivered += pcount;
1216
1217 fack_count += pcount;
1218
1219
1220 if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
1221 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1222 tp->lost_cnt_hint += pcount;
1223
1224 if (fack_count > tp->fackets_out)
1225 tp->fackets_out = fack_count;
1226 }
1227
1228
1229
1230
1231
1232 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1233 sacked &= ~TCPCB_SACKED_RETRANS;
1234 tp->retrans_out -= pcount;
1235 }
1236
1237 return sacked;
1238}
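
/* Shift newly-SACKed bytes from this skb to the immediately previous
 * already-SACKed skb.
 */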
1243static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1244 struct tcp_sacktag_state *state,
1245 unsigned int pcount, int shifted, int mss,
1246 bool dup_sack)
1247{
1248 struct tcp_sock *tp = tcp_sk(sk);
1249 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1250 u32 start_seq = TCP_SKB_CB(skb)->seq;
1251 u32 end_seq = start_seq + shifted;
1252
1253 BUG_ON(!pcount);
1254
1255
1256
1257
1258
1259
1260
1261 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1262 start_seq, end_seq, dup_sack, pcount,
1263 &skb->skb_mstamp);
1264
1265 if (skb == tp->lost_skb_hint)
1266 tp->lost_cnt_hint += pcount;
1267
1268 TCP_SKB_CB(prev)->end_seq += shifted;
1269 TCP_SKB_CB(skb)->seq += shifted;
1270
1271 tcp_skb_pcount_add(prev, pcount);
1272 BUG_ON(tcp_skb_pcount(skb) < pcount);
1273 tcp_skb_pcount_add(skb, -pcount);
1274
1275
1276
1277
1278
1279
1280 if (!TCP_SKB_CB(prev)->tcp_gso_size)
1281 TCP_SKB_CB(prev)->tcp_gso_size = mss;
1282
1283
1284 if (tcp_skb_pcount(skb) <= 1)
1285 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1286
1287
1288 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1289
1290 if (skb->len > 0) {
1291 BUG_ON(!tcp_skb_pcount(skb));
1292 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1293 return false;
1294 }
1295
1296
1297
1298 if (skb == tp->retransmit_skb_hint)
1299 tp->retransmit_skb_hint = prev;
1300 if (skb == tp->lost_skb_hint) {
1301 tp->lost_skb_hint = prev;
1302 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1303 }
1304
1305 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1306 TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
1307 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1308 TCP_SKB_CB(prev)->end_seq++;
1309
1310 if (skb == tcp_highest_sack(sk))
1311 tcp_advance_highest_sack(sk, skb);
1312
1313 tcp_skb_collapse_tstamp(prev, skb);
1314 tcp_unlink_write_queue(skb, sk);
1315 sk_wmem_free_skb(sk, skb);
1316
1317 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1318
1319 return true;
1320}
1321
1322
1323
1324
1325static int tcp_skb_seglen(const struct sk_buff *skb)
1326{
1327 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1328}
1329
1330
1331static int skb_can_shift(const struct sk_buff *skb)
1332{
1333 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1334}
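
/* Try collapsing SACK blocks spanning across multiple skbs to a single
 * skb.
 */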
1339static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1340 struct tcp_sacktag_state *state,
1341 u32 start_seq, u32 end_seq,
1342 bool dup_sack)
1343{
1344 struct tcp_sock *tp = tcp_sk(sk);
1345 struct sk_buff *prev;
1346 int mss;
1347 int pcount = 0;
1348 int len;
1349 int in_sack;
1350
1351 if (!sk_can_gso(sk))
1352 goto fallback;
1353
1354
1355 if (!dup_sack &&
1356 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1357 goto fallback;
1358 if (!skb_can_shift(skb))
1359 goto fallback;
1360
1361 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1362 goto fallback;
1363
1364
1365 if (unlikely(skb == tcp_write_queue_head(sk)))
1366 goto fallback;
1367 prev = tcp_write_queue_prev(sk, skb);
1368
1369 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1370 goto fallback;
1371
1372 if (!tcp_skb_can_collapse_to(prev))
1373 goto fallback;
1374
1375 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1376 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1377
1378 if (in_sack) {
1379 len = skb->len;
1380 pcount = tcp_skb_pcount(skb);
1381 mss = tcp_skb_seglen(skb);
1382
1383
1384
1385
1386 if (mss != tcp_skb_seglen(prev))
1387 goto fallback;
1388 } else {
1389 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1390 goto noop;
1391
1392
1393
1394
1395 if (tcp_skb_pcount(skb) <= 1)
1396 goto noop;
1397
1398 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1399 if (!in_sack) {
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411 goto fallback;
1412 }
1413
1414 len = end_seq - TCP_SKB_CB(skb)->seq;
1415 BUG_ON(len < 0);
1416 BUG_ON(len > skb->len);
1417
1418
1419
1420
1421
1422 mss = tcp_skb_mss(skb);
1423
1424
1425
1426
1427 if (mss != tcp_skb_seglen(prev))
1428 goto fallback;
1429
1430 if (len == mss) {
1431 pcount = 1;
1432 } else if (len < mss) {
1433 goto noop;
1434 } else {
1435 pcount = len / mss;
1436 len = pcount * mss;
1437 }
1438 }
1439
1440
1441 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1442 goto fallback;
1443
1444 if (!skb_shift(prev, skb, len))
1445 goto fallback;
1446 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1447 goto out;
1448
1449
1450
1451
1452 if (prev == tcp_write_queue_tail(sk))
1453 goto out;
1454 skb = tcp_write_queue_next(sk, prev);
1455
1456 if (!skb_can_shift(skb) ||
1457 (skb == tcp_send_head(sk)) ||
1458 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1459 (mss != tcp_skb_seglen(skb)))
1460 goto out;
1461
1462 len = skb->len;
1463 if (skb_shift(prev, skb, len)) {
1464 pcount += tcp_skb_pcount(skb);
1465 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1466 }
1467
1468out:
1469 state->fack_count += pcount;
1470 return prev;
1471
1472noop:
1473 return skb;
1474
1475fallback:
1476 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1477 return NULL;
1478}
1479
1480static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1481 struct tcp_sack_block *next_dup,
1482 struct tcp_sacktag_state *state,
1483 u32 start_seq, u32 end_seq,
1484 bool dup_sack_in)
1485{
1486 struct tcp_sock *tp = tcp_sk(sk);
1487 struct sk_buff *tmp;
1488
1489 tcp_for_write_queue_from(skb, sk) {
1490 int in_sack = 0;
1491 bool dup_sack = dup_sack_in;
1492
1493 if (skb == tcp_send_head(sk))
1494 break;
1495
1496
1497 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1498 break;
1499
1500 if (next_dup &&
1501 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1502 in_sack = tcp_match_skb_to_sack(sk, skb,
1503 next_dup->start_seq,
1504 next_dup->end_seq);
1505 if (in_sack > 0)
1506 dup_sack = true;
1507 }
1508
1509
1510
1511
1512
1513 if (in_sack <= 0) {
1514 tmp = tcp_shift_skb_data(sk, skb, state,
1515 start_seq, end_seq, dup_sack);
1516 if (tmp) {
1517 if (tmp != skb) {
1518 skb = tmp;
1519 continue;
1520 }
1521
1522 in_sack = 0;
1523 } else {
1524 in_sack = tcp_match_skb_to_sack(sk, skb,
1525 start_seq,
1526 end_seq);
1527 }
1528 }
1529
1530 if (unlikely(in_sack < 0))
1531 break;
1532
1533 if (in_sack) {
1534 TCP_SKB_CB(skb)->sacked =
1535 tcp_sacktag_one(sk,
1536 state,
1537 TCP_SKB_CB(skb)->sacked,
1538 TCP_SKB_CB(skb)->seq,
1539 TCP_SKB_CB(skb)->end_seq,
1540 dup_sack,
1541 tcp_skb_pcount(skb),
1542 &skb->skb_mstamp);
1543
1544 if (!before(TCP_SKB_CB(skb)->seq,
1545 tcp_highest_sack_seq(tp)))
1546 tcp_advance_highest_sack(sk, skb);
1547 }
1548
1549 state->fack_count += tcp_skb_pcount(skb);
1550 }
1551 return skb;
1552}
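
/* Avoid all extra work that is being done by sacktag while walking in
 * a normal way.
 */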
1557static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1558 struct tcp_sacktag_state *state,
1559 u32 skip_to_seq)
1560{
1561 tcp_for_write_queue_from(skb, sk) {
1562 if (skb == tcp_send_head(sk))
1563 break;
1564
1565 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1566 break;
1567
1568 state->fack_count += tcp_skb_pcount(skb);
1569 }
1570 return skb;
1571}
1572
1573static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1574 struct sock *sk,
1575 struct tcp_sack_block *next_dup,
1576 struct tcp_sacktag_state *state,
1577 u32 skip_to_seq)
1578{
1579 if (!next_dup)
1580 return skb;
1581
1582 if (before(next_dup->start_seq, skip_to_seq)) {
1583 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1584 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1585 next_dup->start_seq, next_dup->end_seq,
1586 1);
1587 }
1588
1589 return skb;
1590}
1591
1592static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1593{
1594 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1595}
1596
1597static int
1598tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1599 u32 prior_snd_una, struct tcp_sacktag_state *state)
1600{
1601 struct tcp_sock *tp = tcp_sk(sk);
1602 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1603 TCP_SKB_CB(ack_skb)->sacked);
1604 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1605 struct tcp_sack_block sp[TCP_NUM_SACKS];
1606 struct tcp_sack_block *cache;
1607 struct sk_buff *skb;
1608 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1609 int used_sacks;
1610 bool found_dup_sack = false;
1611 int i, j;
1612 int first_sack_index;
1613
1614 state->flag = 0;
1615 state->reord = tp->packets_out;
1616
1617 if (!tp->sacked_out) {
1618 if (WARN_ON(tp->fackets_out))
1619 tp->fackets_out = 0;
1620 tcp_highest_sack_reset(sk);
1621 }
1622
1623 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1624 num_sacks, prior_snd_una);
1625 if (found_dup_sack)
1626 state->flag |= FLAG_DSACKING_ACK;
1627
1628
1629
1630
1631
1632 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1633 return 0;
1634
1635 if (!tp->packets_out)
1636 goto out;
1637
1638 used_sacks = 0;
1639 first_sack_index = 0;
1640 for (i = 0; i < num_sacks; i++) {
1641 bool dup_sack = !i && found_dup_sack;
1642
1643 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1644 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1645
1646 if (!tcp_is_sackblock_valid(tp, dup_sack,
1647 sp[used_sacks].start_seq,
1648 sp[used_sacks].end_seq)) {
1649 int mib_idx;
1650
1651 if (dup_sack) {
1652 if (!tp->undo_marker)
1653 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1654 else
1655 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1656 } else {
1657
1658 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1659 !after(sp[used_sacks].end_seq, tp->snd_una))
1660 continue;
1661 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1662 }
1663
1664 NET_INC_STATS(sock_net(sk), mib_idx);
1665 if (i == 0)
1666 first_sack_index = -1;
1667 continue;
1668 }
1669
1670
1671 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1672 continue;
1673
1674 used_sacks++;
1675 }
1676
1677
1678 for (i = used_sacks - 1; i > 0; i--) {
1679 for (j = 0; j < i; j++) {
1680 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1681 swap(sp[j], sp[j + 1]);
1682
1683
1684 if (j == first_sack_index)
1685 first_sack_index = j + 1;
1686 }
1687 }
1688 }
1689
1690 skb = tcp_write_queue_head(sk);
1691 state->fack_count = 0;
1692 i = 0;
1693
1694 if (!tp->sacked_out) {
1695
1696 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1697 } else {
1698 cache = tp->recv_sack_cache;
1699
1700 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1701 !cache->end_seq)
1702 cache++;
1703 }
1704
1705 while (i < used_sacks) {
1706 u32 start_seq = sp[i].start_seq;
1707 u32 end_seq = sp[i].end_seq;
1708 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1709 struct tcp_sack_block *next_dup = NULL;
1710
1711 if (found_dup_sack && ((i + 1) == first_sack_index))
1712 next_dup = &sp[i + 1];
1713
1714
1715 while (tcp_sack_cache_ok(tp, cache) &&
1716 !before(start_seq, cache->end_seq))
1717 cache++;
1718
1719
1720 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1721 after(end_seq, cache->start_seq)) {
1722
1723
1724 if (before(start_seq, cache->start_seq)) {
1725 skb = tcp_sacktag_skip(skb, sk, state,
1726 start_seq);
1727 skb = tcp_sacktag_walk(skb, sk, next_dup,
1728 state,
1729 start_seq,
1730 cache->start_seq,
1731 dup_sack);
1732 }
1733
1734
1735 if (!after(end_seq, cache->end_seq))
1736 goto advance_sp;
1737
1738 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1739 state,
1740 cache->end_seq);
1741
1742
1743 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1744
1745 skb = tcp_highest_sack(sk);
1746 if (!skb)
1747 break;
1748 state->fack_count = tp->fackets_out;
1749 cache++;
1750 goto walk;
1751 }
1752
1753 skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
1754
1755 cache++;
1756 continue;
1757 }
1758
1759 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1760 skb = tcp_highest_sack(sk);
1761 if (!skb)
1762 break;
1763 state->fack_count = tp->fackets_out;
1764 }
1765 skb = tcp_sacktag_skip(skb, sk, state, start_seq);
1766
1767walk:
1768 skb = tcp_sacktag_walk(skb, sk, next_dup, state,
1769 start_seq, end_seq, dup_sack);
1770
1771advance_sp:
1772 i++;
1773 }
1774
1775
1776 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1777 tp->recv_sack_cache[i].start_seq = 0;
1778 tp->recv_sack_cache[i].end_seq = 0;
1779 }
1780 for (j = 0; j < used_sacks; j++)
1781 tp->recv_sack_cache[i++] = sp[j];
1782
1783 if ((state->reord < tp->fackets_out) &&
1784 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1785 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
1786
1787 tcp_verify_left_out(tp);
1788out:
1789
1790#if FASTRETRANS_DEBUG > 0
1791 WARN_ON((int)tp->sacked_out < 0);
1792 WARN_ON((int)tp->lost_out < 0);
1793 WARN_ON((int)tp->retrans_out < 0);
1794 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1795#endif
1796 return state->flag;
1797}
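
/* Limits sacked_out so that sum with lost_out isn't ever larger than
 * packets_out. Returns false if sacked_out adjustment wasn't necessary.
 */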
1802static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1803{
1804 u32 holes;
1805
1806 holes = max(tp->lost_out, 1U);
1807 holes = min(holes, tp->packets_out);
1808
1809 if ((tp->sacked_out + holes) > tp->packets_out) {
1810 tp->sacked_out = tp->packets_out - holes;
1811 return true;
1812 }
1813 return false;
1814}
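
/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only other reason could be a bug in the receiver TCP.
 */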
1820static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1821{
1822 struct tcp_sock *tp = tcp_sk(sk);
1823 if (tcp_limit_reno_sacked(tp))
1824 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1825}
1826
1827
1828
1829static void tcp_add_reno_sack(struct sock *sk)
1830{
1831 struct tcp_sock *tp = tcp_sk(sk);
1832 u32 prior_sacked = tp->sacked_out;
1833
1834 tp->sacked_out++;
1835 tcp_check_reno_reordering(sk, 0);
1836 if (tp->sacked_out > prior_sacked)
1837 tp->delivered++;
1838 tcp_verify_left_out(tp);
1839}
1840
1841
1842
1843static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1844{
1845 struct tcp_sock *tp = tcp_sk(sk);
1846
1847 if (acked > 0) {
1848
1849 tp->delivered += max_t(int, acked - tp->sacked_out, 1);
1850 if (acked - 1 >= tp->sacked_out)
1851 tp->sacked_out = 0;
1852 else
1853 tp->sacked_out -= acked - 1;
1854 }
1855 tcp_check_reno_reordering(sk, acked);
1856 tcp_verify_left_out(tp);
1857}
1858
1859static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1860{
1861 tp->sacked_out = 0;
1862}
1863
1864void tcp_clear_retrans(struct tcp_sock *tp)
1865{
1866 tp->retrans_out = 0;
1867 tp->lost_out = 0;
1868 tp->undo_marker = 0;
1869 tp->undo_retrans = -1;
1870 tp->fackets_out = 0;
1871 tp->sacked_out = 0;
1872}
1873
1874static inline void tcp_init_undo(struct tcp_sock *tp)
1875{
1876 tp->undo_marker = tp->snd_una;
1877
1878 tp->undo_retrans = tp->retrans_out ? : -1;
1879}
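
/* Enter Loss state. If we detect SACK reneging, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */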
1885void tcp_enter_loss(struct sock *sk)
1886{
1887 const struct inet_connection_sock *icsk = inet_csk(sk);
1888 struct tcp_sock *tp = tcp_sk(sk);
1889 struct net *net = sock_net(sk);
1890 struct sk_buff *skb;
1891 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
1892 bool is_reneg;
1893
1894
1895 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1896 !after(tp->high_seq, tp->snd_una) ||
1897 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1898 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1899 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1900 tcp_ca_event(sk, CA_EVENT_LOSS);
1901 tcp_init_undo(tp);
1902 }
1903 tp->snd_cwnd = 1;
1904 tp->snd_cwnd_cnt = 0;
1905 tp->snd_cwnd_stamp = tcp_time_stamp;
1906
1907 tp->retrans_out = 0;
1908 tp->lost_out = 0;
1909
1910 if (tcp_is_reno(tp))
1911 tcp_reset_reno_sack(tp);
1912
1913 skb = tcp_write_queue_head(sk);
1914 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
1915 if (is_reneg) {
1916 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1917 tp->sacked_out = 0;
1918 tp->fackets_out = 0;
1919 }
1920 tcp_clear_all_retrans_hints(tp);
1921
1922 tcp_for_write_queue(skb, sk) {
1923 if (skb == tcp_send_head(sk))
1924 break;
1925
1926 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1927 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
1928 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1929 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1930 tp->lost_out += tcp_skb_pcount(skb);
1931 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
1932 }
1933 }
1934 tcp_verify_left_out(tp);
1935
1936
1937
1938
1939 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
1940 tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
1941 tp->reordering = min_t(unsigned int, tp->reordering,
1942 net->ipv4.sysctl_tcp_reordering);
1943 tcp_set_ca_state(sk, TCP_CA_Loss);
1944 tp->high_seq = tp->snd_nxt;
1945 tcp_ecn_queue_cwr(tp);
1946
1947
1948
1949
1950
1951 tp->frto = sysctl_tcp_frto &&
1952 (new_recovery || icsk->icsk_retransmits) &&
1953 !inet_csk(sk)->icsk_mtup.probe_size;
1954}
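
/* If the ACK points back into a range we remember as SACKed, our SACK
 * scoreboard no longer reflects the real state of the receiver, i.e.
 * the receiver is reneging (or heavily congested, or buggy).  Rather
 * than retransmitting everything immediately, give the receiver a short
 * grace period (max(RTT/2, 10ms)) to send ACKs that restore sanity; if
 * the reneging persists until that timer fires, the scoreboard is cleared.
 */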
1966static bool tcp_check_sack_reneging(struct sock *sk, int flag)
1967{
1968 if (flag & FLAG_SACK_RENEGING) {
1969 struct tcp_sock *tp = tcp_sk(sk);
1970 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
1971 msecs_to_jiffies(10));
1972
1973 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1974 delay, TCP_RTO_MAX);
1975 return true;
1976 }
1977 return false;
1978}
1979
1980static inline int tcp_fackets_out(const struct tcp_sock *tp)
1981{
1982 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
1983}
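
/* Heuristics to estimate the number of duplicate ACKs. There is no
 * explicit dupACK counter when SACK is enabled (without SACK, sacked_out
 * serves that purpose).  With FACK the count is based on the highest
 * SACKed sequence; with plain SACK/Reno it is sacked_out plus one for the
 * presumed-lost head segment.
 */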
2000static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2001{
2002 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2003}
2004
2005static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2006{
2007 struct tcp_sock *tp = tcp_sk(sk);
2008 unsigned long delay;
2009
2010
2011
2012
2013
2014 if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
2015 (flag & FLAG_ECE) || !tp->srtt_us)
2016 return false;
2017
2018 delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
2019 msecs_to_jiffies(2));
2020
2021 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2022 return false;
2023
2024 inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
2025 TCP_RTO_MAX);
2026 return true;
2027}
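
/* Linux NewReno/SACK/FACK/ECN state machine.
 *
 * tcp_time_to_recover() determines the moment _when_ we should reduce
 * CWND and, hence, slow down forward transmission.  Roughly, we enter
 * Recovery when:
 *   - some segments are already marked lost, or
 *   - the dupACK/SACK count exceeds the current reordering degree, or
 *   - the tail of the write queue is SACKed and nothing new can be sent
 *     (thin streams, small windows), or
 *   - early retransmit applies (few packets outstanding and all but the
 *     presumed-lost ones already SACKed).
 */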
2122static bool tcp_time_to_recover(struct sock *sk, int flag)
2123{
2124 struct tcp_sock *tp = tcp_sk(sk);
2125 __u32 packets_out;
2126 int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
2127
2128
2129 if (tp->lost_out)
2130 return true;
2131
2132
2133 if (tcp_dupack_heuristics(tp) > tp->reordering)
2134 return true;
2135
2136
2137
2138
2139 packets_out = tp->packets_out;
2140 if (packets_out <= tp->reordering &&
2141 tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) &&
2142 !tcp_may_send_now(sk)) {
2143
2144
2145
2146 return true;
2147 }
2148
2149
2150
2151
2152
2153
2154 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2155 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2156 tcp_is_sack(tp) && !tcp_send_head(sk))
2157 return true;
2158
2159
2160
2161
2162
2163
2164 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2165 (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
2166 !tcp_may_send_now(sk))
2167 return !tcp_pause_early_retransmit(sk, flag);
2168
2169 return false;
2170}
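
/* Mark head of the write queue as lost.  For FACK or non-SACK (Reno)
 * senders the first "packets" segments are marked; for RFC3517-style SACK
 * a segment counts as lost only once enough SACKed data sits above it.
 * With mark_head set, only the head segment is considered.
 */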
2178static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2179{
2180 struct tcp_sock *tp = tcp_sk(sk);
2181 struct sk_buff *skb;
2182 int cnt, oldcnt, lost;
2183 unsigned int mss;
2184
2185 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2186
2187 WARN_ON(packets > tp->packets_out);
2188 if (tp->lost_skb_hint) {
2189 skb = tp->lost_skb_hint;
2190 cnt = tp->lost_cnt_hint;
2191
2192 if (mark_head && skb != tcp_write_queue_head(sk))
2193 return;
2194 } else {
2195 skb = tcp_write_queue_head(sk);
2196 cnt = 0;
2197 }
2198
2199 tcp_for_write_queue_from(skb, sk) {
2200 if (skb == tcp_send_head(sk))
2201 break;
2202
2203
2204 tp->lost_skb_hint = skb;
2205 tp->lost_cnt_hint = cnt;
2206
2207 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2208 break;
2209
2210 oldcnt = cnt;
2211 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2212 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2213 cnt += tcp_skb_pcount(skb);
2214
2215 if (cnt > packets) {
2216 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2217 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2218 (oldcnt >= packets))
2219 break;
2220
2221 mss = tcp_skb_mss(skb);
2222
2223 lost = (packets - oldcnt) * mss;
2224 if (lost < skb->len &&
2225 tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
2226 break;
2227 cnt = packets;
2228 }
2229
2230 tcp_skb_mark_lost(tp, skb);
2231
2232 if (mark_head)
2233 break;
2234 }
2235 tcp_verify_left_out(tp);
2236}
2237
2238
2239
2240static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2241{
2242 struct tcp_sock *tp = tcp_sk(sk);
2243
2244 if (tcp_is_reno(tp)) {
2245 tcp_mark_head_lost(sk, 1, 1);
2246 } else if (tcp_is_fack(tp)) {
2247 int lost = tp->fackets_out - tp->reordering;
2248 if (lost <= 0)
2249 lost = 1;
2250 tcp_mark_head_lost(sk, lost, 0);
2251 } else {
2252 int sacked_upto = tp->sacked_out - tp->reordering;
2253 if (sacked_upto >= 0)
2254 tcp_mark_head_lost(sk, sacked_upto, 0);
2255 else if (fast_rexmit)
2256 tcp_mark_head_lost(sk, 1, 1);
2257 }
2258}
2259
2260static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
2261{
2262 return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2263 before(tp->rx_opt.rcv_tsecr, when);
2264}
2265
2266
2267
2268
2269static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
2270 const struct sk_buff *skb)
2271{
2272 return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
2273 tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
2274}
2275
2276
2277
2278
2279static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2280{
2281 return !tp->retrans_stamp ||
2282 tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
2283}
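
/* Undo procedures.
 *
 * Conservative cwnd reductions are made "undoable": with TCP timestamps
 * or D-SACKs we can detect after the fact that retransmissions were
 * spurious and restore the previous ssthresh/cwnd.  tcp_any_retrans_done()
 * reports whether anything in the window was ever retransmitted, which
 * also tells us when retrans_stamp may safely be cleared.
 */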
2301static bool tcp_any_retrans_done(const struct sock *sk)
2302{
2303 const struct tcp_sock *tp = tcp_sk(sk);
2304 struct sk_buff *skb;
2305
2306 if (tp->retrans_out)
2307 return true;
2308
2309 skb = tcp_write_queue_head(sk);
2310 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2311 return true;
2312
2313 return false;
2314}
2315
2316#if FASTRETRANS_DEBUG > 1
2317static void DBGUNDO(struct sock *sk, const char *msg)
2318{
2319 struct tcp_sock *tp = tcp_sk(sk);
2320 struct inet_sock *inet = inet_sk(sk);
2321
2322 if (sk->sk_family == AF_INET) {
2323 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2324 msg,
2325 &inet->inet_daddr, ntohs(inet->inet_dport),
2326 tp->snd_cwnd, tcp_left_out(tp),
2327 tp->snd_ssthresh, tp->prior_ssthresh,
2328 tp->packets_out);
2329 }
2330#if IS_ENABLED(CONFIG_IPV6)
2331 else if (sk->sk_family == AF_INET6) {
2332 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2333 msg,
2334 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
2335 tp->snd_cwnd, tcp_left_out(tp),
2336 tp->snd_ssthresh, tp->prior_ssthresh,
2337 tp->packets_out);
2338 }
2339#endif
2340}
2341#else
2342#define DBGUNDO(x...) do { } while (0)
2343#endif
2344
2345static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2346{
2347 struct tcp_sock *tp = tcp_sk(sk);
2348
2349 if (unmark_loss) {
2350 struct sk_buff *skb;
2351
2352 tcp_for_write_queue(skb, sk) {
2353 if (skb == tcp_send_head(sk))
2354 break;
2355 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2356 }
2357 tp->lost_out = 0;
2358 tcp_clear_all_retrans_hints(tp);
2359 }
2360
2361 if (tp->prior_ssthresh) {
2362 const struct inet_connection_sock *icsk = inet_csk(sk);
2363
2364 if (icsk->icsk_ca_ops->undo_cwnd)
2365 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2366 else
2367 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2368
2369 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2370 tp->snd_ssthresh = tp->prior_ssthresh;
2371 tcp_ecn_withdraw_cwr(tp);
2372 }
2373 }
2374 tp->snd_cwnd_stamp = tcp_time_stamp;
2375 tp->undo_marker = 0;
2376}
2377
2378static inline bool tcp_may_undo(const struct tcp_sock *tp)
2379{
2380 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2381}
2382
2383
2384static bool tcp_try_undo_recovery(struct sock *sk)
2385{
2386 struct tcp_sock *tp = tcp_sk(sk);
2387
2388 if (tcp_may_undo(tp)) {
2389 int mib_idx;
2390
2391
2392
2393
2394 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2395 tcp_undo_cwnd_reduction(sk, false);
2396 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2397 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2398 else
2399 mib_idx = LINUX_MIB_TCPFULLUNDO;
2400
2401 NET_INC_STATS(sock_net(sk), mib_idx);
2402 }
2403 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2404
2405
2406
2407 if (!tcp_any_retrans_done(sk))
2408 tp->retrans_stamp = 0;
2409 return true;
2410 }
2411 tcp_set_ca_state(sk, TCP_CA_Open);
2412 return false;
2413}
2414
2415
2416static bool tcp_try_undo_dsack(struct sock *sk)
2417{
2418 struct tcp_sock *tp = tcp_sk(sk);
2419
2420 if (tp->undo_marker && !tp->undo_retrans) {
2421 DBGUNDO(sk, "D-SACK");
2422 tcp_undo_cwnd_reduction(sk, false);
2423 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2424 return true;
2425 }
2426 return false;
2427}
2428
2429
2430static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2431{
2432 struct tcp_sock *tp = tcp_sk(sk);
2433
2434 if (frto_undo || tcp_may_undo(tp)) {
2435 tcp_undo_cwnd_reduction(sk, true);
2436
2437 DBGUNDO(sk, "partial loss");
2438 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2439 if (frto_undo)
2440 NET_INC_STATS(sock_net(sk),
2441 LINUX_MIB_TCPSPURIOUSRTOS);
2442 inet_csk(sk)->icsk_retransmits = 0;
2443 if (frto_undo || tcp_is_sack(tp))
2444 tcp_set_ca_state(sk, TCP_CA_Open);
2445 return true;
2446 }
2447 return false;
2448}
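
/* The cwnd reduction in CWR and Recovery uses the PRR algorithm
 * (Proportional Rate Reduction, RFC 6937).  PRR brings cwnd down towards
 * ssthresh gradually over the recovery episode, sending roughly in
 * proportion to newly delivered data per ACK, and falls back to packet
 * conservation when retransmitted data is lost again.
 */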
2459static void tcp_init_cwnd_reduction(struct sock *sk)
2460{
2461 struct tcp_sock *tp = tcp_sk(sk);
2462
2463 tp->high_seq = tp->snd_nxt;
2464 tp->tlp_high_seq = 0;
2465 tp->snd_cwnd_cnt = 0;
2466 tp->prior_cwnd = tp->snd_cwnd;
2467 tp->prr_delivered = 0;
2468 tp->prr_out = 0;
2469 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2470 tcp_ecn_queue_cwr(tp);
2471}
2472
2473static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
2474 int flag)
2475{
2476 struct tcp_sock *tp = tcp_sk(sk);
2477 int sndcnt = 0;
2478 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2479
2480 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2481 return;
2482
2483 tp->prr_delivered += newly_acked_sacked;
2484 if (delta < 0) {
2485 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2486 tp->prior_cwnd - 1;
2487 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2488 } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
2489 !(flag & FLAG_LOST_RETRANS)) {
2490 sndcnt = min_t(int, delta,
2491 max_t(int, tp->prr_delivered - tp->prr_out,
2492 newly_acked_sacked) + 1);
2493 } else {
2494 sndcnt = min(delta, newly_acked_sacked);
2495 }
2496
2497 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2498 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2499}
2500
2501static inline void tcp_end_cwnd_reduction(struct sock *sk)
2502{
2503 struct tcp_sock *tp = tcp_sk(sk);
2504
2505
2506 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2507 (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
2508 tp->snd_cwnd = tp->snd_ssthresh;
2509 tp->snd_cwnd_stamp = tcp_time_stamp;
2510 }
2511 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2512}
2513
2514
2515void tcp_enter_cwr(struct sock *sk)
2516{
2517 struct tcp_sock *tp = tcp_sk(sk);
2518
2519 tp->prior_ssthresh = 0;
2520 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2521 tp->undo_marker = 0;
2522 tcp_init_cwnd_reduction(sk);
2523 tcp_set_ca_state(sk, TCP_CA_CWR);
2524 }
2525}
2526EXPORT_SYMBOL(tcp_enter_cwr);
2527
2528static void tcp_try_keep_open(struct sock *sk)
2529{
2530 struct tcp_sock *tp = tcp_sk(sk);
2531 int state = TCP_CA_Open;
2532
2533 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2534 state = TCP_CA_Disorder;
2535
2536 if (inet_csk(sk)->icsk_ca_state != state) {
2537 tcp_set_ca_state(sk, state);
2538 tp->high_seq = tp->snd_nxt;
2539 }
2540}
2541
2542static void tcp_try_to_open(struct sock *sk, int flag)
2543{
2544 struct tcp_sock *tp = tcp_sk(sk);
2545
2546 tcp_verify_left_out(tp);
2547
2548 if (!tcp_any_retrans_done(sk))
2549 tp->retrans_stamp = 0;
2550
2551 if (flag & FLAG_ECE)
2552 tcp_enter_cwr(sk);
2553
2554 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2555 tcp_try_keep_open(sk);
2556 }
2557}
2558
2559static void tcp_mtup_probe_failed(struct sock *sk)
2560{
2561 struct inet_connection_sock *icsk = inet_csk(sk);
2562
2563 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2564 icsk->icsk_mtup.probe_size = 0;
2565 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2566}
2567
2568static void tcp_mtup_probe_success(struct sock *sk)
2569{
2570 struct tcp_sock *tp = tcp_sk(sk);
2571 struct inet_connection_sock *icsk = inet_csk(sk);
2572
2573
2574 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2575 tp->snd_cwnd = tp->snd_cwnd *
2576 tcp_mss_to_mtu(sk, tp->mss_cache) /
2577 icsk->icsk_mtup.probe_size;
2578 tp->snd_cwnd_cnt = 0;
2579 tp->snd_cwnd_stamp = tcp_time_stamp;
2580 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2581
2582 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2583 icsk->icsk_mtup.probe_size = 0;
2584 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2585 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2586}
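
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */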
2592void tcp_simple_retransmit(struct sock *sk)
2593{
2594 const struct inet_connection_sock *icsk = inet_csk(sk);
2595 struct tcp_sock *tp = tcp_sk(sk);
2596 struct sk_buff *skb;
2597 unsigned int mss = tcp_current_mss(sk);
2598 u32 prior_lost = tp->lost_out;
2599
2600 tcp_for_write_queue(skb, sk) {
2601 if (skb == tcp_send_head(sk))
2602 break;
2603 if (tcp_skb_seglen(skb) > mss &&
2604 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2605 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2606 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2607 tp->retrans_out -= tcp_skb_pcount(skb);
2608 }
2609 tcp_skb_mark_lost_uncond_verify(tp, skb);
2610 }
2611 }
2612
2613 tcp_clear_retrans_hints_partial(tp);
2614
2615 if (prior_lost == tp->lost_out)
2616 return;
2617
2618 if (tcp_is_reno(tp))
2619 tcp_limit_reno_sacked(tp);
2620
2621 tcp_verify_left_out(tp);
2622
2623
2624
2625
2626
2627
2628 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2629 tp->high_seq = tp->snd_nxt;
2630 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2631 tp->prior_ssthresh = 0;
2632 tp->undo_marker = 0;
2633 tcp_set_ca_state(sk, TCP_CA_Loss);
2634 }
2635 tcp_xmit_retransmit_queue(sk);
2636}
2637EXPORT_SYMBOL(tcp_simple_retransmit);
2638
2639static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2640{
2641 struct tcp_sock *tp = tcp_sk(sk);
2642 int mib_idx;
2643
2644 if (tcp_is_reno(tp))
2645 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2646 else
2647 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2648
2649 NET_INC_STATS(sock_net(sk), mib_idx);
2650
2651 tp->prior_ssthresh = 0;
2652 tcp_init_undo(tp);
2653
2654 if (!tcp_in_cwnd_reduction(sk)) {
2655 if (!ece_ack)
2656 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2657 tcp_init_cwnd_reduction(sk);
2658 }
2659 tcp_set_ca_state(sk, TCP_CA_Recovery);
2660}
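
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
 * recovered or spuriously retransmitted (F-RTO); otherwise keep
 * retransmitting what is marked lost.
 */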
2665static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
2666 int *rexmit)
2667{
2668 struct tcp_sock *tp = tcp_sk(sk);
2669 bool recovered = !before(tp->snd_una, tp->high_seq);
2670
2671 if ((flag & FLAG_SND_UNA_ADVANCED) &&
2672 tcp_try_undo_loss(sk, false))
2673 return;
2674
2675 if (tp->frto) {
2676
2677
2678
2679 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2680 tcp_try_undo_loss(sk, true))
2681 return;
2682
2683 if (after(tp->snd_nxt, tp->high_seq)) {
2684 if (flag & FLAG_DATA_SACKED || is_dupack)
2685 tp->frto = 0;
2686 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2687 tp->high_seq = tp->snd_nxt;
2688
2689
2690
2691
2692 if (tcp_send_head(sk) &&
2693 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2694 *rexmit = REXMIT_NEW;
2695 return;
2696 }
2697 tp->frto = 0;
2698 }
2699 }
2700
2701 if (recovered) {
2702
2703 tcp_try_undo_recovery(sk);
2704 return;
2705 }
2706 if (tcp_is_reno(tp)) {
2707
2708
2709
2710 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2711 tcp_add_reno_sack(sk);
2712 else if (flag & FLAG_SND_UNA_ADVANCED)
2713 tcp_reset_reno_sack(tp);
2714 }
2715 *rexmit = REXMIT_LOST;
2716}
2717
2718
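/* A partial ACK arrived during fast recovery: if timestamps show the
 * original transmission was merely delayed, account it as reordering
 * and undo the cwnd reduction once no retransmitted data is left in
 * flight.
 */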
2719static bool tcp_try_undo_partial(struct sock *sk, const int acked)
2720{
2721 struct tcp_sock *tp = tcp_sk(sk);
2722
2723 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2724
2725
2726
2727 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2728
2729
2730
2731
2732
2733
2734 if (tp->retrans_out)
2735 return true;
2736
2737 if (!tcp_any_retrans_done(sk))
2738 tp->retrans_stamp = 0;
2739
2740 DBGUNDO(sk, "partial recovery");
2741 tcp_undo_cwnd_reduction(sk, true);
2742 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2743 tcp_try_keep_open(sk);
2744 return true;
2745 }
2746 return false;
2747}
2760
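/* The heart of loss recovery.  Called from tcp_ack() for "dubious"
 * ACKs: it handles SACK reneging, undo of spurious recoveries, the
 * transitions between Open, Disorder, CWR, Recovery and Loss, and
 * decides which segments to mark lost and retransmit.
 */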
2761static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2762 bool is_dupack, int *ack_flag, int *rexmit)
2763{
2764 struct inet_connection_sock *icsk = inet_csk(sk);
2765 struct tcp_sock *tp = tcp_sk(sk);
2766 int fast_rexmit = 0, flag = *ack_flag;
2767 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2768 (tcp_fackets_out(tp) > tp->reordering));
2769
2770 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2771 tp->sacked_out = 0;
2772 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2773 tp->fackets_out = 0;
2774
2775
2776
2777 if (flag & FLAG_ECE)
2778 tp->prior_ssthresh = 0;
2779
2780
2781 if (tcp_check_sack_reneging(sk, flag))
2782 return;
2783
2784
2785 tcp_verify_left_out(tp);
2786
2787
2788
2789 if (icsk->icsk_ca_state == TCP_CA_Open) {
2790 WARN_ON(tp->retrans_out != 0);
2791 tp->retrans_stamp = 0;
2792 } else if (!before(tp->snd_una, tp->high_seq)) {
2793 switch (icsk->icsk_ca_state) {
2794 case TCP_CA_CWR:
2795
2796
2797 if (tp->snd_una != tp->high_seq) {
2798 tcp_end_cwnd_reduction(sk);
2799 tcp_set_ca_state(sk, TCP_CA_Open);
2800 }
2801 break;
2802
2803 case TCP_CA_Recovery:
2804 if (tcp_is_reno(tp))
2805 tcp_reset_reno_sack(tp);
2806 if (tcp_try_undo_recovery(sk))
2807 return;
2808 tcp_end_cwnd_reduction(sk);
2809 break;
2810 }
2811 }
2812
2813
2814 if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
2815 tcp_rack_mark_lost(sk)) {
2816 flag |= FLAG_LOST_RETRANS;
2817 *ack_flag |= FLAG_LOST_RETRANS;
2818 }
2819
2820
2821 switch (icsk->icsk_ca_state) {
2822 case TCP_CA_Recovery:
2823 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2824 if (tcp_is_reno(tp) && is_dupack)
2825 tcp_add_reno_sack(sk);
2826 } else {
2827 if (tcp_try_undo_partial(sk, acked))
2828 return;
2829
2830 do_lost = tcp_is_reno(tp) ||
2831 tcp_fackets_out(tp) > tp->reordering;
2832 }
2833 if (tcp_try_undo_dsack(sk)) {
2834 tcp_try_keep_open(sk);
2835 return;
2836 }
2837 break;
2838 case TCP_CA_Loss:
2839 tcp_process_loss(sk, flag, is_dupack, rexmit);
2840 if (icsk->icsk_ca_state != TCP_CA_Open &&
2841 !(flag & FLAG_LOST_RETRANS))
2842 return;
2843
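		/* Fall through: recovery was undone or a retransmit was lost,
		 * so run the default-state processing below.
		 */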
2844 default:
2845 if (tcp_is_reno(tp)) {
2846 if (flag & FLAG_SND_UNA_ADVANCED)
2847 tcp_reset_reno_sack(tp);
2848 if (is_dupack)
2849 tcp_add_reno_sack(sk);
2850 }
2851
2852 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2853 tcp_try_undo_dsack(sk);
2854
2855 if (!tcp_time_to_recover(sk, flag)) {
2856 tcp_try_to_open(sk, flag);
2857 return;
2858 }
2859
2860
2861 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2862 icsk->icsk_mtup.probe_size &&
2863 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2864 tcp_mtup_probe_failed(sk);
2865
2866 tp->snd_cwnd++;
2867 tcp_simple_retransmit(sk);
2868 return;
2869 }
2870
2871
2872 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2873 fast_rexmit = 1;
2874 }
2875
2876 if (do_lost)
2877 tcp_update_scoreboard(sk, fast_rexmit);
2878 *rexmit = REXMIT_LOST;
2879}
2898
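/* Windowed min filter for RTT: keep a small set of ranked samples over
 * the last sysctl_tcp_min_rtt_wlen seconds so a stale minimum can
 * expire without rescanning old data.
 */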
2899static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
2900{
2901 const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
2902 struct rtt_meas *m = tcp_sk(sk)->rtt_min;
2903 struct rtt_meas rttm = {
2904 .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
2905 .ts = now,
2906 };
2907 u32 elapsed;
2908
2909
2910 if (unlikely(rttm.rtt <= m[0].rtt))
2911 m[0] = m[1] = m[2] = rttm;
2912 else if (rttm.rtt <= m[1].rtt)
2913 m[1] = m[2] = rttm;
2914 else if (rttm.rtt <= m[2].rtt)
2915 m[2] = rttm;
2916
2917 elapsed = now - m[0].ts;
2918 if (unlikely(elapsed > wlen)) {
2919
2920
2921
2922 m[0] = m[1];
2923 m[1] = m[2];
2924 m[2] = rttm;
2925 if (now - m[0].ts > wlen) {
2926 m[0] = m[1];
2927 m[1] = rttm;
2928 if (now - m[0].ts > wlen)
2929 m[0] = rttm;
2930 }
2931 } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
2932
2933
2934
2935 m[2] = m[1] = rttm;
2936 } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
2937
2938
2939
2940 m[2] = rttm;
2941 }
2942}
2943
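/* Feed the RTT estimator with whichever sample is available: the newly
 * acked skb, a SACK-based sample, or the echoed timestamp as a last
 * resort, then recompute the RTO.
 */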
2944static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2945 long seq_rtt_us, long sack_rtt_us,
2946 long ca_rtt_us)
2947{
2948 const struct tcp_sock *tp = tcp_sk(sk);
2949
2950
2951
2952
2953
2954
2955 if (seq_rtt_us < 0)
2956 seq_rtt_us = sack_rtt_us;
2957
2958
2959
2960
2961
2962
2963
2964 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2965 flag & FLAG_ACKED)
2966 seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
2967 tp->rx_opt.rcv_tsecr);
2968 if (seq_rtt_us < 0)
2969 return false;
2970
2971
2972
2973
2974
2975 tcp_update_rtt_min(sk, ca_rtt_us);
2976 tcp_rtt_estimator(sk, seq_rtt_us);
2977 tcp_set_rto(sk);
2978
2979
2980 inet_csk(sk)->icsk_backoff = 0;
2981 return true;
2982}
2983
2984
2985void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
2986{
2987 long rtt_us = -1L;
2988
2989 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
2990 struct skb_mstamp now;
2991
2992 skb_mstamp_get(&now);
2993 rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
2994 }
2995
2996 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
2997}
2998
2999
3000static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
3001{
3002 const struct inet_connection_sock *icsk = inet_csk(sk);
3003
3004 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
3005 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
3006}
3007
3008
3009
3010
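/* Re-arm the retransmit timer (or stop it if nothing is outstanding).
 * If an early-retransmit or loss-probe timer is pending, the RTO is
 * taken relative to when the head of the write queue was sent.
 */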
3011void tcp_rearm_rto(struct sock *sk)
3012{
3013 const struct inet_connection_sock *icsk = inet_csk(sk);
3014 struct tcp_sock *tp = tcp_sk(sk);
3015
3016
3017
3018
3019 if (tp->fastopen_rsk)
3020 return;
3021
3022 if (!tp->packets_out) {
3023 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3024 } else {
3025 u32 rto = inet_csk(sk)->icsk_rto;
3026
3027 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3028 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3029 struct sk_buff *skb = tcp_write_queue_head(sk);
3030 const u32 rto_time_stamp =
3031 tcp_skb_timestamp(skb) + rto;
3032 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3033
3034
3035
3036 if (delta > 0)
3037 rto = delta;
3038 }
3039 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3040 TCP_RTO_MAX);
3041 }
3042}
3043
3044
3045
3046
3047void tcp_resume_early_retransmit(struct sock *sk)
3048{
3049 struct tcp_sock *tp = tcp_sk(sk);
3050
3051 tcp_rearm_rto(sk);
3052
3053
3054 if (!tp->do_early_retrans)
3055 return;
3056
3057 tcp_enter_recovery(sk, false);
3058 tcp_update_scoreboard(sk, 1);
3059 tcp_xmit_retransmit_queue(sk);
3060}
3061
3062
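/* A TSO skb is only partially acked: trim the acknowledged head and
 * return how many segments the trim removed.
 */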
3063static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3064{
3065 struct tcp_sock *tp = tcp_sk(sk);
3066 u32 packets_acked;
3067
3068 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3069
3070 packets_acked = tcp_skb_pcount(skb);
3071 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3072 return 0;
3073 packets_acked -= tcp_skb_pcount(skb);
3074
3075 if (packets_acked) {
3076 BUG_ON(tcp_skb_pcount(skb) == 0);
3077 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3078 }
3079
3080 return packets_acked;
3081}
3082
3083static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3084 u32 prior_snd_una)
3085{
3086 const struct skb_shared_info *shinfo;
3087
3088
3089 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3090 return;
3091
3092 shinfo = skb_shinfo(skb);
3093 if (!before(shinfo->tskey, prior_snd_una) &&
3094 before(shinfo->tskey, tcp_sk(sk)->snd_una))
3095 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3096}
3097
3098
3099
3100
3101
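/* Remove fully acknowledged skbs from the retransmit queue, updating
 * SACK/retrans/lost accounting, collecting RTT samples and reporting
 * the ACK to the congestion control module.
 */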
3102static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3103 u32 prior_snd_una, int *acked,
3104 struct tcp_sacktag_state *sack)
3105{
3106 const struct inet_connection_sock *icsk = inet_csk(sk);
3107 struct skb_mstamp first_ackt, last_ackt, now;
3108 struct tcp_sock *tp = tcp_sk(sk);
3109 u32 prior_sacked = tp->sacked_out;
3110 u32 reord = tp->packets_out;
3111 bool fully_acked = true;
3112 long sack_rtt_us = -1L;
3113 long seq_rtt_us = -1L;
3114 long ca_rtt_us = -1L;
3115 struct sk_buff *skb;
3116 u32 pkts_acked = 0;
3117 u32 last_in_flight = 0;
3118 bool rtt_update;
3119 int flag = 0;
3120
3121 first_ackt.v64 = 0;
3122
3123 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3124 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3125 u8 sacked = scb->sacked;
3126 u32 acked_pcount;
3127
3128 tcp_ack_tstamp(sk, skb, prior_snd_una);
3129
3130
3131 if (after(scb->end_seq, tp->snd_una)) {
3132 if (tcp_skb_pcount(skb) == 1 ||
3133 !after(tp->snd_una, scb->seq))
3134 break;
3135
3136 acked_pcount = tcp_tso_acked(sk, skb);
3137 if (!acked_pcount)
3138 break;
3139
3140 fully_acked = false;
3141 } else {
3142
3143 prefetchw(skb->next);
3144 acked_pcount = tcp_skb_pcount(skb);
3145 }
3146
3147 if (unlikely(sacked & TCPCB_RETRANS)) {
3148 if (sacked & TCPCB_SACKED_RETRANS)
3149 tp->retrans_out -= acked_pcount;
3150 flag |= FLAG_RETRANS_DATA_ACKED;
3151 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3152 last_ackt = skb->skb_mstamp;
3153 WARN_ON_ONCE(last_ackt.v64 == 0);
3154 if (!first_ackt.v64)
3155 first_ackt = last_ackt;
3156
3157 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3158 reord = min(pkts_acked, reord);
3159 if (!after(scb->end_seq, tp->high_seq))
3160 flag |= FLAG_ORIG_SACK_ACKED;
3161 }
3162
3163 if (sacked & TCPCB_SACKED_ACKED) {
3164 tp->sacked_out -= acked_pcount;
3165 } else if (tcp_is_sack(tp)) {
3166 tp->delivered += acked_pcount;
3167 if (!tcp_skb_spurious_retrans(tp, skb))
3168 tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
3169 }
3170 if (sacked & TCPCB_LOST)
3171 tp->lost_out -= acked_pcount;
3172
3173 tp->packets_out -= acked_pcount;
3174 pkts_acked += acked_pcount;
3175
3176
3177
3178
3179
3180
3181
3182
3183 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3184 flag |= FLAG_DATA_ACKED;
3185 } else {
3186 flag |= FLAG_SYN_ACKED;
3187 tp->retrans_stamp = 0;
3188 }
3189
3190 if (!fully_acked)
3191 break;
3192
3193 tcp_unlink_write_queue(skb, sk);
3194 sk_wmem_free_skb(sk, skb);
3195 if (unlikely(skb == tp->retransmit_skb_hint))
3196 tp->retransmit_skb_hint = NULL;
3197 if (unlikely(skb == tp->lost_skb_hint))
3198 tp->lost_skb_hint = NULL;
3199 }
3200
3201 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3202 tp->snd_up = tp->snd_una;
3203
3204 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3205 flag |= FLAG_SACK_RENEGING;
3206
3207 skb_mstamp_get(&now);
3208 if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3209 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
3210 ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
3211 }
3212 if (sack->first_sackt.v64) {
3213 sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
3214 ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
3215 }
3216
3217 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3218 ca_rtt_us);
3219
3220 if (flag & FLAG_ACKED) {
3221 tcp_rearm_rto(sk);
3222 if (unlikely(icsk->icsk_mtup.probe_size &&
3223 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3224 tcp_mtup_probe_success(sk);
3225 }
3226
3227 if (tcp_is_reno(tp)) {
3228 tcp_remove_reno_sacks(sk, pkts_acked);
3229 } else {
3230 int delta;
3231
3232
3233 if (reord < prior_fackets)
3234 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3235
3236 delta = tcp_is_fack(tp) ? pkts_acked :
3237 prior_sacked - tp->sacked_out;
3238 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3239 }
3240
3241 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3242
3243 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3244 sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
3245
3246
3247
3248
3249 tcp_rearm_rto(sk);
3250 }
3251
3252 if (icsk->icsk_ca_ops->pkts_acked) {
3253 struct ack_sample sample = { .pkts_acked = pkts_acked,
3254 .rtt_us = ca_rtt_us,
3255 .in_flight = last_in_flight };
3256
3257 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3258 }
3259
3260#if FASTRETRANS_DEBUG > 0
3261 WARN_ON((int)tp->sacked_out < 0);
3262 WARN_ON((int)tp->lost_out < 0);
3263 WARN_ON((int)tp->retrans_out < 0);
3264 if (!tp->packets_out && tcp_is_sack(tp)) {
3265 icsk = inet_csk(sk);
3266 if (tp->lost_out) {
3267 pr_debug("Leak l=%u %d\n",
3268 tp->lost_out, icsk->icsk_ca_state);
3269 tp->lost_out = 0;
3270 }
3271 if (tp->sacked_out) {
3272 pr_debug("Leak s=%u %d\n",
3273 tp->sacked_out, icsk->icsk_ca_state);
3274 tp->sacked_out = 0;
3275 }
3276 if (tp->retrans_out) {
3277 pr_debug("Leak r=%u %d\n",
3278 tp->retrans_out, icsk->icsk_ca_state);
3279 tp->retrans_out = 0;
3280 }
3281 }
3282#endif
3283 *acked = pkts_acked;
3284 return flag;
3285}
3286
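/* An ACK arrived while we are probing a zero window: stop the probe
 * timer once the window has opened enough for the head of the queue,
 * otherwise re-arm it with the backed-off timeout.
 */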
3287static void tcp_ack_probe(struct sock *sk)
3288{
3289 const struct tcp_sock *tp = tcp_sk(sk);
3290 struct inet_connection_sock *icsk = inet_csk(sk);
3291
3292
3293
3294 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3295 icsk->icsk_backoff = 0;
3296 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3297
3298
3299
3300 } else {
3301 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3302
3303 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3304 when, TCP_RTO_MAX);
3305 }
3306}
3307
3308static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3309{
3310 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3311 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3312}
3313
3314
3315static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3316{
3317
3318
3319
3320
3321
3322
3323 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3324 return flag & FLAG_FORWARD_PROGRESS;
3325
3326 return flag & FLAG_DATA_ACKED;
3327}
3328
3329
3330
3331
3332
3333
3334static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3335 int flag)
3336{
3337 if (tcp_in_cwnd_reduction(sk)) {
3338
3339 tcp_cwnd_reduction(sk, acked_sacked, flag);
3340 } else if (tcp_may_raise_cwnd(sk, flag)) {
3341
3342 tcp_cong_avoid(sk, ack, acked_sacked);
3343 }
3344 tcp_update_pacing_rate(sk);
3345}
3346
3347
3348
3349
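/* A window update is legitimate if the ACK advances snd_una, carries a
 * newer sequence than the last update, or enlarges the window at the
 * same sequence.
 */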
3350static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3351 const u32 ack, const u32 ack_seq,
3352 const u32 nwin)
3353{
3354 return after(ack, tp->snd_una) ||
3355 after(ack_seq, tp->snd_wl1) ||
3356 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3357}
3358
3359
3360static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3361{
3362 u32 delta = ack - tp->snd_una;
3363
3364 sock_owned_by_me((struct sock *)tp);
3365 u64_stats_update_begin_raw(&tp->syncp);
3366 tp->bytes_acked += delta;
3367 u64_stats_update_end_raw(&tp->syncp);
3368 tp->snd_una = ack;
3369}
3370
3371
3372static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3373{
3374 u32 delta = seq - tp->rcv_nxt;
3375
3376 sock_owned_by_me((struct sock *)tp);
3377 u64_stats_update_begin_raw(&tp->syncp);
3378 tp->bytes_received += delta;
3379 u64_stats_update_end_raw(&tp->syncp);
3380 tp->rcv_nxt = seq;
3381}
3382
3383
3384
3385
3386
3387
3388static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3389 u32 ack_seq)
3390{
3391 struct tcp_sock *tp = tcp_sk(sk);
3392 int flag = 0;
3393 u32 nwin = ntohs(tcp_hdr(skb)->window);
3394
3395 if (likely(!tcp_hdr(skb)->syn))
3396 nwin <<= tp->rx_opt.snd_wscale;
3397
3398 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3399 flag |= FLAG_WIN_UPDATE;
3400 tcp_update_wl(tp, ack_seq);
3401
3402 if (tp->snd_wnd != nwin) {
3403 tp->snd_wnd = nwin;
3404
3405
3406
3407
3408 tp->pred_flags = 0;
3409 tcp_fast_path_check(sk);
3410
3411 if (tcp_send_head(sk))
3412 tcp_slow_start_after_idle_check(sk);
3413
3414 if (nwin > tp->max_window) {
3415 tp->max_window = nwin;
3416 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3417 }
3418 }
3419 }
3420
3421 tcp_snd_una_update(tp, ack);
3422
3423 return flag;
3424}
3425
3426static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3427 u32 *last_oow_ack_time)
3428{
3429 if (*last_oow_ack_time) {
3430 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
3431
3432 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
3433 NET_INC_STATS(net, mib_idx);
3434 return true;
3435 }
3436 }
3437
3438 *last_oow_ack_time = tcp_time_stamp;
3439
3440 return false;
3441}
3442
3443
3444
3445
3446
3447
3448
3449
3450bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3451 int mib_idx, u32 *last_oow_ack_time)
3452{
3453
3454 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3455 !tcp_hdr(skb)->syn)
3456 return false;
3457
3458 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3459}
3460
3461
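/* RFC 5961 challenge ACK, rate limited both per socket and by a
 * randomized global per-second budget (sysctl_tcp_challenge_ack_limit).
 */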
3462static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3463{
3464
3465 static u32 challenge_timestamp;
3466 static unsigned int challenge_count;
3467 struct tcp_sock *tp = tcp_sk(sk);
3468 u32 count, now;
3469
3470
3471 if (__tcp_oow_rate_limited(sock_net(sk),
3472 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3473 &tp->last_oow_ack_time))
3474 return;
3475
3476
3477 now = jiffies / HZ;
3478 if (now != challenge_timestamp) {
3479 u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
3480
3481 challenge_timestamp = now;
3482 WRITE_ONCE(challenge_count, half +
3483 prandom_u32_max(sysctl_tcp_challenge_ack_limit));
3484 }
3485 count = READ_ONCE(challenge_count);
3486 if (count > 0) {
3487 WRITE_ONCE(challenge_count, count - 1);
3488 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3489 tcp_send_ack(sk);
3490 }
3491}
3492
3493static void tcp_store_ts_recent(struct tcp_sock *tp)
3494{
3495 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3496 tp->rx_opt.ts_recent_stamp = get_seconds();
3497}
3498
3499static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3500{
3501 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3502
3503
3504
3505
3506
3507
3508
3509 if (tcp_paws_check(&tp->rx_opt, 0))
3510 tcp_store_ts_recent(tp);
3511 }
3512}
3513
3514
3515
3516
3517
3518
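/* ACK processing while a tail loss probe (TLP) is outstanding: a D-SACK
 * or a pure dupack ends the episode, while an ACK beyond tlp_high_seq
 * means the probe plugged a real loss, so do a one-time cwnd reduction.
 */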
3519static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3520{
3521 struct tcp_sock *tp = tcp_sk(sk);
3522
3523 if (before(ack, tp->tlp_high_seq))
3524 return;
3525
3526 if (flag & FLAG_DSACKING_ACK) {
3527
3528 tp->tlp_high_seq = 0;
3529 } else if (after(ack, tp->tlp_high_seq)) {
3530
3531
3532
3533 tcp_init_cwnd_reduction(sk);
3534 tcp_set_ca_state(sk, TCP_CA_CWR);
3535 tcp_end_cwnd_reduction(sk);
3536 tcp_try_keep_open(sk);
3537 NET_INC_STATS(sock_net(sk),
3538 LINUX_MIB_TCPLOSSPROBERECOVERY);
3539 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3540 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3541
3542 tp->tlp_high_seq = 0;
3543 }
3544}
3545
3546static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3547{
3548 const struct inet_connection_sock *icsk = inet_csk(sk);
3549
3550 if (icsk->icsk_ca_ops->in_ack_event)
3551 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3552}
3553
3554
3555
3556
3557
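/* Carry out whatever (re)transmission the ACK processing asked for:
 * send new data for an F-RTO probe and/or retransmit segments that are
 * marked lost.
 */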
3558static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3559{
3560 struct tcp_sock *tp = tcp_sk(sk);
3561
3562 if (rexmit == REXMIT_NONE)
3563 return;
3564
3565	if (unlikely(rexmit == REXMIT_NEW)) {
3566 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3567 TCP_NAGLE_OFF);
3568 if (after(tp->snd_nxt, tp->high_seq))
3569 return;
3570 tp->frto = 0;
3571 }
3572 tcp_xmit_retransmit_queue(sk);
3573}
3574
3575
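/* Main ACK processing: validate the ACK, update the send window and
 * RTT/RTO state, clean the retransmit queue, run the fast-recovery
 * state machine when the ACK is dubious, and invoke congestion control.
 */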
3576static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3577{
3578 struct inet_connection_sock *icsk = inet_csk(sk);
3579 struct tcp_sock *tp = tcp_sk(sk);
3580 struct tcp_sacktag_state sack_state;
3581 u32 prior_snd_una = tp->snd_una;
3582 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3583 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3584 bool is_dupack = false;
3585 u32 prior_fackets;
3586 int prior_packets = tp->packets_out;
3587 u32 prior_delivered = tp->delivered;
3588 int acked = 0;
3589 int rexmit = REXMIT_NONE;
3590
3591 sack_state.first_sackt.v64 = 0;
3592
3593
3594 prefetchw(sk->sk_write_queue.next);
3595
3596
3597
3598
3599 if (before(ack, prior_snd_una)) {
3600
3601 if (before(ack, prior_snd_una - tp->max_window)) {
3602 tcp_send_challenge_ack(sk, skb);
3603 return -1;
3604 }
3605 goto old_ack;
3606 }
3607
3608
3609
3610
3611 if (after(ack, tp->snd_nxt))
3612 goto invalid_ack;
3613
3614 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3615 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3616 tcp_rearm_rto(sk);
3617
3618 if (after(ack, prior_snd_una)) {
3619 flag |= FLAG_SND_UNA_ADVANCED;
3620 icsk->icsk_retransmits = 0;
3621 }
3622
3623 prior_fackets = tp->fackets_out;
3624
3625
3626
3627
3628 if (flag & FLAG_UPDATE_TS_RECENT)
3629 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3630
3631 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3632
3633
3634
3635
3636 tcp_update_wl(tp, ack_seq);
3637 tcp_snd_una_update(tp, ack);
3638 flag |= FLAG_WIN_UPDATE;
3639
3640 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3641
3642 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3643 } else {
3644 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3645
3646 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3647 flag |= FLAG_DATA;
3648 else
3649 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3650
3651 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3652
3653 if (TCP_SKB_CB(skb)->sacked)
3654 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3655 &sack_state);
3656
3657 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3658 flag |= FLAG_ECE;
3659 ack_ev_flags |= CA_ACK_ECE;
3660 }
3661
3662 if (flag & FLAG_WIN_UPDATE)
3663 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3664
3665 tcp_in_ack_event(sk, ack_ev_flags);
3666 }
3667
3668
3669
3670
3671 sk->sk_err_soft = 0;
3672 icsk->icsk_probes_out = 0;
3673 tp->rcv_tstamp = tcp_time_stamp;
3674 if (!prior_packets)
3675 goto no_queue;
3676
3677
3678 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
3679 &sack_state);
3680
3681 if (tcp_ack_is_dubious(sk, flag)) {
3682 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3683 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3684 }
3685 if (tp->tlp_high_seq)
3686 tcp_process_tlp_ack(sk, ack, flag);
3687
3688 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3689 struct dst_entry *dst = __sk_dst_get(sk);
3690 if (dst)
3691 dst_confirm(dst);
3692 }
3693
3694 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3695 tcp_schedule_loss_probe(sk);
3696 tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
3697 tcp_xmit_recovery(sk, rexmit);
3698 return 1;
3699
3700no_queue:
3701
3702 if (flag & FLAG_DSACKING_ACK)
3703 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3704
3705
3706
3707
3708 if (tcp_send_head(sk))
3709 tcp_ack_probe(sk);
3710
3711 if (tp->tlp_high_seq)
3712 tcp_process_tlp_ack(sk, ack, flag);
3713 return 1;
3714
3715invalid_ack:
3716 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3717 return -1;
3718
3719old_ack:
3720
3721
3722
3723 if (TCP_SKB_CB(skb)->sacked) {
3724 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3725 &sack_state);
3726 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3727 tcp_xmit_recovery(sk, rexmit);
3728 }
3729
3730 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3731 return 0;
3732}
3733
3734static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3735 bool syn, struct tcp_fastopen_cookie *foc,
3736 bool exp_opt)
3737{
3738
3739 if (!foc || !syn || len < 0 || (len & 1))
3740 return;
3741
3742 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3743 len <= TCP_FASTOPEN_COOKIE_MAX)
3744 memcpy(foc->val, cookie, len);
3745 else if (len != 0)
3746 len = -1;
3747 foc->len = len;
3748 foc->exp = exp_opt;
3749}
3750
3751
3752
3753
3754
3755void tcp_parse_options(const struct sk_buff *skb,
3756 struct tcp_options_received *opt_rx, int estab,
3757 struct tcp_fastopen_cookie *foc)
3758{
3759 const unsigned char *ptr;
3760 const struct tcphdr *th = tcp_hdr(skb);
3761 int length = (th->doff * 4) - sizeof(struct tcphdr);
3762
3763 ptr = (const unsigned char *)(th + 1);
3764 opt_rx->saw_tstamp = 0;
3765
3766 while (length > 0) {
3767 int opcode = *ptr++;
3768 int opsize;
3769
3770 switch (opcode) {
3771 case TCPOPT_EOL:
3772 return;
3773 case TCPOPT_NOP:
3774 length--;
3775 continue;
3776 default:
3777 opsize = *ptr++;
3778 if (opsize < 2)
3779 return;
3780 if (opsize > length)
3781 return;
3782 switch (opcode) {
3783 case TCPOPT_MSS:
3784 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3785 u16 in_mss = get_unaligned_be16(ptr);
3786 if (in_mss) {
3787 if (opt_rx->user_mss &&
3788 opt_rx->user_mss < in_mss)
3789 in_mss = opt_rx->user_mss;
3790 opt_rx->mss_clamp = in_mss;
3791 }
3792 }
3793 break;
3794 case TCPOPT_WINDOW:
3795 if (opsize == TCPOLEN_WINDOW && th->syn &&
3796 !estab && sysctl_tcp_window_scaling) {
3797 __u8 snd_wscale = *(__u8 *)ptr;
3798 opt_rx->wscale_ok = 1;
3799 if (snd_wscale > 14) {
3800 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3801 __func__,
3802 snd_wscale);
3803 snd_wscale = 14;
3804 }
3805 opt_rx->snd_wscale = snd_wscale;
3806 }
3807 break;
3808 case TCPOPT_TIMESTAMP:
3809 if ((opsize == TCPOLEN_TIMESTAMP) &&
3810 ((estab && opt_rx->tstamp_ok) ||
3811 (!estab && sysctl_tcp_timestamps))) {
3812 opt_rx->saw_tstamp = 1;
3813 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3814 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3815 }
3816 break;
3817 case TCPOPT_SACK_PERM:
3818 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3819 !estab && sysctl_tcp_sack) {
3820 opt_rx->sack_ok = TCP_SACK_SEEN;
3821 tcp_sack_reset(opt_rx);
3822 }
3823 break;
3824
3825 case TCPOPT_SACK:
3826 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3827 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3828 opt_rx->sack_ok) {
3829 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3830 }
3831 break;
3832#ifdef CONFIG_TCP_MD5SIG
3833 case TCPOPT_MD5SIG:
3834
3835
3836
3837
3838 break;
3839#endif
3840 case TCPOPT_FASTOPEN:
3841 tcp_parse_fastopen_option(
3842 opsize - TCPOLEN_FASTOPEN_BASE,
3843 ptr, th->syn, foc, false);
3844 break;
3845
3846 case TCPOPT_EXP:
3847
3848
3849
3850 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
3851 get_unaligned_be16(ptr) ==
3852 TCPOPT_FASTOPEN_MAGIC)
3853 tcp_parse_fastopen_option(opsize -
3854 TCPOLEN_EXP_FASTOPEN_BASE,
3855 ptr + 2, th->syn, foc, true);
3856 break;
3857
3858 }
3859 ptr += opsize-2;
3860 length -= opsize;
3861 }
3862 }
3863}
3864EXPORT_SYMBOL(tcp_parse_options);
3865
3866static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3867{
3868 const __be32 *ptr = (const __be32 *)(th + 1);
3869
3870 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3871 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3872 tp->rx_opt.saw_tstamp = 1;
3873 ++ptr;
3874 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3875 ++ptr;
3876 if (*ptr)
3877 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3878 else
3879 tp->rx_opt.rcv_tsecr = 0;
3880 return true;
3881 }
3882 return false;
3883}
3884
3885
3886
3887
3888static bool tcp_fast_parse_options(const struct sk_buff *skb,
3889 const struct tcphdr *th, struct tcp_sock *tp)
3890{
3891
3892
3893
3894 if (th->doff == (sizeof(*th) / 4)) {
3895 tp->rx_opt.saw_tstamp = 0;
3896 return false;
3897 } else if (tp->rx_opt.tstamp_ok &&
3898 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3899 if (tcp_parse_aligned_timestamp(tp, th))
3900 return true;
3901 }
3902
3903 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3904 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3905 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3906
3907 return true;
3908}
3909
3910#ifdef CONFIG_TCP_MD5SIG
3911
3912
3913
3914const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3915{
3916 int length = (th->doff << 2) - sizeof(*th);
3917 const u8 *ptr = (const u8 *)(th + 1);
3918
3919
3920 if (length < TCPOLEN_MD5SIG)
3921 return NULL;
3922
3923 while (length > 0) {
3924 int opcode = *ptr++;
3925 int opsize;
3926
3927 switch (opcode) {
3928 case TCPOPT_EOL:
3929 return NULL;
3930 case TCPOPT_NOP:
3931 length--;
3932 continue;
3933 default:
3934 opsize = *ptr++;
3935 if (opsize < 2 || opsize > length)
3936 return NULL;
3937 if (opcode == TCPOPT_MD5SIG)
3938 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3939 }
3940 ptr += opsize - 2;
3941 length -= opsize;
3942 }
3943 return NULL;
3944}
3945EXPORT_SYMBOL(tcp_parse_md5sig_option);
3946#endif
3970
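/* A segment with an old timestamp can still be an ordinary delayed ACK:
 * a pure ACK for the expected sequence that acks nothing new, changes
 * no window and is echoed within roughly one RTO is treated as
 * reordering rather than a PAWS violation.
 */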
3971static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3972{
3973 const struct tcp_sock *tp = tcp_sk(sk);
3974 const struct tcphdr *th = tcp_hdr(skb);
3975 u32 seq = TCP_SKB_CB(skb)->seq;
3976 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3977
3978 return (
3979 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3980
3981
3982 ack == tp->snd_una &&
3983
3984
3985 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3986
3987
3988 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3989}
3990
3991static inline bool tcp_paws_discard(const struct sock *sk,
3992 const struct sk_buff *skb)
3993{
3994 const struct tcp_sock *tp = tcp_sk(sk);
3995
3996 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3997 !tcp_disordered_ack(sk, skb);
3998}
4012
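/* In-window check: accept a segment if it ends at or after rcv_wup and
 * does not start beyond the right edge of the receive window.
 */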
4013static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4014{
4015 return !before(end_seq, tp->rcv_wup) &&
4016 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4017}
4018
4019
4020void tcp_reset(struct sock *sk)
4021{
4022
4023 switch (sk->sk_state) {
4024 case TCP_SYN_SENT:
4025 sk->sk_err = ECONNREFUSED;
4026 break;
4027 case TCP_CLOSE_WAIT:
4028 sk->sk_err = EPIPE;
4029 break;
4030 case TCP_CLOSE:
4031 return;
4032 default:
4033 sk->sk_err = ECONNRESET;
4034 }
4035
4036 smp_wmb();
4037
4038 if (!sock_flag(sk, SOCK_DEAD))
4039 sk->sk_error_report(sk);
4040
4041 tcp_done(sk);
4042}
4057
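/* Process an in-sequence FIN: shut down the receive side, advance the
 * connection state machine, purge the out-of-order queue and wake up
 * anyone waiting on the socket.
 */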
4058void tcp_fin(struct sock *sk)
4059{
4060 struct tcp_sock *tp = tcp_sk(sk);
4061
4062 inet_csk_schedule_ack(sk);
4063
4064 sk->sk_shutdown |= RCV_SHUTDOWN;
4065 sock_set_flag(sk, SOCK_DONE);
4066
4067 switch (sk->sk_state) {
4068 case TCP_SYN_RECV:
4069 case TCP_ESTABLISHED:
4070
4071 tcp_set_state(sk, TCP_CLOSE_WAIT);
4072 inet_csk(sk)->icsk_ack.pingpong = 1;
4073 break;
4074
4075 case TCP_CLOSE_WAIT:
4076 case TCP_CLOSING:
4077
4078
4079
4080 break;
4081 case TCP_LAST_ACK:
4082
4083 break;
4084
4085 case TCP_FIN_WAIT1:
4086
4087
4088
4089
4090 tcp_send_ack(sk);
4091 tcp_set_state(sk, TCP_CLOSING);
4092 break;
4093 case TCP_FIN_WAIT2:
4094
4095 tcp_send_ack(sk);
4096 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4097 break;
4098 default:
4099
4100
4101
4102 pr_err("%s: Impossible, sk->sk_state=%d\n",
4103 __func__, sk->sk_state);
4104 break;
4105 }
4106
4107
4108
4109
4110 __skb_queue_purge(&tp->out_of_order_queue);
4111 if (tcp_is_sack(tp))
4112 tcp_sack_reset(&tp->rx_opt);
4113 sk_mem_reclaim(sk);
4114
4115 if (!sock_flag(sk, SOCK_DEAD)) {
4116 sk->sk_state_change(sk);
4117
4118
4119 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4120 sk->sk_state == TCP_CLOSE)
4121 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4122 else
4123 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4124 }
4125}
4126
4127static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4128 u32 end_seq)
4129{
4130 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4131 if (before(seq, sp->start_seq))
4132 sp->start_seq = seq;
4133 if (after(end_seq, sp->end_seq))
4134 sp->end_seq = end_seq;
4135 return true;
4136 }
4137 return false;
4138}
4139
4140static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4141{
4142 struct tcp_sock *tp = tcp_sk(sk);
4143
4144 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4145 int mib_idx;
4146
4147 if (before(seq, tp->rcv_nxt))
4148 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4149 else
4150 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4151
4152 NET_INC_STATS(sock_net(sk), mib_idx);
4153
4154 tp->rx_opt.dsack = 1;
4155 tp->duplicate_sack[0].start_seq = seq;
4156 tp->duplicate_sack[0].end_seq = end_seq;
4157 }
4158}
4159
4160static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4161{
4162 struct tcp_sock *tp = tcp_sk(sk);
4163
4164 if (!tp->rx_opt.dsack)
4165 tcp_dsack_set(sk, seq, end_seq);
4166 else
4167 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4168}
4169
4170static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4171{
4172 struct tcp_sock *tp = tcp_sk(sk);
4173
4174 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4175 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4176 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4177 tcp_enter_quickack_mode(sk);
4178
4179 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4180 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4181
4182 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4183 end_seq = tp->rcv_nxt;
4184 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4185 }
4186 }
4187
4188 tcp_send_ack(sk);
4189}
4190
4191
4192
4193
4194static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4195{
4196 int this_sack;
4197 struct tcp_sack_block *sp = &tp->selective_acks[0];
4198 struct tcp_sack_block *swalk = sp + 1;
4199
4200
4201
4202
4203 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4204 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4205 int i;
4206
4207
4208
4209
4210 tp->rx_opt.num_sacks--;
4211 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4212 sp[i] = sp[i + 1];
4213 continue;
4214 }
4215 this_sack++, swalk++;
4216 }
4217}
4218
4219static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4220{
4221 struct tcp_sock *tp = tcp_sk(sk);
4222 struct tcp_sack_block *sp = &tp->selective_acks[0];
4223 int cur_sacks = tp->rx_opt.num_sacks;
4224 int this_sack;
4225
4226 if (!cur_sacks)
4227 goto new_sack;
4228
4229 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4230 if (tcp_sack_extend(sp, seq, end_seq)) {
4231
4232 for (; this_sack > 0; this_sack--, sp--)
4233 swap(*sp, *(sp - 1));
4234 if (cur_sacks > 1)
4235 tcp_sack_maybe_coalesce(tp);
4236 return;
4237 }
4238 }
4239
4240
4241
4242
4243
4244
4245
4246 if (this_sack >= TCP_NUM_SACKS) {
4247 this_sack--;
4248 tp->rx_opt.num_sacks--;
4249 sp--;
4250 }
4251 for (; this_sack > 0; this_sack--, sp--)
4252 *sp = *(sp - 1);
4253
4254new_sack:
4255
4256 sp->start_seq = seq;
4257 sp->end_seq = end_seq;
4258 tp->rx_opt.num_sacks++;
4259}
4260
4261
4262
4263static void tcp_sack_remove(struct tcp_sock *tp)
4264{
4265 struct tcp_sack_block *sp = &tp->selective_acks[0];
4266 int num_sacks = tp->rx_opt.num_sacks;
4267 int this_sack;
4268
4269
4270 if (skb_queue_empty(&tp->out_of_order_queue)) {
4271 tp->rx_opt.num_sacks = 0;
4272 return;
4273 }
4274
4275 for (this_sack = 0; this_sack < num_sacks;) {
4276
4277 if (!before(tp->rcv_nxt, sp->start_seq)) {
4278 int i;
4279
4280
4281 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4282
4283
4284 for (i = this_sack+1; i < num_sacks; i++)
4285 tp->selective_acks[i-1] = tp->selective_acks[i];
4286 num_sacks--;
4287 continue;
4288 }
4289 this_sack++;
4290 sp++;
4291 }
4292 tp->rx_opt.num_sacks = num_sacks;
4293}
4307
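/* Try to glue @from onto the tail skb @to to save queue slots and
 * memory; returns true if @from was absorbed and should be freed by
 * the caller.
 */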
4308static bool tcp_try_coalesce(struct sock *sk,
4309 struct sk_buff *to,
4310 struct sk_buff *from,
4311 bool *fragstolen)
4312{
4313 int delta;
4314
4315 *fragstolen = false;
4316
4317
4318 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4319 return false;
4320
4321 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4322 return false;
4323
4324 atomic_add(delta, &sk->sk_rmem_alloc);
4325 sk_mem_charge(sk, delta);
4326 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4327 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4328 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4329 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4330 return true;
4331}
4332
4333static void tcp_drop(struct sock *sk, struct sk_buff *skb)
4334{
4335 sk_drops_add(sk, skb);
4336 __kfree_skb(skb);
4337}
4338
4339
4340
4341
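/* Move segments that have become in-sequence from the out-of-order
 * queue to the receive queue, generating D-SACKs for any overlap with
 * already received data.
 */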
4342static void tcp_ofo_queue(struct sock *sk)
4343{
4344 struct tcp_sock *tp = tcp_sk(sk);
4345 __u32 dsack_high = tp->rcv_nxt;
4346 struct sk_buff *skb, *tail;
4347 bool fragstolen, eaten;
4348
4349 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4350 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4351 break;
4352
4353 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4354 __u32 dsack = dsack_high;
4355 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4356 dsack_high = TCP_SKB_CB(skb)->end_seq;
4357 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4358 }
4359
4360 __skb_unlink(skb, &tp->out_of_order_queue);
4361 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4362 SOCK_DEBUG(sk, "ofo packet was already received\n");
4363 tcp_drop(sk, skb);
4364 continue;
4365 }
4366 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4367 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4368 TCP_SKB_CB(skb)->end_seq);
4369
4370 tail = skb_peek_tail(&sk->sk_receive_queue);
4371 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4372 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4373 if (!eaten)
4374 __skb_queue_tail(&sk->sk_receive_queue, skb);
4375 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4376 tcp_fin(sk);
4377 if (eaten)
4378 kfree_skb_partial(skb, fragstolen);
4379 }
4380}
4381
4382static bool tcp_prune_ofo_queue(struct sock *sk);
4383static int tcp_prune_queue(struct sock *sk);
4384
4385static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4386 unsigned int size)
4387{
4388 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4389 !sk_rmem_schedule(sk, skb, size)) {
4390
4391 if (tcp_prune_queue(sk) < 0)
4392 return -1;
4393
4394 if (!sk_rmem_schedule(sk, skb, size)) {
4395 if (!tcp_prune_ofo_queue(sk))
4396 return -1;
4397
4398 if (!sk_rmem_schedule(sk, skb, size))
4399 return -1;
4400 }
4401 }
4402 return 0;
4403}
4404
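/* Queue a segment that arrived beyond rcv_nxt: coalesce or trim against
 * its neighbours in the out-of-order queue and keep the SACK blocks we
 * advertise up to date.
 */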
4405static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4406{
4407 struct tcp_sock *tp = tcp_sk(sk);
4408 struct sk_buff *skb1;
4409 u32 seq, end_seq;
4410
4411 tcp_ecn_check_ce(tp, skb);
4412
4413 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4414 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4415 tcp_drop(sk, skb);
4416 return;
4417 }
4418
4419
4420 tp->pred_flags = 0;
4421 inet_csk_schedule_ack(sk);
4422
4423 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4424 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4425 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4426
4427 skb1 = skb_peek_tail(&tp->out_of_order_queue);
4428 if (!skb1) {
4429
4430 if (tcp_is_sack(tp)) {
4431 tp->rx_opt.num_sacks = 1;
4432 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4433 tp->selective_acks[0].end_seq =
4434 TCP_SKB_CB(skb)->end_seq;
4435 }
4436 __skb_queue_head(&tp->out_of_order_queue, skb);
4437 goto end;
4438 }
4439
4440 seq = TCP_SKB_CB(skb)->seq;
4441 end_seq = TCP_SKB_CB(skb)->end_seq;
4442
4443 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4444 bool fragstolen;
4445
4446 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4447 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4448 } else {
4449 tcp_grow_window(sk, skb);
4450 kfree_skb_partial(skb, fragstolen);
4451 skb = NULL;
4452 }
4453
4454 if (!tp->rx_opt.num_sacks ||
4455 tp->selective_acks[0].end_seq != seq)
4456 goto add_sack;
4457
4458
4459 tp->selective_acks[0].end_seq = end_seq;
4460 goto end;
4461 }
4462
4463
4464 while (1) {
4465 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4466 break;
4467 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4468 skb1 = NULL;
4469 break;
4470 }
4471 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4472 }
4473
4474
4475 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4476 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4477
4478 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4479 tcp_drop(sk, skb);
4480 skb = NULL;
4481 tcp_dsack_set(sk, seq, end_seq);
4482 goto add_sack;
4483 }
4484 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4485
4486 tcp_dsack_set(sk, seq,
4487 TCP_SKB_CB(skb1)->end_seq);
4488 } else {
4489 if (skb_queue_is_first(&tp->out_of_order_queue,
4490 skb1))
4491 skb1 = NULL;
4492 else
4493 skb1 = skb_queue_prev(
4494 &tp->out_of_order_queue,
4495 skb1);
4496 }
4497 }
4498 if (!skb1)
4499 __skb_queue_head(&tp->out_of_order_queue, skb);
4500 else
4501 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4502
4503
4504 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4505 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4506
4507 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4508 break;
4509 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4510 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4511 end_seq);
4512 break;
4513 }
4514 __skb_unlink(skb1, &tp->out_of_order_queue);
4515 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4516 TCP_SKB_CB(skb1)->end_seq);
4517 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4518 tcp_drop(sk, skb1);
4519 }
4520
4521add_sack:
4522 if (tcp_is_sack(tp))
4523 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4524end:
4525 if (skb) {
4526 tcp_grow_window(sk, skb);
4527 skb_set_owner_r(skb, sk);
4528 }
4529}
4530
4531static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4532 bool *fragstolen)
4533{
4534 int eaten;
4535 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4536
4537 __skb_pull(skb, hdrlen);
4538 eaten = (tail &&
4539 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4540 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4541 if (!eaten) {
4542 __skb_queue_tail(&sk->sk_receive_queue, skb);
4543 skb_set_owner_r(skb, sk);
4544 }
4545 return eaten;
4546}
4547
4548int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4549{
4550 struct sk_buff *skb;
4551 int err = -ENOMEM;
4552 int data_len = 0;
4553 bool fragstolen;
4554
4555 if (size == 0)
4556 return 0;
4557
4558 if (size > PAGE_SIZE) {
4559 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4560
4561 data_len = npages << PAGE_SHIFT;
4562 size = data_len + (size & ~PAGE_MASK);
4563 }
4564 skb = alloc_skb_with_frags(size - data_len, data_len,
4565 PAGE_ALLOC_COSTLY_ORDER,
4566 &err, sk->sk_allocation);
4567 if (!skb)
4568 goto err;
4569
4570 skb_put(skb, size - data_len);
4571 skb->data_len = data_len;
4572 skb->len = size;
4573
4574 if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4575 goto err_free;
4576
4577 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4578 if (err)
4579 goto err_free;
4580
4581 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4582 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4583 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4584
4585 if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
4586 WARN_ON_ONCE(fragstolen);
4587 __kfree_skb(skb);
4588 }
4589 return size;
4590
4591err_free:
4592 kfree_skb(skb);
4593err:
4594 return err;
4596}
4597
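/* Queue incoming data: in-sequence segments go to the receive queue (or
 * directly to a waiting reader), duplicates trigger D-SACKs, and
 * anything past rcv_nxt is handed to the out-of-order path.
 */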
4598static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4599{
4600 struct tcp_sock *tp = tcp_sk(sk);
4601 bool fragstolen = false;
4602 int eaten = -1;
4603
4604 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
4605 __kfree_skb(skb);
4606 return;
4607 }
4608 skb_dst_drop(skb);
4609 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4610
4611 tcp_ecn_accept_cwr(tp, skb);
4612
4613 tp->rx_opt.dsack = 0;
4614
4615
4616
4617
4618
4619 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4620 if (tcp_receive_window(tp) == 0)
4621 goto out_of_window;
4622
4623
4624 if (tp->ucopy.task == current &&
4625 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4626 sock_owned_by_user(sk) && !tp->urg_data) {
4627 int chunk = min_t(unsigned int, skb->len,
4628 tp->ucopy.len);
4629
4630 __set_current_state(TASK_RUNNING);
4631
4632 if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
4633 tp->ucopy.len -= chunk;
4634 tp->copied_seq += chunk;
4635 eaten = (chunk == skb->len);
4636 tcp_rcv_space_adjust(sk);
4637 }
4638 }
4639
4640 if (eaten <= 0) {
4641queue_and_out:
4642 if (eaten < 0) {
4643 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4644 sk_forced_mem_schedule(sk, skb->truesize);
4645 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4646 goto drop;
4647 }
4648 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4649 }
4650 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4651 if (skb->len)
4652 tcp_event_data_recv(sk, skb);
4653 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4654 tcp_fin(sk);
4655
4656 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4657 tcp_ofo_queue(sk);
4658
4659
4660
4661
4662 if (skb_queue_empty(&tp->out_of_order_queue))
4663 inet_csk(sk)->icsk_ack.pingpong = 0;
4664 }
4665
4666 if (tp->rx_opt.num_sacks)
4667 tcp_sack_remove(tp);
4668
4669 tcp_fast_path_check(sk);
4670
4671 if (eaten > 0)
4672 kfree_skb_partial(skb, fragstolen);
4673 if (!sock_flag(sk, SOCK_DEAD))
4674 sk->sk_data_ready(sk);
4675 return;
4676 }
4677
4678 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4679
4680 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4681 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4682
4683out_of_window:
4684 tcp_enter_quickack_mode(sk);
4685 inet_csk_schedule_ack(sk);
4686drop:
4687 tcp_drop(sk, skb);
4688 return;
4689 }
4690
4691
4692 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4693 goto out_of_window;
4694
4695 tcp_enter_quickack_mode(sk);
4696
4697 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4698
4699 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4700 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4701 TCP_SKB_CB(skb)->end_seq);
4702
4703 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4704
4705
4706
4707
4708 if (!tcp_receive_window(tp))
4709 goto out_of_window;
4710 goto queue_and_out;
4711 }
4712
4713 tcp_data_queue_ofo(sk, skb);
4714}
4715
4716static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4717 struct sk_buff_head *list)
4718{
4719 struct sk_buff *next = NULL;
4720
4721 if (!skb_queue_is_last(list, skb))
4722 next = skb_queue_next(list, skb);
4723
4724 __skb_unlink(skb, list);
4725 __kfree_skb(skb);
4726 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4727
4728 return next;
4729}
4738
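/* Collapse the skbs between @head and @tail that cover @start..@end
 * into densely packed skbs to reclaim receive-buffer memory; SYN/FIN
 * segments are left alone.
 */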
4739static void
4740tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4741 struct sk_buff *head, struct sk_buff *tail,
4742 u32 start, u32 end)
4743{
4744 struct sk_buff *skb, *n;
4745 bool end_of_skbs;
4746
4747
4748
4749 skb = head;
4750restart:
4751 end_of_skbs = true;
4752 skb_queue_walk_from_safe(list, skb, n) {
4753 if (skb == tail)
4754 break;
4755
4756 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4757 skb = tcp_collapse_one(sk, skb, list);
4758 if (!skb)
4759 break;
4760 goto restart;
4761 }
4762
4763
4764
4765
4766
4767
4768 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4769 (tcp_win_from_space(skb->truesize) > skb->len ||
4770 before(TCP_SKB_CB(skb)->seq, start))) {
4771 end_of_skbs = false;
4772 break;
4773 }
4774
4775 if (!skb_queue_is_last(list, skb)) {
4776 struct sk_buff *next = skb_queue_next(list, skb);
4777 if (next != tail &&
4778 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4779 end_of_skbs = false;
4780 break;
4781 }
4782 }
4783
4784
4785 start = TCP_SKB_CB(skb)->end_seq;
4786 }
4787 if (end_of_skbs ||
4788 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4789 return;
4790
4791 while (before(start, end)) {
4792 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4793 struct sk_buff *nskb;
4794
4795 nskb = alloc_skb(copy, GFP_ATOMIC);
4796 if (!nskb)
4797 return;
4798
4799 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4800 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4801 __skb_queue_before(list, skb, nskb);
4802 skb_set_owner_r(nskb, sk);
4803
4804
4805 while (copy > 0) {
4806 int offset = start - TCP_SKB_CB(skb)->seq;
4807 int size = TCP_SKB_CB(skb)->end_seq - start;
4808
4809 BUG_ON(offset < 0);
4810 if (size > 0) {
4811 size = min(copy, size);
4812 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4813 BUG();
4814 TCP_SKB_CB(nskb)->end_seq += size;
4815 copy -= size;
4816 start += size;
4817 }
4818 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4819 skb = tcp_collapse_one(sk, skb, list);
4820 if (!skb ||
4821 skb == tail ||
4822 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4823 return;
4824 }
4825 }
4826 }
4827}
4828
4829
4830
4831
4832static void tcp_collapse_ofo_queue(struct sock *sk)
4833{
4834 struct tcp_sock *tp = tcp_sk(sk);
4835 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4836 struct sk_buff *head;
4837 u32 start, end;
4838
4839 if (!skb)
4840 return;
4841
4842 start = TCP_SKB_CB(skb)->seq;
4843 end = TCP_SKB_CB(skb)->end_seq;
4844 head = skb;
4845
4846 for (;;) {
4847 struct sk_buff *next = NULL;
4848
4849 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4850 next = skb_queue_next(&tp->out_of_order_queue, skb);
4851 skb = next;
4852
4853
4854
4855 if (!skb ||
4856 after(TCP_SKB_CB(skb)->seq, end) ||
4857 before(TCP_SKB_CB(skb)->end_seq, start)) {
4858 tcp_collapse(sk, &tp->out_of_order_queue,
4859 head, skb, start, end);
4860 head = skb;
4861 if (!skb)
4862 break;
4863
4864 start = TCP_SKB_CB(skb)->seq;
4865 end = TCP_SKB_CB(skb)->end_seq;
4866 } else {
4867 if (before(TCP_SKB_CB(skb)->seq, start))
4868 start = TCP_SKB_CB(skb)->seq;
4869 if (after(TCP_SKB_CB(skb)->end_seq, end))
4870 end = TCP_SKB_CB(skb)->end_seq;
4871 }
4872 }
4873}
4874
4875
4876
4877
4878
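/* Drop the entire out-of-order queue (and the SACK state that described
 * it) to relieve memory pressure; returns true if anything was freed.
 */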
4879static bool tcp_prune_ofo_queue(struct sock *sk)
4880{
4881 struct tcp_sock *tp = tcp_sk(sk);
4882 bool res = false;
4883
4884 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4885 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
4886 __skb_queue_purge(&tp->out_of_order_queue);
4887
4888
4889
4890
4891
4892
4893 if (tp->rx_opt.sack_ok)
4894 tcp_sack_reset(&tp->rx_opt);
4895 sk_mem_reclaim(sk);
4896 res = true;
4897 }
4898 return res;
4899}
4907
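/* The socket has exceeded its receive buffer: clamp the window,
 * collapse the queues and, as a last resort, purge the out-of-order
 * queue.  Returns 0 if enough memory was reclaimed, -1 otherwise.
 */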
4908static int tcp_prune_queue(struct sock *sk)
4909{
4910 struct tcp_sock *tp = tcp_sk(sk);
4911
4912 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4913
4914 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
4915
4916 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4917 tcp_clamp_window(sk);
4918 else if (tcp_under_memory_pressure(sk))
4919 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4920
4921 tcp_collapse_ofo_queue(sk);
4922 if (!skb_queue_empty(&sk->sk_receive_queue))
4923 tcp_collapse(sk, &sk->sk_receive_queue,
4924 skb_peek(&sk->sk_receive_queue),
4925 NULL,
4926 tp->copied_seq, tp->rcv_nxt);
4927 sk_mem_reclaim(sk);
4928
4929 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4930 return 0;
4931
4932
4933
4934
4935 tcp_prune_ofo_queue(sk);
4936
4937 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4938 return 0;
4939
4940
4941
4942
4943
4944 NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
4945
4946
4947 tp->pred_flags = 0;
4948 return -1;
4949}
4950
4951static bool tcp_should_expand_sndbuf(const struct sock *sk)
4952{
4953 const struct tcp_sock *tp = tcp_sk(sk);
4954
4955
4956
4957
4958 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4959 return false;
4960
4961
4962 if (tcp_under_memory_pressure(sk))
4963 return false;
4964
4965
4966 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4967 return false;
4968
4969
4970 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
4971 return false;
4972
4973 return true;
4974}
4975
4976
4977
4978
4979
4980
4981
4982static void tcp_new_space(struct sock *sk)
4983{
4984 struct tcp_sock *tp = tcp_sk(sk);
4985
4986 if (tcp_should_expand_sndbuf(sk)) {
4987 tcp_sndbuf_expand(sk);
4988 tp->snd_cwnd_stamp = tcp_time_stamp;
4989 }
4990
4991 sk->sk_write_space(sk);
4992}
4993
4994static void tcp_check_space(struct sock *sk)
4995{
4996 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4997 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4998
4999 smp_mb__after_atomic();
5000 if (sk->sk_socket &&
5001 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5002 tcp_new_space(sk);
5003 }
5004}
5005
5006static inline void tcp_data_snd_check(struct sock *sk)
5007{
5008 tcp_push_pending_frames(sk);
5009 tcp_check_space(sk);
5010}
5011
5012
5013
5014
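/* Decide whether to ACK immediately (more than a full-sized segment
 * unacked with room to advance the window, quickack mode, or
 * out-of-order data pending) or to schedule a delayed ACK.
 */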
5015static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5016{
5017 struct tcp_sock *tp = tcp_sk(sk);
5018
5019
5020 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5021
5022
5023
5024 __tcp_select_window(sk) >= tp->rcv_wnd) ||
5025
5026 tcp_in_quickack_mode(sk) ||
5027
5028 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
5029
5030 tcp_send_ack(sk);
5031 } else {
5032
5033 tcp_send_delayed_ack(sk);
5034 }
5035}
5036
5037static inline void tcp_ack_snd_check(struct sock *sk)
5038{
5039 if (!inet_csk_ack_scheduled(sk)) {
5040
5041 return;
5042 }
5043 __tcp_ack_snd_check(sk, 1);
5044}
5055
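/* A segment signalled urgent data: validate and record the urgent
 * pointer, send SIGURG to the owner, and ignore pointers that are stale
 * or already consumed.
 */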
5056static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5057{
5058 struct tcp_sock *tp = tcp_sk(sk);
5059 u32 ptr = ntohs(th->urg_ptr);
5060
5061 if (ptr && !sysctl_tcp_stdurg)
5062 ptr--;
5063 ptr += ntohl(th->seq);
5064
5065
5066 if (after(tp->copied_seq, ptr))
5067 return;
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079 if (before(ptr, tp->rcv_nxt))
5080 return;
5081
5082
5083 if (tp->urg_data && !after(ptr, tp->urg_seq))
5084 return;
5085
5086
5087 sk_send_sigurg(sk);
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5105 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5106 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5107 tp->copied_seq++;
5108 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5109 __skb_unlink(skb, &sk->sk_receive_queue);
5110 __kfree_skb(skb);
5111 }
5112 }
5113
5114 tp->urg_data = TCP_URG_NOTYET;
5115 tp->urg_seq = ptr;
5116
5117
5118 tp->pred_flags = 0;
5119}
5120
5121
5122static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5123{
5124 struct tcp_sock *tp = tcp_sk(sk);
5125
5126
5127 if (th->urg)
5128 tcp_check_urg(sk, th);
5129
5130
5131 if (tp->urg_data == TCP_URG_NOTYET) {
5132 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5133 th->syn;
5134
5135
5136 if (ptr < skb->len) {
5137 u8 tmp;
5138 if (skb_copy_bits(skb, ptr, &tmp, 1))
5139 BUG();
5140 tp->urg_data = TCP_URG_VALID | tmp;
5141 if (!sock_flag(sk, SOCK_DEAD))
5142 sk->sk_data_ready(sk);
5143 }
5144 }
5145}
5146
5147static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5148{
5149 struct tcp_sock *tp = tcp_sk(sk);
5150 int chunk = skb->len - hlen;
5151 int err;
5152
5153 if (skb_csum_unnecessary(skb))
5154 err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
5155 else
5156 err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
5157
5158 if (!err) {
5159 tp->ucopy.len -= chunk;
5160 tp->copied_seq += chunk;
5161 tcp_rcv_space_adjust(sk);
5162 }
5163
5164 return err;
5165}
5166
5167
5168
5169
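/* Common validation for incoming segments: PAWS, sequence-window, RST
 * and SYN checks, replying with rate-limited dupacks or challenge ACKs
 * where appropriate.  Returns false if the segment was dropped.
 */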
5170static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5171 const struct tcphdr *th, int syn_inerr)
5172{
5173 struct tcp_sock *tp = tcp_sk(sk);
5174 bool rst_seq_match = false;
5175
5176
5177 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5178 tcp_paws_discard(sk, skb)) {
5179 if (!th->rst) {
5180 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5181 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5182 LINUX_MIB_TCPACKSKIPPEDPAWS,
5183 &tp->last_oow_ack_time))
5184 tcp_send_dupack(sk, skb);
5185 goto discard;
5186 }
5187
5188 }
5189
5190
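 /* Step 1: check sequence number */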
5191 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5192
5193
5194
5195
5196
5197
5198 if (!th->rst) {
5199 if (th->syn)
5200 goto syn_challenge;
5201 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5202 LINUX_MIB_TCPACKSKIPPEDSEQ,
5203 &tp->last_oow_ack_time))
5204 tcp_send_dupack(sk, skb);
5205 }
5206 goto discard;
5207 }
5208
5209
5210 if (th->rst) {
5211
5212
5213
5214
5215
5216
5217
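 /* RFC 5961 3.2: accept the RST only if its sequence number exactly
  * matches RCV.NXT (or, when SACK is in use, the right-most SACK block
  * we have advertised); any other in-window RST gets a challenge ACK.
  */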
5218 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
5219 rst_seq_match = true;
5220 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
5221 struct tcp_sack_block *sp = &tp->selective_acks[0];
5222 int max_sack = sp[0].end_seq;
5223 int this_sack;
5224
5225 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
5226 ++this_sack) {
5227 max_sack = after(sp[this_sack].end_seq,
5228 max_sack) ?
5229 sp[this_sack].end_seq : max_sack;
5230 }
5231
5232 if (TCP_SKB_CB(skb)->seq == max_sack)
5233 rst_seq_match = true;
5234 }
5235
5236 if (rst_seq_match)
5237 tcp_reset(sk);
5238 else
5239 tcp_send_challenge_ack(sk, skb);
5240 goto discard;
5241 }
5242
5243
5244
5245
5246
5247
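 /* step 4: check for a SYN (RFC 5961 4.2): send a challenge ACK. */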
5248 if (th->syn) {
5249syn_challenge:
5250 if (syn_inerr)
5251 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5252 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5253 tcp_send_challenge_ack(sk, skb);
5254 goto discard;
5255 }
5256
5257 return true;
5258
5259discard:
5260 tcp_drop(sk, skb);
5261 return false;
5262}
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
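/* TCP receive function for the ESTABLISHED state.
 *
 * It is split into a fast path and a slow path.  The fast path is
 * disabled when:
 * - a zero window was announced from us,
 * - out of order segments arrived,
 * - urgent data is expected,
 * - there is no buffer space left,
 * - unexpected TCP flags/window values/header lengths are received
 *   (detected by checking the TCP header against pred_flags),
 * - data is sent in both directions (the fast path only supports pure
 *   senders or pure receivers),
 * - an unexpected TCP option is present.
 *
 * When these conditions are not met, processing falls back to a
 * standard receive procedure patterned after RFC 793.
 */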
5287void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5288 const struct tcphdr *th, unsigned int len)
5289{
5290 struct tcp_sock *tp = tcp_sk(sk);
5291
5292 if (unlikely(!sk->sk_rx_dst))
5293 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309 tp->rx_opt.saw_tstamp = 0;
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5321 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5322 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5323 int tcp_header_len = tp->tcp_header_len;
5324
5325
5326
5327
5328
5329
5330
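 /* Check the aligned timestamp option: any parse failure, or a tsval
  * older than ts_recent, sends us to the slow path where PAWS is
  * checked in full.
  */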
5331 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5332
5333 if (!tcp_parse_aligned_timestamp(tp, th))
5334 goto slow_path;
5335
5336
5337 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5338 goto slow_path;
5339
5340
5341
5342
5343
5344
5345 }
5346
5347 if (len <= tcp_header_len) {
5348
5349 if (len == tcp_header_len) {
5350
5351
5352
5353
5354 if (tcp_header_len ==
5355 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5356 tp->rcv_nxt == tp->rcv_wup)
5357 tcp_store_ts_recent(tp);
5358
5359
5360
5361
5362 tcp_ack(sk, skb, 0);
5363 __kfree_skb(skb);
5364 tcp_data_snd_check(sk);
5365 return;
5366 } else { /* Header too small */
5367 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5368 goto discard;
5369 }
5370 } else {
5371 int eaten = 0;
5372 bool fragstolen = false;
5373
5374 if (tp->ucopy.task == current &&
5375 tp->copied_seq == tp->rcv_nxt &&
5376 len - tcp_header_len <= tp->ucopy.len &&
5377 sock_owned_by_user(sk)) {
5378 __set_current_state(TASK_RUNNING);
5379
5380 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
5381
5382
5383
5384
5385 if (tcp_header_len ==
5386 (sizeof(struct tcphdr) +
5387 TCPOLEN_TSTAMP_ALIGNED) &&
5388 tp->rcv_nxt == tp->rcv_wup)
5389 tcp_store_ts_recent(tp);
5390
5391 tcp_rcv_rtt_measure_ts(sk, skb);
5392
5393 __skb_pull(skb, tcp_header_len);
5394 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
5395 NET_INC_STATS(sock_net(sk),
5396 LINUX_MIB_TCPHPHITSTOUSER);
5397 eaten = 1;
5398 }
5399 }
5400 if (!eaten) {
5401 if (tcp_checksum_complete(skb))
5402 goto csum_error;
5403
5404 if ((int)skb->truesize > sk->sk_forward_alloc)
5405 goto step5;
5406
5407
5408
5409
5410
5411 if (tcp_header_len ==
5412 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5413 tp->rcv_nxt == tp->rcv_wup)
5414 tcp_store_ts_recent(tp);
5415
5416 tcp_rcv_rtt_measure_ts(sk, skb);
5417
5418 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
5419
5420
5421 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5422 &fragstolen);
5423 }
5424
5425 tcp_event_data_recv(sk, skb);
5426
5427 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5428
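 /* The ACK field differs from snd_una: let tcp_ack() process it and
  * push out any data it freed up.
  */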
5429 tcp_ack(sk, skb, FLAG_DATA);
5430 tcp_data_snd_check(sk);
5431 if (!inet_csk_ack_scheduled(sk))
5432 goto no_ack;
5433 }
5434
5435 __tcp_ack_snd_check(sk, 0);
5436no_ack:
5437 if (eaten)
5438 kfree_skb_partial(skb, fragstolen);
5439 sk->sk_data_ready(sk);
5440 return;
5441 }
5442 }
5443
5444slow_path:
5445 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
5446 goto csum_error;
5447
5448 if (!th->ack && !th->rst && !th->syn)
5449 goto discard;
5450
5451
5452
5453
5454
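 /* Standard slow path. */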
5455 if (!tcp_validate_incoming(sk, skb, th, 1))
5456 return;
5457
5458step5:
5459 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5460 goto discard;
5461
5462 tcp_rcv_rtt_measure_ts(sk, skb);
5463
5464
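 /* Process urgent data. */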
5465 tcp_urg(sk, skb, th);
5466
5467
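 /* step 7: process the segment text */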
5468 tcp_data_queue(sk, skb);
5469
5470 tcp_data_snd_check(sk);
5471 tcp_ack_snd_check(sk);
5472 return;
5473
5474csum_error:
5475 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
5476 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5477
5478discard:
5479 tcp_drop(sk, skb);
5480}
5481EXPORT_SYMBOL(tcp_rcv_established);
5482
5483void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5484{
5485 struct tcp_sock *tp = tcp_sk(sk);
5486 struct inet_connection_sock *icsk = inet_csk(sk);
5487
5488 tcp_set_state(sk, TCP_ESTABLISHED);
5489
5490 if (skb) {
5491 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5492 security_inet_conn_established(sk, skb);
5493 }
5494
5495
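 /* Make sure socket is routed, for correct metrics. */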
5496 icsk->icsk_af_ops->rebuild_header(sk);
5497
5498 tcp_init_metrics(sk);
5499
5500 tcp_init_congestion_control(sk);
5501
5502
5503
5504
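 /* Prevent spurious tcp_cwnd_restart() on first data packet. */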
5505 tp->lsndtime = tcp_time_stamp;
5506
5507 tcp_init_buffer_space(sk);
5508
5509 if (sock_flag(sk, SOCK_KEEPOPEN))
5510 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5511
5512 if (!tp->rx_opt.snd_wscale)
5513 __tcp_fast_path_on(tp, tp->snd_wnd);
5514 else
5515 tp->pred_flags = 0;
5516
5517 if (!sock_flag(sk, SOCK_DEAD)) {
5518 sk->sk_state_change(sk);
5519 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5520 }
5521}
5522
5523static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5524 struct tcp_fastopen_cookie *cookie)
5525{
5526 struct tcp_sock *tp = tcp_sk(sk);
5527 struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
5528 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5529 bool syn_drop = false;
5530
5531 if (mss == tp->rx_opt.user_mss) {
5532 struct tcp_options_received opt;
5533
5534
5535 tcp_clear_options(&opt);
5536 opt.user_mss = opt.mss_clamp = 0;
5537 tcp_parse_options(synack, &opt, 0, NULL);
5538 mss = opt.mss_clamp;
5539 }
5540
5541 if (!tp->syn_fastopen) {
5542
5543 cookie->len = -1;
5544 } else if (tp->total_retrans) {
5545
5546
5547
5548
5549
5550 syn_drop = (cookie->len < 0 && data);
5551 } else if (cookie->len < 0 && !tp->syn_data) {
5552
5553
5554
5555
5556 try_exp = tp->syn_fastopen_exp ? 2 : 1;
5557 }
5558
5559 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
5560
5561 if (data) {
5562 tcp_for_write_queue_from(data, sk) {
5563 if (data == tcp_send_head(sk) ||
5564 __tcp_retransmit_skb(sk, data, 1))
5565 break;
5566 }
5567 tcp_rearm_rto(sk);
5568 NET_INC_STATS(sock_net(sk),
5569 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
5570 return true;
5571 }
5572 tp->syn_data_acked = tp->syn_data;
5573 if (tp->syn_data_acked)
5574 NET_INC_STATS(sock_net(sk),
5575 LINUX_MIB_TCPFASTOPENACTIVE);
5576
5577 tcp_fastopen_add_skb(sk, synack);
5578
5579 return false;
5580}
5581
5582static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5583 const struct tcphdr *th)
5584{
5585 struct inet_connection_sock *icsk = inet_csk(sk);
5586 struct tcp_sock *tp = tcp_sk(sk);
5587 struct tcp_fastopen_cookie foc = { .len = -1 };
5588 int saved_clamp = tp->rx_opt.mss_clamp;
5589
5590 tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
5591 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
5592 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5593
5594 if (th->ack) {
5595
5596
5597
5598
5599
5600
5601
5602
5603 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
5604 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
5605 goto reset_and_undo;
5606
5607 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5608 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5609 tcp_time_stamp)) {
5610 NET_INC_STATS(sock_net(sk),
5611 LINUX_MIB_PAWSACTIVEREJECTED);
5612 goto reset_and_undo;
5613 }
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623 if (th->rst) {
5624 tcp_reset(sk);
5625 goto discard;
5626 }
5627
5628
5629
5630
5631
5632
5633
5634
5635 if (!th->syn)
5636 goto discard_and_undo;
5637
5638
5639
5640
5641
5642
5643
5644
5645 tcp_ecn_rcv_synack(tp, th);
5646
5647 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5648 tcp_ack(sk, skb, FLAG_SLOWPATH);
5649
5650
5651
5652
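 /* Ok.. it's good. Set up sequence numbers and
  * move to established.
  */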
5653 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5654 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5655
5656
5657
5658
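 /* RFC 1323: the window in SYN and SYN-ACK segments is never scaled. */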
5659 tp->snd_wnd = ntohs(th->window);
5660
5661 if (!tp->rx_opt.wscale_ok) {
5662 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5663 tp->window_clamp = min(tp->window_clamp, 65535U);
5664 }
5665
5666 if (tp->rx_opt.saw_tstamp) {
5667 tp->rx_opt.tstamp_ok = 1;
5668 tp->tcp_header_len =
5669 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5670 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5671 tcp_store_ts_recent(tp);
5672 } else {
5673 tp->tcp_header_len = sizeof(struct tcphdr);
5674 }
5675
5676 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5677 tcp_enable_fack(tp);
5678
5679 tcp_mtup_init(sk);
5680 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5681 tcp_initialize_rcv_mss(sk);
5682
5683
5684
5685
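 /* Remember, tcp_poll() does not lock socket!
  * Change state from SYN-SENT only after copied_seq
  * is initialized.
  */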
5686 tp->copied_seq = tp->rcv_nxt;
5687
5688 smp_mb();
5689
5690 tcp_finish_connect(sk, skb);
5691
5692 if ((tp->syn_fastopen || tp->syn_data) &&
5693 tcp_rcv_fastopen_synack(sk, skb, &foc))
5694 return -1;
5695
5696 if (sk->sk_write_pending ||
5697 icsk->icsk_accept_queue.rskq_defer_accept ||
5698 icsk->icsk_ack.pingpong) {
5699
5700
5701
5702
5703
5704
5705
5706 inet_csk_schedule_ack(sk);
5707 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5708 tcp_enter_quickack_mode(sk);
5709 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5710 TCP_DELACK_MAX, TCP_RTO_MAX);
5711
5712discard:
5713 tcp_drop(sk, skb);
5714 return 0;
5715 } else {
5716 tcp_send_ack(sk);
5717 }
5718 return -1;
5719 }
5720
5721
5722
5723 if (th->rst) {
5724
5725
5726
5727
5728
5729
5730 goto discard_and_undo;
5731 }
5732
5733
5734 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5735 tcp_paws_reject(&tp->rx_opt, 0))
5736 goto discard_and_undo;
5737
5738 if (th->syn) {
5739
5740
5741
5742
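 /* We see SYN without ACK: this is an attempted simultaneous
  * connect with crossed SYNs (possibly a connect to self).
  */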
5743 tcp_set_state(sk, TCP_SYN_RECV);
5744
5745 if (tp->rx_opt.saw_tstamp) {
5746 tp->rx_opt.tstamp_ok = 1;
5747 tcp_store_ts_recent(tp);
5748 tp->tcp_header_len =
5749 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5750 } else {
5751 tp->tcp_header_len = sizeof(struct tcphdr);
5752 }
5753
5754 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5755 tp->copied_seq = tp->rcv_nxt;
5756 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5757
5758
5759
5760
5761 tp->snd_wnd = ntohs(th->window);
5762 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5763 tp->max_window = tp->snd_wnd;
5764
5765 tcp_ecn_rcv_syn(tp, th);
5766
5767 tcp_mtup_init(sk);
5768 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5769 tcp_initialize_rcv_mss(sk);
5770
5771 tcp_send_synack(sk);
5772#if 0
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
5783
5784 return -1;
5785#else
5786 goto discard;
5787#endif
5788 }
5789
5790
5791
5792
5793discard_and_undo:
5794 tcp_clear_options(&tp->rx_opt);
5795 tp->rx_opt.mss_clamp = saved_clamp;
5796 goto discard;
5797
5798reset_and_undo:
5799 tcp_clear_options(&tp->rx_opt);
5800 tp->rx_opt.mss_clamp = saved_clamp;
5801 return 1;
5802}
5803
5804
5805
5806
5807
5808
5809
5810
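/* This function implements the receiving procedure of RFC 793 for all
 * states except ESTABLISHED and TIME-WAIT.  It is called from both
 * tcp_v4_rcv and tcp_v6_rcv and should be address independent.
 */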
5811int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
5812{
5813 struct tcp_sock *tp = tcp_sk(sk);
5814 struct inet_connection_sock *icsk = inet_csk(sk);
5815 const struct tcphdr *th = tcp_hdr(skb);
5816 struct request_sock *req;
5817 int queued = 0;
5818 bool acceptable;
5819
5820 switch (sk->sk_state) {
5821 case TCP_CLOSE:
5822 goto discard;
5823
5824 case TCP_LISTEN:
5825 if (th->ack)
5826 return 1;
5827
5828 if (th->rst)
5829 goto discard;
5830
5831 if (th->syn) {
5832 if (th->fin)
5833 goto discard;
5834 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5835 return 1;
5836
5837 consume_skb(skb);
5838 return 0;
5839 }
5840 goto discard;
5841
5842 case TCP_SYN_SENT:
5843 tp->rx_opt.saw_tstamp = 0;
5844 queued = tcp_rcv_synsent_state_process(sk, skb, th);
5845 if (queued >= 0)
5846 return queued;
5847
5848
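 /* Do step6 onward by hand. */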
5849 tcp_urg(sk, skb, th);
5850 __kfree_skb(skb);
5851 tcp_data_snd_check(sk);
5852 return 0;
5853 }
5854
5855 tp->rx_opt.saw_tstamp = 0;
5856 req = tp->fastopen_rsk;
5857 if (req) {
5858 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
5859 sk->sk_state != TCP_FIN_WAIT1);
5860
5861 if (!tcp_check_req(sk, skb, req, true))
5862 goto discard;
5863 }
5864
5865 if (!th->ack && !th->rst && !th->syn)
5866 goto discard;
5867
5868 if (!tcp_validate_incoming(sk, skb, th, 0))
5869 return 0;
5870
5871
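 /* step 5: check the ACK field */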
5872 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
5873 FLAG_UPDATE_TS_RECENT) > 0;
5874
5875 switch (sk->sk_state) {
5876 case TCP_SYN_RECV:
5877 if (!acceptable)
5878 return 1;
5879
5880 if (!tp->srtt_us)
5881 tcp_synack_rtt_meas(sk, req);
5882
5883
5884
5885
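 /* Once we leave TCP_SYN_RECV, we no longer need req
  * so release it.
  */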
5886 if (req) {
5887 inet_csk(sk)->icsk_retransmits = 0;
5888 reqsk_fastopen_remove(sk, req, false);
5889 } else {
5890
5891 icsk->icsk_af_ops->rebuild_header(sk);
5892 tcp_init_congestion_control(sk);
5893
5894 tcp_mtup_init(sk);
5895 tp->copied_seq = tp->rcv_nxt;
5896 tcp_init_buffer_space(sk);
5897 }
5898 smp_mb();
5899 tcp_set_state(sk, TCP_ESTABLISHED);
5900 sk->sk_state_change(sk);
5901
5902
5903
5904
5905
5906 if (sk->sk_socket)
5907 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5908
5909 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5910 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
5911 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5912
5913 if (tp->rx_opt.tstamp_ok)
5914 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5915
5916 if (req) {
5917
5918
5919
5920
5921
5922
5923
5924
5925 tcp_rearm_rto(sk);
5926 } else
5927 tcp_init_metrics(sk);
5928
5929 tcp_update_pacing_rate(sk);
5930
5931
5932 tp->lsndtime = tcp_time_stamp;
5933
5934 tcp_initialize_rcv_mss(sk);
5935 tcp_fast_path_on(tp);
5936 break;
5937
5938 case TCP_FIN_WAIT1: {
5939 struct dst_entry *dst;
5940 int tmo;
5941
5942
5943
5944
5945
5946
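 /* If we enter the TCP_FIN_WAIT1 state as a Fast Open socket and
  * this is the first acceptable ACK we have received, it acknowledges
  * our SYN-ACK, so release the request sock and re-arm the RTO for
  * the data still outstanding.
  */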
5947 if (req) {
5948
5949
5950
5951
5952
5953
5954 if (!acceptable)
5955 return 1;
5956
5957 reqsk_fastopen_remove(sk, req, false);
5958 tcp_rearm_rto(sk);
5959 }
5960 if (tp->snd_una != tp->write_seq)
5961 break;
5962
5963 tcp_set_state(sk, TCP_FIN_WAIT2);
5964 sk->sk_shutdown |= SEND_SHUTDOWN;
5965
5966 dst = __sk_dst_get(sk);
5967 if (dst)
5968 dst_confirm(dst);
5969
5970 if (!sock_flag(sk, SOCK_DEAD)) {
5971
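 /* Wake up lingering close() */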
5972 sk->sk_state_change(sk);
5973 break;
5974 }
5975
5976 if (tp->linger2 < 0 ||
5977 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5978 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5979 tcp_done(sk);
5980 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5981 return 1;
5982 }
5983
5984 tmo = tcp_fin_time(sk);
5985 if (tmo > TCP_TIMEWAIT_LEN) {
5986 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5987 } else if (th->fin || sock_owned_by_user(sk)) {
5988
5989
5990
5991
5992
5993
5994 inet_csk_reset_keepalive_timer(sk, tmo);
5995 } else {
5996 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5997 goto discard;
5998 }
5999 break;
6000 }
6001
6002 case TCP_CLOSING:
6003 if (tp->snd_una == tp->write_seq) {
6004 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
6005 goto discard;
6006 }
6007 break;
6008
6009 case TCP_LAST_ACK:
6010 if (tp->snd_una == tp->write_seq) {
6011 tcp_update_metrics(sk);
6012 tcp_done(sk);
6013 goto discard;
6014 }
6015 break;
6016 }
6017
6018
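 /* step 6: check the URG bit */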
6019 tcp_urg(sk, skb, th);
6020
6021
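 /* step 7: process the segment text */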
6022 switch (sk->sk_state) {
6023 case TCP_CLOSE_WAIT:
6024 case TCP_CLOSING:
6025 case TCP_LAST_ACK:
6026 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
6027 break;
6028 case TCP_FIN_WAIT1:
6029 case TCP_FIN_WAIT2:
6030
6031
6032
6033
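 /* RFC 793 says to queue data in these states,
  * RFC 1122 says we MUST send a reset.
  * BSD 4.4 also does reset.
  */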
6034 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6035 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6036 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6037 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6038 tcp_reset(sk);
6039 return 1;
6040 }
6041 }
6042
6043 case TCP_ESTABLISHED:
6044 tcp_data_queue(sk, skb);
6045 queued = 1;
6046 break;
6047 }
6048
6049
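 /* tcp_data_queue() could have moved the socket to TIME-WAIT. */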
6050 if (sk->sk_state != TCP_CLOSE) {
6051 tcp_data_snd_check(sk);
6052 tcp_ack_snd_check(sk);
6053 }
6054
6055 if (!queued) {
6056discard:
6057 tcp_drop(sk, skb);
6058 }
6059 return 0;
6060}
6061EXPORT_SYMBOL(tcp_rcv_state_process);
6062
6063static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
6064{
6065 struct inet_request_sock *ireq = inet_rsk(req);
6066
6067 if (family == AF_INET)
6068 net_dbg_ratelimited("drop open request from %pI4/%u\n",
6069 &ireq->ir_rmt_addr, port);
6070#if IS_ENABLED(CONFIG_IPV6)
6071 else if (family == AF_INET6)
6072 net_dbg_ratelimited("drop open request from %pI6/%u\n",
6073 &ireq->ir_v6_rmt_addr, port);
6074#endif
6075}
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
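/* RFC 3168 6.1.1: SYN packets must not have the ECT bits set.  If a SYN
 * arrives with them set, some middlebox is mangling the TOS bits, so we
 * refuse ECN negotiation to avoid spurious congestion signals, unless the
 * congestion control in use explicitly wants ECN (e.g. DCTCP) or the
 * destination has been marked ECN-capable.
 */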
6089static void tcp_ecn_create_request(struct request_sock *req,
6090 const struct sk_buff *skb,
6091 const struct sock *listen_sk,
6092 const struct dst_entry *dst)
6093{
6094 const struct tcphdr *th = tcp_hdr(skb);
6095 const struct net *net = sock_net(listen_sk);
6096 bool th_ecn = th->ece && th->cwr;
6097 bool ect, ecn_ok;
6098 u32 ecn_ok_dst;
6099
6100 if (!th_ecn)
6101 return;
6102
6103 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6104 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6105 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
6106
6107 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6108 (ecn_ok_dst & DST_FEATURE_ECN_CA))
6109 inet_rsk(req)->ecn_ok = 1;
6110}
6111
6112static void tcp_openreq_init(struct request_sock *req,
6113 const struct tcp_options_received *rx_opt,
6114 struct sk_buff *skb, const struct sock *sk)
6115{
6116 struct inet_request_sock *ireq = inet_rsk(req);
6117
6118 req->rsk_rcv_wnd = 0;
6119 req->cookie_ts = 0;
6120 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
6121 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
6122 skb_mstamp_get(&tcp_rsk(req)->snt_synack);
6123 tcp_rsk(req)->last_oow_ack_time = 0;
6124 req->mss = rx_opt->mss_clamp;
6125 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
6126 ireq->tstamp_ok = rx_opt->tstamp_ok;
6127 ireq->sack_ok = rx_opt->sack_ok;
6128 ireq->snd_wscale = rx_opt->snd_wscale;
6129 ireq->wscale_ok = rx_opt->wscale_ok;
6130 ireq->acked = 0;
6131 ireq->ecn_ok = 0;
6132 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6133 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6134 ireq->ir_mark = inet_request_mark(sk, skb);
6135}
6136
6137struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6138 struct sock *sk_listener,
6139 bool attach_listener)
6140{
6141 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6142 attach_listener);
6143
6144 if (req) {
6145 struct inet_request_sock *ireq = inet_rsk(req);
6146
6147 kmemcheck_annotate_bitfield(ireq, flags);
6148 ireq->opt = NULL;
6149#if IS_ENABLED(CONFIG_IPV6)
6150 ireq->pktopts = NULL;
6151#endif
6152 atomic64_set(&ireq->ir_cookie, 0);
6153 ireq->ireq_state = TCP_NEW_SYN_RECV;
6154 write_pnet(&ireq->ireq_net, sock_net(sk_listener));
6155 ireq->ireq_family = sk_listener->sk_family;
6156 }
6157
6158 return req;
6159}
6160EXPORT_SYMBOL(inet_reqsk_alloc);
6161
6162
6163
6164
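/* Return true if a syncookie should be sent. */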
6165static bool tcp_syn_flood_action(const struct sock *sk,
6166 const struct sk_buff *skb,
6167 const char *proto)
6168{
6169 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6170 const char *msg = "Dropping request";
6171 bool want_cookie = false;
6172 struct net *net = sock_net(sk);
6173
6174#ifdef CONFIG_SYN_COOKIES
6175 if (net->ipv4.sysctl_tcp_syncookies) {
6176 msg = "Sending cookies";
6177 want_cookie = true;
6178 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6179 } else
6180#endif
6181 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6182
6183 if (!queue->synflood_warned &&
6184 net->ipv4.sysctl_tcp_syncookies != 2 &&
6185 xchg(&queue->synflood_warned, 1) == 0)
6186 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6187 proto, ntohs(tcp_hdr(skb)->dest), msg);
6188
6189 return want_cookie;
6190}
6191
6192static void tcp_reqsk_record_syn(const struct sock *sk,
6193 struct request_sock *req,
6194 const struct sk_buff *skb)
6195{
6196 if (tcp_sk(sk)->save_syn) {
6197 u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
6198 u32 *copy;
6199
6200 copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
6201 if (copy) {
6202 copy[0] = len;
6203 memcpy(&copy[1], skb_network_header(skb), len);
6204 req->saved_syn = copy;
6205 }
6206 }
6207}
6208
6209int tcp_conn_request(struct request_sock_ops *rsk_ops,
6210 const struct tcp_request_sock_ops *af_ops,
6211 struct sock *sk, struct sk_buff *skb)
6212{
6213 struct tcp_fastopen_cookie foc = { .len = -1 };
6214 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6215 struct tcp_options_received tmp_opt;
6216 struct tcp_sock *tp = tcp_sk(sk);
6217 struct net *net = sock_net(sk);
6218 struct sock *fastopen_sk = NULL;
6219 struct dst_entry *dst = NULL;
6220 struct request_sock *req;
6221 bool want_cookie = false;
6222 struct flowi fl;
6223
6224
6225
6226
6227
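 /* TW buckets are converted to open requests without
  * limitations, they conserve resources and peer is
  * evidently real one.
  */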
6228 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
6229 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6230 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
6231 if (!want_cookie)
6232 goto drop;
6233 }
6234
6235
6236
6237
6238
6239
6240
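 /* Accept backlog is full. If we have already queued enough
  * of warm entries in syn queue, drop request. It is better than
  * clogging syn queue with openreqs with exponentially increasing
  * timeout.
  */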
6241 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
6242 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6243 goto drop;
6244 }
6245
6246 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6247 if (!req)
6248 goto drop;
6249
6250 tcp_rsk(req)->af_specific = af_ops;
6251
6252 tcp_clear_options(&tmp_opt);
6253 tmp_opt.mss_clamp = af_ops->mss_clamp;
6254 tmp_opt.user_mss = tp->rx_opt.user_mss;
6255 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
6256
6257 if (want_cookie && !tmp_opt.saw_tstamp)
6258 tcp_clear_options(&tmp_opt);
6259
6260 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6261 tcp_openreq_init(req, &tmp_opt, skb, sk);
6262
6263
6264 inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
6265
6266 af_ops->init_req(req, sk, skb);
6267
6268 if (security_inet_conn_request(sk, skb, req))
6269 goto drop_and_free;
6270
6271 if (!want_cookie && !isn) {
6272
6273
6274
6275
6276
6277
6278
6279
6280
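 /* VJ's idea: remember the last timestamp seen from this destination
  * (saved when entering TIME-WAIT) and check against it before
  * accepting a new connection request.  If "isn" is non-zero, the
  * request hit a live timewait bucket and those checks were already
  * done there.
  */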
6281 if (tcp_death_row.sysctl_tw_recycle) {
6282 bool strict;
6283
6284 dst = af_ops->route_req(sk, &fl, req, &strict);
6285
6286 if (dst && strict &&
6287 !tcp_peer_is_proven(req, dst, true,
6288 tmp_opt.saw_tstamp)) {
6289 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
6290 goto drop_and_release;
6291 }
6292 }
6293
6294 else if (!net->ipv4.sysctl_tcp_syncookies &&
6295 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6296 (sysctl_max_syn_backlog >> 2)) &&
6297 !tcp_peer_is_proven(req, dst, false,
6298 tmp_opt.saw_tstamp)) {
6299
6300
6301
6302
6303
6304
6305
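 /* Without syncookies the last quarter of the SYN backlog is kept
  * for destinations already proven to be alive; drop requests from
  * unproven peers once we reach that point.
  */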
6306 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6307 rsk_ops->family);
6308 goto drop_and_release;
6309 }
6310
6311 isn = af_ops->init_seq(skb);
6312 }
6313 if (!dst) {
6314 dst = af_ops->route_req(sk, &fl, req, NULL);
6315 if (!dst)
6316 goto drop_and_free;
6317 }
6318
6319 tcp_ecn_create_request(req, skb, sk, dst);
6320
6321 if (want_cookie) {
6322 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6323 req->cookie_ts = tmp_opt.tstamp_ok;
6324 if (!tmp_opt.tstamp_ok)
6325 inet_rsk(req)->ecn_ok = 0;
6326 }
6327
6328 tcp_rsk(req)->snt_isn = isn;
6329 tcp_rsk(req)->txhash = net_tx_rndhash();
6330 tcp_openreq_init_rwin(req, sk, dst);
6331 if (!want_cookie) {
6332 tcp_reqsk_record_syn(sk, req, skb);
6333 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6334 }
6335 if (fastopen_sk) {
6336 af_ops->send_synack(fastopen_sk, dst, &fl, req,
6337 &foc, TCP_SYNACK_FASTOPEN);
6338
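 /* Add the child socket directly into the accept queue. */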
6339 inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
6340 sk->sk_data_ready(sk);
6341 bh_unlock_sock(fastopen_sk);
6342 sock_put(fastopen_sk);
6343 } else {
6344 tcp_rsk(req)->tfo_listener = false;
6345 if (!want_cookie)
6346 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
6347 af_ops->send_synack(sk, dst, &fl, req, &foc,
6348 !want_cookie ? TCP_SYNACK_NORMAL :
6349 TCP_SYNACK_COOKIE);
6350 if (want_cookie) {
6351 reqsk_free(req);
6352 return 0;
6353 }
6354 }
6355 reqsk_put(req);
6356 return 0;
6357
6358drop_and_release:
6359 dst_release(dst);
6360drop_and_free:
6361 reqsk_free(req);
6362drop:
6363 tcp_listendrop(sk);
6364 return 0;
6365}
6366EXPORT_SYMBOL(tcp_conn_request);
6367