/*
 * Implementation of the Transmission Control Protocol (TCP) input path:
 * incoming-segment processing, delayed-ACK and quick-ACK heuristics, ECN
 * handling, receive/send buffer autotuning, RTT estimation, SACK/D-SACK
 * scoreboard maintenance, and congestion-control state transitions.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>
#include <net/mptcp.h>

int sysctl_tcp_max_orphans __read_mostly = NR_FILE;

#define FLAG_DATA		0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update. */
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data. */
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted. */
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN. */
#define FLAG_DATA_SACKED	0x20 /* New SACK. */
#define FLAG_ECE		0x40 /* ECE in this ACK */
#define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update. */
#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack() */
#define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0 /* no loss recovery to do */
#define REXMIT_LOST	1 /* retransmit packets marked lost */
#define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */

#if IS_ENABLED(CONFIG_TLS_DEVICE)
static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);

void clean_acked_data_enable(struct inet_connection_sock *icsk,
                             void (*cad)(struct sock *sk, u32 ack_seq))
{
        icsk->icsk_clean_acked = cad;
        static_branch_deferred_inc(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_enable);

void clean_acked_data_disable(struct inet_connection_sock *icsk)
{
        static_branch_slow_dec_deferred(&clean_acked_data_enabled);
        icsk->icsk_clean_acked = NULL;
}
EXPORT_SYMBOL_GPL(clean_acked_data_disable);

void clean_acked_data_flush(void)
{
        static_key_deferred_flush(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_flush);
#endif

/* Warn once if a segment longer than the device MTU arrives without a
 * sane gso_size: the driver's GRO implementation is suspect and hurts
 * receive-side MSS estimation.
 */
static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
                             unsigned int len)
{
        static bool __once __read_mostly;

        if (!__once) {
                struct net_device *dev;

                __once = true;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
                if (!dev || len >= dev->mtu)
                        pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
                                dev ? dev->name : "Unknown driver");
                rcu_read_unlock();
        }
}

/* Adapt the MSS value used for delayed-ACK decisions to the segment
 * sizes the peer actually sends.
 */
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        const unsigned int lss = icsk->icsk_ack.last_seg_size;
        unsigned int len;

        icsk->icsk_ack.last_seg_size = 0;

        /* skb->len may jitter because of SACKs, even if peer sends good
         * full-sized frames; gso_size gives the size before aggregation.
         */
        len = skb_shinfo(skb)->gso_size ? : skb->len;
        if (len >= icsk->icsk_ack.rcv_mss) {
                icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
                                               tcp_sk(sk)->advmss);
                /* Account for possibly-removed options */
                if (unlikely(len > icsk->icsk_ack.rcv_mss +
                                   MAX_TCP_OPTION_SPACE))
                        tcp_gro_dev_warn(sk, skb, len);
        } else {
                /* Otherwise, we make more careful check taking into
                 * account header size and the first packet of a burst.
                 */
                len += skb->data - skb_transport_header(skb);
                if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
                    /* If PSH is not set, packet should be full sized,
                     * assuming that the peer does not use Nagle; this
                     * observation helps to detect interactive senders.
                     */
                    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
                     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
                        /* Subtract the actual header length to get a
                         * payload-size estimate.
                         */
                        len -= tcp_sk(sk)->tcp_header_len;
                        icsk->icsk_ack.last_seg_size = len;
                        if (len == lss) {
                                icsk->icsk_ack.rcv_mss = len;
                                return;
                        }
                }
                if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
                        icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
                icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
        }
}

static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

        if (quickacks == 0)
                quickacks = 2;
        quickacks = min(quickacks, max_quickacks);
        if (quickacks > icsk->icsk_ack.quick)
                icsk->icsk_ack.quick = quickacks;
}
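
/* A worked example of the sizing above (illustrative values, not taken
 * from this file): with rcv_wnd == 65535 and rcv_mss == 1460, quickacks
 * becomes 65535 / (2 * 1460) == 22 and is then clamped to the caller's
 * max_quickacks, so a caller passing TCP_MAX_QUICKACKS (16) schedules 16
 * quick ACKs - roughly one ACK per two full segments over half the
 * receive window.
 */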

void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        tcp_incr_quickack(sk, max_quickacks);
        inet_csk_exit_pingpong_mode(sk);
        icsk->icsk_ack.ato = TCP_ATO_MIN;
}
EXPORT_SYMBOL(tcp_enter_quickack_mode);

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */
static bool tcp_in_quickack_mode(struct sock *sk)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);
        const struct dst_entry *dst = __sk_dst_get(sk);

        return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
                (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}

static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
        if (tp->ecn_flags & TCP_ECN_OK)
                tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
{
        if (tcp_hdr(skb)->cwr) {
                tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;

                /* If the sender is telling us it has entered CWR, then its
                 * cwnd may be very low (even just 1 packet), so we should
                 * ACK immediately.
                 */
                if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
                        inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
        }
}

static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
        tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}

static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);

        switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
        case INET_ECN_NOT_ECT:
                /* Funny extension: if ECT is not set on a segment,
                 * and we already seen ECT on a previous segment,
                 * it is probably a retransmit.
                 */
                if (tp->ecn_flags & TCP_ECN_SEEN)
                        tcp_enter_quickack_mode(sk, 2);
                break;
        case INET_ECN_CE:
                if (tcp_ca_needs_ecn(sk))
                        tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);

                if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
                        /* Better not delay ACKs, sender can have a very low cwnd */
                        tcp_enter_quickack_mode(sk, 2);
                        tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
                }
                tp->ecn_flags |= TCP_ECN_SEEN;
                break;
        default:
                if (tcp_ca_needs_ecn(sk))
                        tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
                tp->ecn_flags |= TCP_ECN_SEEN;
                break;
        }
}

static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
        if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
                __tcp_ecn_check_ce(sk, skb);
}

static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
        if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
                tp->ecn_flags &= ~TCP_ECN_OK;
}

static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
        if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
                tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
        if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
                return true;
        return false;
}
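
/* Note on the two negotiation checks above (RFC 3168 handshake): an
 * ECN-capable SYN carries ECE=1,CWR=1 and a valid SYN-ACK answers with
 * ECE=1,CWR=0. Anything else (e.g. a SYN-ACK with ECE=0, or with CWR=1)
 * makes the helpers above clear TCP_ECN_OK and fall back to non-ECN
 * operation for the connection.
 */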

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */
static void tcp_sndbuf_expand(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);
        const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
        int sndmem, per_mss;
        u32 nr_segs;

        /* Worst case is non GSO/TSO: each frame consumes one skb
         * and skb->head is kmalloced using power of two area of memory.
         */
        per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
                  MAX_TCP_HEADER +
                  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

        per_mss = roundup_pow_of_two(per_mss) +
                  SKB_DATA_ALIGN(sizeof(struct sk_buff));

        nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
        nr_segs = max_t(u32, nr_segs, tp->reordering + 1);

        /* Fast Recovery (RFC 5681 3.2):
         * Cubic needs 1.7 factor, rounded to 2 to include
         * extra cushion (application might react slowly to EPOLLOUT)
         */
        sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
        sndmem *= nr_segs * per_mss;

        if (sk->sk_sndbuf < sndmem)
                WRITE_ONCE(sk->sk_sndbuf,
                           min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * tcp_full_space() splits into a "network" part, advertised in the
 * receive window (tp->rcv_wnd), and an "application" part, reserved to
 * isolate application/scheduling latencies from the network.
 * window_clamp is the maximal advertised window; rcv_ssthresh acts as a
 * slow-start threshold for the advertised window: it grows only while
 * incoming segments use their memory efficiently (truesize not much
 * larger than payload), so misbehaving senders cannot inflate our
 * buffers.
 */
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);
        /* Optimize this! */
        int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
        int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;

        while (tp->rcv_ssthresh <= window) {
                if (truesize <= skb->len)
                        return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

                truesize >>= 1;
                window >>= 1;
        }
        return 0;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int room;

        room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;

        /* Check #1 */
        if (room > 0 && !tcp_under_memory_pressure(sk)) {
                int incr;

                /* Check #2. Increase window, if skb with such overhead
                 * will fit to rcvbuf in future.
                 */
                if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
                        incr = 2 * tp->advmss;
                else
                        incr = __tcp_grow_window(sk, skb);

                if (incr) {
                        incr = max_t(int, incr, 2 * skb->len);
                        tp->rcv_ssthresh += min(room, incr);
                        inet_csk(sk)->icsk_ack.quick |= 1;
                }
        }
}

/* 3. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */
static void tcp_init_buffer_space(struct sock *sk)
{
        int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
        struct tcp_sock *tp = tcp_sk(sk);
        int maxwin;

        if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
                tcp_sndbuf_expand(sk);

        tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
        tcp_mstamp_refresh(tp);
        tp->rcvq_space.time = tp->tcp_mstamp;
        tp->rcvq_space.seq = tp->copied_seq;

        maxwin = tcp_full_space(sk);

        if (tp->window_clamp >= maxwin) {
                tp->window_clamp = maxwin;

                if (tcp_app_win && maxwin > 4 * tp->advmss)
                        tp->window_clamp = max(maxwin -
                                               (maxwin >> tcp_app_win),
                                               4 * tp->advmss);
        }

        /* Force reservation of one segment. */
        if (tcp_app_win &&
            tp->window_clamp > 2 * tp->advmss &&
            tp->window_clamp + tp->advmss > maxwin)
                tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

        tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
        tp->snd_cwnd_stamp = tcp_jiffies32;
}

/* 4. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct net *net = sock_net(sk);

        icsk->icsk_ack.quick = 0;

        if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
            !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
            !tcp_under_memory_pressure(sk) &&
            sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
                WRITE_ONCE(sk->sk_rcvbuf,
                           min(atomic_read(&sk->sk_rmem_alloc),
                               net->ipv4.sysctl_tcp_rmem[2]));
        }
        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
                tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We have no direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate.
 * Overestimations make us ACKing less frequently than needed.
 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 */
void tcp_initialize_rcv_mss(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);
        unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

        hint = min(hint, tp->rcv_wnd / 2);
        hint = min(hint, TCP_MSS_DEFAULT);
        hint = max(hint, TCP_MIN_MSS);

        inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);

/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation w/o timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
 */
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
        u32 new_sample = tp->rcv_rtt_est.rtt_us;
        long m = sample;

        if (new_sample != 0) {
                /* If we sample in larger samples in the non-timestamp
                 * case, we could grossly overestimate the RTT especially
                 * with chatty applications or bulk transfer apps which
                 * are stalled on filesystem I/O.
                 *
                 * Also, since we are only going for a minimum in the
                 * non-timestamp case, we do not smooth things out
                 * else with timestamps disabled convergence takes too
                 * long.
                 */
                if (!win_dep) {
                        m -= (new_sample >> 3);
                        new_sample += m;
                } else {
                        m <<= 3;
                        if (m < new_sample)
                                new_sample = m;
                }
        } else {
                /* No previous measure. */
                new_sample = m << 3;
        }

        tp->rcv_rtt_est.rtt_us = new_sample;
}
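
/* Worked example of the fixed-point math above (illustrative numbers):
 * rcv_rtt_est.rtt_us stores 8x the estimate. With a current estimate of
 * 40 ms (new_sample == 320000 us) and a timestamp sample of 48 ms, the
 * !win_dep branch computes m = 48000 - (320000 >> 3) = 8000 and
 * new_sample = 328000, i.e. a new estimate of 41 ms: an EWMA with gain
 * 1/8. The win_dep branch instead only lets the (shifted) sample lower
 * the estimate, tracking a minimum.
 */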

static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
        u32 delta_us;

        if (tp->rcv_rtt_est.time == 0)
                goto new_measure;
        if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
                return;
        delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
        if (!delta_us)
                delta_us = 1;
        tcp_rcv_rtt_update(tp, delta_us, 1);

new_measure:
        tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
        tp->rcv_rtt_est.time = tp->tcp_mstamp;
}

static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
                                          const struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
                return;
        tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;

        if (TCP_SKB_CB(skb)->end_seq -
            TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
                u32 delta_us;

                if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
                        if (!delta)
                                delta = 1;
                        delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
                        tcp_rcv_rtt_update(tp, delta_us, 0);
                }
        }
}

/* This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */
void tcp_rcv_space_adjust(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        u32 copied;
        int time;

        trace_tcp_rcv_space_adjust(sk);

        tcp_mstamp_refresh(tp);
        time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
        if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
                return;

        /* Number of bytes copied to user in last RTT */
        copied = tp->copied_seq - tp->rcvq_space.seq;
        if (copied <= tp->rcvq_space.space)
                goto new_measure;

        /* A bit of theory:
         * copied = bytes received in previous RTT, our base window.
         * To cope with packet losses, we need a 2x factor.
         * To cope with slow start, and sender growing its cwin by 100 %
         * every RTT, we need a 4x factor, because the ACK we are sending
         * now is for the next RTT, not the current one:
         * <prev RTT . ><current RTT .. ><next RTT .... >
         */
        if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
            !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
                int rcvmem, rcvbuf;
                u64 rcvwin, grow;

                /* minimal window to cope with packet losses, assuming
                 * steady state. Add some cushion because of small variations.
                 */
                rcvwin = ((u64)copied << 1) + 16 * tp->advmss;

                /* Accommodate for sender rate increase (eg. slow start) */
                grow = rcvwin * (copied - tp->rcvq_space.space);
                do_div(grow, tp->rcvq_space.space);
                rcvwin += (grow << 1);

                rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
                while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
                        rcvmem += 128;

                do_div(rcvwin, tp->advmss);
                rcvbuf = min_t(u64, rcvwin * rcvmem,
                               sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
                if (rcvbuf > sk->sk_rcvbuf) {
                        WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);

                        /* Make the window clamp follow along. */
                        tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
                }
        }
        tp->rcvq_space.space = copied;

new_measure:
        tp->rcvq_space.seq = tp->copied_seq;
        tp->rcvq_space.time = tp->tcp_mstamp;
}
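
/* Worked example of the autotuning math above (hypothetical numbers):
 * suppose advmss == 1448, the application copied 100000 bytes in the
 * last RTT, and rcvq_space.space was 80000. Then
 *   rcvwin  = 2 * 100000 + 16 * 1448            = 223168
 *   grow    = 223168 * (100000 - 80000) / 80000 = 55792
 *   rcvwin += 2 * 55792                         = 334752
 * and sk_rcvbuf is sized to (rcvwin / advmss) skbs of rcvmem truesize,
 * capped by tcp_rmem[2], so the buffer keeps roughly one RTT of data at
 * the observed (and still growing) delivery rate.
 */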

/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCP's do slow start at the beginning of data
 * transmission.  That means that until we send the first few ACK's the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */
static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now;

        inet_csk_schedule_ack(sk);

        tcp_measure_rcv_mss(sk, skb);

        tcp_rcv_rtt_measure(tp);

        now = tcp_jiffies32;

        if (!icsk->icsk_ack.ato) {
                /* The _first_ data packet received, initialize
                 * delayed ACK engine.
                 */
                tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
                icsk->icsk_ack.ato = TCP_ATO_MIN;
        } else {
                int m = now - icsk->icsk_ack.lrcvtime;

                if (m <= TCP_ATO_MIN / 2) {
                        /* The fastest case is the first. */
                        icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
                } else if (m < icsk->icsk_ack.ato) {
                        icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
                        if (icsk->icsk_ack.ato > icsk->icsk_rto)
                                icsk->icsk_ack.ato = icsk->icsk_rto;
                } else if (m > icsk->icsk_rto) {
                        /* Too long gap. Apparently sender failed to
                         * restart window, so that we send ACKs quickly.
                         */
                        tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
                        sk_mem_reclaim(sk);
                }
        }
        icsk->icsk_ack.lrcvtime = now;

        tcp_ecn_check_ce(sk, skb);

        if (skb->len >= 128)
                tcp_grow_window(sk, skb);
}

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 */
static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
{
        struct tcp_sock *tp = tcp_sk(sk);
        long m = mrtt_us;	/* RTT */
        u32 srtt = tp->srtt_us;

        /* srtt is stored as 8 * smoothed RTT and mdev as 4 * mean
         * deviation, so the shifts below implement the gains 1/8 and
         * 1/4 from VJ's "Congestion avoidance and control" in integer
         * arithmetic.
         */
        if (srtt != 0) {
                m -= (srtt >> 3);	/* m is now error in rtt est */
                srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
                if (m < 0) {
                        m = -m;		/* m is now abs(error) */
                        m -= (tp->mdev_us >> 2);   /* similar update on mdev */
                        /* This is similar to one of Eifel findings.
                         * Eifel blocks mdev updates when rtt decreases.
                         * This solution is a bit different: we use finer gain
                         * for mdev in this case (alpha*beta).
                         * Like Eifel it also prevents growth of rto,
                         * but also it limits too fast rto decreases,
                         * happening in pure Eifel.
                         */
                        if (m > 0)
                                m >>= 3;
                } else {
                        m -= (tp->mdev_us >> 2);   /* similar update on mdev */
                }
                tp->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
                if (tp->mdev_us > tp->mdev_max_us) {
                        tp->mdev_max_us = tp->mdev_us;
                        if (tp->mdev_max_us > tp->rttvar_us)
                                tp->rttvar_us = tp->mdev_max_us;
                }
                if (after(tp->snd_una, tp->rtt_seq)) {
                        if (tp->mdev_max_us < tp->rttvar_us)
                                tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
                        tp->rtt_seq = tp->snd_nxt;
                        tp->mdev_max_us = tcp_rto_min_us(sk);

                        tcp_bpf_rtt(sk);
                }
        } else {
                /* no previous measure. */
                srtt = m << 3;		/* take the measured time to be rtt */
                tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
                tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
                tp->mdev_max_us = tp->rttvar_us;
                tp->rtt_seq = tp->snd_nxt;

                tcp_bpf_rtt(sk);
        }
        tp->srtt_us = max(1U, srtt);
}
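
/* Worked example of the Jacobson/Karels update above (made-up numbers):
 * with srtt_us storing 8 * 100 ms = 800000 and mdev_us storing
 * 4 * 5 ms = 20000, a new 120 ms sample gives
 *   m = 120000 - (800000 >> 3) = 20000  ->  srtt = 820000 (102.5 ms)
 *   m = 20000 - (20000 >> 2)   = 15000  ->  mdev = 35000  (8.75 ms)
 * i.e. srtt <- 7/8 srtt + 1/8 sample and mdev <- 3/4 mdev + 1/4 |err|,
 * matching RFC 6298's alpha = 1/8 and beta = 1/4.
 */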

static void tcp_update_pacing_rate(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);
        u64 rate;

        /* set sk_pacing_rate to a multiple of current rate (mss * cwnd / srtt) */
        rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);

        /* current rate is (cwnd * mss) / srtt
         * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
         * In Congestion Avoidance phase, set it to 120 % the current rate.
         *
         * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
         *	 If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
         *	 end of slow start and should slow down.
         */
        if (tp->snd_cwnd < tp->snd_ssthresh / 2)
                rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
        else
                rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;

        rate *= max(tp->snd_cwnd, tp->packets_out);

        if (likely(tp->srtt_us))
                do_div(rate, tp->srtt_us);

        /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
         * without any lock. We want to make sure compiler wont store
         * intermediate values in this location.
         */
        WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
                                             sk->sk_max_pacing_rate));
}
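
/* Numeric sanity check of the scaling above (illustrative values): the
 * constant (USEC_PER_SEC / 100) << 3 folds together the percent-ratio
 * divisor (100) and the 8x fixed-point encoding of srtt_us. With
 * mss_cache == 1448, snd_cwnd == 10, srtt == 10 ms and the
 * congestion-avoidance ratio of 120 %,
 *   rate = 1448 * 80000 * 120 * 10 / 80000 = 1737600 B/s,
 * i.e. exactly 1.2 * (cwnd * mss / srtt).
 */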

/* Calculate rto without backoff. The RTO is essentially the smoothed
 * RTT plus a variance term (see __tcp_set_rto()), per Van Jacobson's
 * algorithm and RFC 6298.
 */
static void tcp_set_rto(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);

        inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);

        /* Clamp the result into [TCP_RTO_MIN, TCP_RTO_MAX] to guard
         * against pathological RTT/variance samples.
         */
        tcp_bound_rto(sk);
}

__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
{
        __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

        if (!cwnd)
                cwnd = TCP_INIT_CWND;
        return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}

struct tcp_sacktag_state {
        /* Timestamps for earliest and latest never-retransmitted segment
         * that was SACKed. RTO needs the earliest RTT to stay conservative,
         * but congestion control should still get an accurate delay signal.
         */
        u64	first_sackt;
        u64	last_sackt;
        u32	reord;
        u32	sack_delivered;
        int	flag;
        unsigned int mss_now;
        struct rate_sample *rate;
};

/* Take a notice that peer is sending D-SACKs. Skip update of data delivery
 * and spurious retransmission information if this DSACK is unlikely caused by
 * sender's action:
 * - DSACKed sequence range is larger than maximum receiver's window.
 * - Total no. of DSACKed segments exceed the total no. of retransmitted segs.
 */
static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
                          u32 end_seq, struct tcp_sacktag_state *state)
{
        u32 seq_len, dup_segs = 1;

        if (!before(start_seq, end_seq))
                return 0;

        seq_len = end_seq - start_seq;
        /* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */
        if (seq_len > tp->max_window)
                return 0;
        if (seq_len > tp->mss_cache)
                dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);

        tp->dsack_dups += dup_segs;
        /* Skip the DSACK if dup segs weren't retransmitted by sender */
        if (tp->dsack_dups > tp->total_retrans)
                return 0;

        tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
        tp->rack.dsack_seen = 1;

        state->flag |= FLAG_DSACKING_ACK;
        /* A spurious retransmission is delivered */
        state->sack_delivered += dup_segs;

        return dup_segs;
}
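
/* Example of the dup_segs estimate above (hypothetical numbers): a D-SACK
 * block covering 4344 bytes with mss_cache == 1448 is counted as
 * DIV_ROUND_UP(4344, 1448) == 3 duplicate segments, so dsack_dups and the
 * delivery accounting advance by 3 rather than 1 when the receiver D-SACKs
 * a whole retransmitted burst in one block.
 */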

/* It's reordering when higher sequence was delivered (i.e. sacked) before
 * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
 * distance is approximated in packet units (ts means TS option is on).
 */
static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
                                      const int ts)
{
        struct tcp_sock *tp = tcp_sk(sk);
        const u32 mss = tp->mss_cache;
        u32 fack, metric;

        fack = tcp_highest_sack_seq(tp);
        if (!before(low_seq, fack))
                return;

        metric = fack - low_seq;
        if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
                pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
                         tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
                         tp->reordering,
                         0,
                         tp->sacked_out,
                         tp->undo_marker ? tp->undo_retrans : 0);
#endif
                tp->reordering = min_t(u32, (metric + mss - 1) / mss,
                                       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
        }

        /* This exciting event is worth to be remembered. 8) */
        tp->reord_seen++;
        NET_INC_STATS(sock_net(sk),
                      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}

/* This must be called before lost_out or retrans_out are updated
 * on a new loss, because we want to know if all skbs previously
 * known to be lost have already been retransmitted, indicating
 * that this newly lost skb is our next skb to retransmit.
 */
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
        if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
            (tp->retransmit_skb_hint &&
             before(TCP_SKB_CB(skb)->seq,
                    TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
                tp->retransmit_skb_hint = skb;
}

/* Sum the number of packets on the wire we have marked as lost.
 * There are two cases we care about here:
 * a) Packet hasn't been marked lost (nor retransmitted),
 *    and this is the first loss.
 * b) Packet has been marked both lost and retransmitted,
 *    which means we think it was lost again.
 */
static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
        __u8 sacked = TCP_SKB_CB(skb)->sacked;

        if (!(sacked & TCPCB_LOST) ||
            ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
                tp->lost += tcp_skb_pcount(skb);
}

static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
        if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
                tcp_verify_retransmit_hint(tp, skb);

                tp->lost_out += tcp_skb_pcount(skb);
                tcp_sum_lost(tp, skb);
                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
        }
}

void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
{
        tcp_verify_retransmit_hint(tp, skb);

        tcp_sum_lost(tp, skb);
        if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
                tp->lost_out += tcp_skb_pcount(skb);
                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
        }
}

/* Updates the delivered and delivered_ce counts */
static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
                                bool ece_ack)
{
        tp->delivered += delivered;
        if (ece_ack)
                tp->delivered_ce += delivered;
}

/* SACK block range validation: a block must fit the expected sequence
 * limits, i.e. lie between SND.UNA and SND.NXT.
 *
 * We have to distinguish an "ordinary" SACK from a D-SACK (RFC 2883):
 * a D-SACK reports a segment the receiver got more than once and may
 * legitimately lie below SND.UNA (it refers to already-ACKed data), but
 * only as far back as undo_marker, and never more than one maximum
 * window before it (to tolerate sequence-space wrap-around). Everything
 * else is dropped as invalid or too old to be useful.
 */
static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
                                   u32 start_seq, u32 end_seq)
{
        /* Too far in future, or reversed (interpretation is ambiguous) */
        if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
                return false;

        /* Nasty start_seq wrap-around check (see comments above) */
        if (!before(start_seq, tp->snd_nxt))
                return false;

        /* In outstanding window? ...This is valid exit for D-SACKs too.
         * start_seq == snd_una is non-sensical (see comments above)
         */
        if (after(start_seq, tp->snd_una))
                return true;

        if (!is_dsack || !tp->undo_marker)
                return false;

        /* ...Then it's D-SACK, and must reside below snd_una completely */
        if (after(end_seq, tp->snd_una))
                return false;

        if (!before(start_seq, tp->undo_marker))
                return true;

        /* Too old */
        if (!after(end_seq, tp->undo_marker))
                return false;

        /* Undo_marker boundary crossing (overestimates a lot). Known already:
         * start_seq < undo_marker and end_seq >= undo_marker.
         */
        return !before(start_seq, end_seq - tp->max_window);
}

static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
                            struct tcp_sack_block_wire *sp, int num_sacks,
                            u32 prior_snd_una, struct tcp_sacktag_state *state)
{
        struct tcp_sock *tp = tcp_sk(sk);
        u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
        u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
        u32 dup_segs;

        if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
        } else if (num_sacks > 1) {
                u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
                u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);

                if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
                        return false;
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
        } else {
                return false;
        }

        dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
        if (!dup_segs) {	/* Skip dubious DSACK */
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
                return false;
        }

        NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);

        /* D-SACK for already forgotten data... Do dumb counting. */
        if (tp->undo_marker && tp->undo_retrans > 0 &&
            !after(end_seq_0, prior_snd_una) &&
            after(end_seq_0, tp->undo_marker))
                tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);

        return true;
}
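
/* The two accepted shapes above follow RFC 2883's D-SACK signaling: the
 * first SACK block either lies below the cumulative ACK (a duplicate of
 * already-ACKed data) or, for duplicates above the ACK, is fully contained
 * in the second SACK block. E.g. with ACK=1000, a first block [500,700)
 * is a D-SACK, and [2000,3000) followed by [1500,4000) is a D-SACK, while
 * [2000,3000) followed by [3500,4000) is a plain pair of SACK blocks.
 */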

/* Check if skb is fully within the SACK block. In presence of GSO skbs,
 * the incoming SACK may not exactly match but we can find smaller MSS
 * aligned portion of it that matches. Therefore we might need to fragment
 * which may fail and creates some hassle (caller must handle error case
 * returns).
 *
 * FIXME: this could be merged to shift decision code
 */
static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
                                 u32 start_seq, u32 end_seq)
{
        int err;
        bool in_sack;
        unsigned int pkt_len;
        unsigned int mss;

        in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
                  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

        if (tcp_skb_pcount(skb) > 1 && !in_sack &&
            after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
                mss = tcp_skb_mss(skb);
                in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);

                if (!in_sack) {
                        pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
                        if (pkt_len < mss)
                                pkt_len = mss;
                } else {
                        pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
                        if (pkt_len < mss)
                                return -EINVAL;
                }

                /* Round if necessary so that SACKs cover only full MSSes
                 * and/or the remaining small portion (if present)
                 */
                if (pkt_len > mss) {
                        unsigned int new_len = (pkt_len / mss) * mss;
                        if (!in_sack && new_len < pkt_len)
                                new_len += mss;
                        pkt_len = new_len;
                }

                if (pkt_len >= skb->len && !in_sack)
                        return 0;

                err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
                                   pkt_len, mss, GFP_ATOMIC);
                if (err < 0)
                        return err;
        }

        return in_sack;
}
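
/* Fragmentation example for the rounding above (hypothetical numbers): a
 * GSO skb spans [1000, 6792) with mss == 1448 (4 segments) and the SACK
 * block starts at 3000. The head is not SACKed, so pkt_len = 2000 and is
 * rounded *up* to the MSS boundary 2896; tcp_fragment() then splits at
 * sequence 3896, keeping the un-SACKed head a whole number of MSS-sized
 * segments and leaving the tail aligned for the SACK tagging walk.
 */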

/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
static u8 tcp_sacktag_one(struct sock *sk,
                          struct tcp_sacktag_state *state, u8 sacked,
                          u32 start_seq, u32 end_seq,
                          int dup_sack, int pcount,
                          u64 xmit_time)
{
        struct tcp_sock *tp = tcp_sk(sk);

        /* Account D-SACK for retransmitted packet. */
        if (dup_sack && (sacked & TCPCB_RETRANS)) {
                if (tp->undo_marker && tp->undo_retrans > 0 &&
                    after(end_seq, tp->undo_marker))
                        tp->undo_retrans--;
                if ((sacked & TCPCB_SACKED_ACKED) &&
                    before(start_seq, state->reord))
                        state->reord = start_seq;
        }

        /* Nothing to do; acked frame is about to be dropped (was ACKed). */
        if (!after(end_seq, tp->snd_una))
                return sacked;

        if (!(sacked & TCPCB_SACKED_ACKED)) {
                tcp_rack_advance(tp, sacked, end_seq, xmit_time);

                if (sacked & TCPCB_SACKED_RETRANS) {
                        /* If the segment is not tagged as lost,
                         * we do not clear RETRANS, believing
                         * that retransmission is still in flight.
                         */
                        if (sacked & TCPCB_LOST) {
                                sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
                                tp->lost_out -= pcount;
                                tp->retrans_out -= pcount;
                        }
                } else {
                        if (!(sacked & TCPCB_RETRANS)) {
                                /* New sack for not retransmitted frame,
                                 * which was in hole. It is reordering.
                                 */
                                if (before(start_seq,
                                           tcp_highest_sack_seq(tp)) &&
                                    before(start_seq, state->reord))
                                        state->reord = start_seq;

                                if (!after(end_seq, tp->high_seq))
                                        state->flag |= FLAG_ORIG_SACK_ACKED;
                                if (state->first_sackt == 0)
                                        state->first_sackt = xmit_time;
                                state->last_sackt = xmit_time;
                        }

                        if (sacked & TCPCB_LOST) {
                                sacked &= ~TCPCB_LOST;
                                tp->lost_out -= pcount;
                        }
                }

                sacked |= TCPCB_SACKED_ACKED;
                state->flag |= FLAG_DATA_SACKED;
                tp->sacked_out += pcount;
                /* Out-of-order packets delivered */
                state->sack_delivered += pcount;

                /* Lost marker hint past SACKed? Tweak RFC3517 cnt hint */
                if (tp->lost_skb_hint &&
                    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
                        tp->lost_cnt_hint += pcount;
        }

        /* D-SACK. We can detect redundant retransmission in S|R and plain R
         * frames and clear it. undo_retrans is decreased above, L|R frames
         * are accounted above as well.
         */
        if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
                sacked &= ~TCPCB_SACKED_RETRANS;
                tp->retrans_out -= pcount;
        }

        return sacked;
}

/* Shift newly-SACKed bytes from this skb to the immediately previous
 * already-SACKed skb. Mark the newly-SACKed bytes as such.
 */
static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
                            struct sk_buff *skb,
                            struct tcp_sacktag_state *state,
                            unsigned int pcount, int shifted, int mss,
                            bool dup_sack)
{
        struct tcp_sock *tp = tcp_sk(sk);
        u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
        u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */

        BUG_ON(!pcount);

        /* Adjust counters and hints for the newly sacked sequence range but
         * discard the return value since prev is already marked. We must tag
         * the range first because the seq advancement below implicitly
         * advances tcp_highest_sack_seq() when skb is highest_sack.
         */
        tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
                        start_seq, end_seq, dup_sack, pcount,
                        tcp_skb_timestamp_us(skb));
        tcp_rate_skb_delivered(sk, skb, state->rate);

        if (skb == tp->lost_skb_hint)
                tp->lost_cnt_hint += pcount;

        TCP_SKB_CB(prev)->end_seq += shifted;
        TCP_SKB_CB(skb)->seq += shifted;

        tcp_skb_pcount_add(prev, pcount);
        WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
        tcp_skb_pcount_add(skb, -pcount);

        /* When we're adding to gso_segs == 1, gso_size will be zero,
         * in theory this shouldn't be necessary but as long as DSACK
         * code can come after this skb later on it's better to keep
         * setting gso_size to something.
         */
        if (!TCP_SKB_CB(prev)->tcp_gso_size)
                TCP_SKB_CB(prev)->tcp_gso_size = mss;

        /* CHECKME: To clear or not to clear? Mimics normal skb currently */
        if (tcp_skb_pcount(skb) <= 1)
                TCP_SKB_CB(skb)->tcp_gso_size = 0;

        /* Difference in this won't matter, both ACKed by the same cumul. ACK */
        TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);

        if (skb->len > 0) {
                BUG_ON(!tcp_skb_pcount(skb));
                NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
                return false;
        }

        /* Whole SKB was eaten :-) */

        if (skb == tp->retransmit_skb_hint)
                tp->retransmit_skb_hint = prev;
        if (skb == tp->lost_skb_hint) {
                tp->lost_skb_hint = prev;
                tp->lost_cnt_hint -= tcp_skb_pcount(prev);
        }

        TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
        TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                TCP_SKB_CB(prev)->end_seq++;

        if (skb == tcp_highest_sack(sk))
                tcp_advance_highest_sack(sk, skb);

        tcp_skb_collapse_tstamp(prev, skb);
        if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
                TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;

        tcp_rtx_queue_unlink_and_free(skb, sk);

        NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);

        return true;
}

/* I wish gso_size would have a bit more sane initialization than
 * something-or-zero which complicates things
 */
static int tcp_skb_seglen(const struct sk_buff *skb)
{
        return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
}

/* Shifting pages past head area doesn't work */
static int skb_can_shift(const struct sk_buff *skb)
{
        return !skb_headlen(skb) && skb_is_nonlinear(skb);
}

int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
                  int pcount, int shiftlen)
{
        /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE).
         * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
         * to make sure not storing more than 65535 * 8 bytes per skb,
         * even if current MSS is bigger.
         */
        if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
                return 0;
        if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
                return 0;
        return skb_shift(to, from, shiftlen);
}

/* Try collapsing SACK blocks spanning across multiple skbs to a single
 * skb.
 */
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                                          struct tcp_sacktag_state *state,
                                          u32 start_seq, u32 end_seq,
                                          bool dup_sack)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *prev;
        int mss;
        int pcount = 0;
        int len;
        int in_sack;

        /* Normally R but no L won't result in plain S */
        if (!dup_sack &&
            (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
                goto fallback;
        if (!skb_can_shift(skb))
                goto fallback;
        /* This frame is about to be dropped (was ACKed). */
        if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
                goto fallback;

        /* Can only happen with delayed DSACK + discard craziness */
        prev = skb_rb_prev(skb);
        if (!prev)
                goto fallback;

        if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
                goto fallback;

        if (!tcp_skb_can_collapse(prev, skb))
                goto fallback;

        in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
                  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

        if (in_sack) {
                len = skb->len;
                pcount = tcp_skb_pcount(skb);
                mss = tcp_skb_seglen(skb);

                /* TODO: Fix DSACKs to not fragment already SACKed and we can
                 * drop this restriction as unnecessary
                 */
                if (mss != tcp_skb_seglen(prev))
                        goto fallback;
        } else {
                if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
                        goto noop;
                /* CHECKME: This is non-MSS split case only?, this will
                 * cause skipped skbs due to advancing loop btw, original
                 * has that feature too
                 */
                if (tcp_skb_pcount(skb) <= 1)
                        goto noop;

                in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
                if (!in_sack) {
                        /* TODO: head merge to next could be attempted here
                         * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
                         * though it might not be worth of the additional hassle
                         *
                         * ...we can probably just fallback to what was done
                         * previously. We could try merging non-SACKed ones
                         * as well but it probably isn't going to buy off
                         * because later SACKs might again split them, and
                         * it would make skb timestamp tracking considerably
                         * harder problem.
                         */
                        goto fallback;
                }

                len = end_seq - TCP_SKB_CB(skb)->seq;
                BUG_ON(len < 0);
                BUG_ON(len > skb->len);

                /* MSS boundaries should be honoured or else pcount will
                 * severely break even though it makes things bit trickier.
                 * Optimize common case to avoid most of the divides.
                 */
                mss = tcp_skb_mss(skb);

                /* TODO: Fix DSACKs to not fragment already SACKed and we can
                 * drop this restriction as unnecessary
                 */
                if (mss != tcp_skb_seglen(prev))
                        goto fallback;

                if (len == mss) {
                        pcount = 1;
                } else if (len < mss) {
                        goto noop;
                } else {
                        pcount = len / mss;
                        len = pcount * mss;
                }
        }

        /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
        if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
                goto fallback;

        if (!tcp_skb_shift(prev, skb, pcount, len))
                goto fallback;
        if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
                goto out;

        /* Hole filled allows collapsing with the next as well, this is very
         * useful when hole on every nth skb pattern happens
         */
        skb = skb_rb_next(prev);
        if (!skb)
                goto out;

        if (!skb_can_shift(skb) ||
            ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
            (mss != tcp_skb_seglen(skb)))
                goto out;

        len = skb->len;
        pcount = tcp_skb_pcount(skb);
        if (tcp_skb_shift(prev, skb, pcount, len))
                tcp_shifted_skb(sk, prev, skb, state, pcount,
                                len, mss, 0);

out:
        return prev;

noop:
        return skb;

fallback:
        NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
        return NULL;
}

static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
                                        struct tcp_sack_block *next_dup,
                                        struct tcp_sacktag_state *state,
                                        u32 start_seq, u32 end_seq,
                                        bool dup_sack_in)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *tmp;

        skb_rbtree_walk_from(skb) {
                int in_sack = 0;
                bool dup_sack = dup_sack_in;

                /* queue is in-order => we can short-circuit the walk early */
                if (!before(TCP_SKB_CB(skb)->seq, end_seq))
                        break;

                if (next_dup &&
                    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
                        in_sack = tcp_match_skb_to_sack(sk, skb,
                                                        next_dup->start_seq,
                                                        next_dup->end_seq);
                        if (in_sack > 0)
                                dup_sack = true;
                }

                /* skb reference here is a bit tricky to get right, since
                 * shifting can eat and free both this skb and the next,
                 * so not even _safe variant of the loop is enough.
                 */
                if (in_sack <= 0) {
                        tmp = tcp_shift_skb_data(sk, skb, state,
                                                 start_seq, end_seq, dup_sack);
                        if (tmp) {
                                if (tmp != skb) {
                                        skb = tmp;
                                        continue;
                                }

                                in_sack = 0;
                        } else {
                                in_sack = tcp_match_skb_to_sack(sk, skb,
                                                                start_seq,
                                                                end_seq);
                        }
                }

                if (unlikely(in_sack < 0))
                        break;

                if (in_sack) {
                        TCP_SKB_CB(skb)->sacked =
                                tcp_sacktag_one(sk,
                                                state,
                                                TCP_SKB_CB(skb)->sacked,
                                                TCP_SKB_CB(skb)->seq,
                                                TCP_SKB_CB(skb)->end_seq,
                                                dup_sack,
                                                tcp_skb_pcount(skb),
                                                tcp_skb_timestamp_us(skb));
                        tcp_rate_skb_delivered(sk, skb, state->rate);
                        if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
                                list_del_init(&skb->tcp_tsorted_anchor);

                        if (!before(TCP_SKB_CB(skb)->seq,
                                    tcp_highest_sack_seq(tp)))
                                tcp_advance_highest_sack(sk, skb);
                }
        }
        return skb;
}

static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
{
        struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
        struct sk_buff *skb;

        while (*p) {
                parent = *p;
                skb = rb_to_skb(parent);
                if (before(seq, TCP_SKB_CB(skb)->seq)) {
                        p = &parent->rb_left;
                        continue;
                }
                if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
                        p = &parent->rb_right;
                        continue;
                }
                return skb;
        }
        return NULL;
}

static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
                                        u32 skip_to_seq)
{
        if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
                return skb;

        return tcp_sacktag_bsearch(sk, skip_to_seq);
}

static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
                                                struct sock *sk,
                                                struct tcp_sack_block *next_dup,
                                                struct tcp_sacktag_state *state,
                                                u32 skip_to_seq)
{
        if (!next_dup)
                return skb;

        if (before(next_dup->start_seq, skip_to_seq)) {
                skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
                skb = tcp_sacktag_walk(skb, sk, NULL, state,
                                       next_dup->start_seq, next_dup->end_seq,
                                       1);
        }

        return skb;
}

static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
{
        return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
}

static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
                        u32 prior_snd_una, struct tcp_sacktag_state *state)
{
        struct tcp_sock *tp = tcp_sk(sk);
        const unsigned char *ptr = (skb_transport_header(ack_skb) +
                                    TCP_SKB_CB(ack_skb)->sacked);
        struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
        struct tcp_sack_block sp[TCP_NUM_SACKS];
        struct tcp_sack_block *cache;
        struct sk_buff *skb;
        int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
        int used_sacks;
        bool found_dup_sack = false;
        int i, j;
        int first_sack_index;

        state->flag = 0;
        state->reord = tp->snd_nxt;

        if (!tp->sacked_out)
                tcp_highest_sack_reset(sk);

        found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
                                         num_sacks, prior_snd_una, state);

        /* Eliminate too old ACKs, but take into
         * account more or less fresh ones, they can
         * contain valid SACK info.
         */
        if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
                return 0;

        if (!tp->packets_out)
                goto out;

        used_sacks = 0;
        first_sack_index = 0;
        for (i = 0; i < num_sacks; i++) {
                bool dup_sack = !i && found_dup_sack;

                sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
                sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

                if (!tcp_is_sackblock_valid(tp, dup_sack,
                                            sp[used_sacks].start_seq,
                                            sp[used_sacks].end_seq)) {
                        int mib_idx;

                        if (dup_sack) {
                                if (!tp->undo_marker)
                                        mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
                                else
                                        mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
                        } else {
                                /* Don't count olds caused by ACK reordering */
                                if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
                                    !after(sp[used_sacks].end_seq, tp->snd_una))
                                        continue;
                                mib_idx = LINUX_MIB_TCPSACKDISCARD;
                        }

                        NET_INC_STATS(sock_net(sk), mib_idx);
                        if (i == 0)
                                first_sack_index = -1;
                        continue;
                }

                /* Ignore very old stuff early */
                if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
                        if (i == 0)
                                first_sack_index = -1;
                        continue;
                }

                used_sacks++;
        }

        /* order SACK blocks to allow in order walk of the retrans queue */
        for (i = used_sacks - 1; i > 0; i--) {
                for (j = 0; j < i; j++) {
                        if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
                                swap(sp[j], sp[j + 1]);

                                /* Track where the first SACK block goes to */
                                if (j == first_sack_index)
                                        first_sack_index = j + 1;
                        }
                }
        }

        state->mss_now = tcp_current_mss(sk);
        skb = NULL;
        i = 0;

        if (!tp->sacked_out) {
                /* It's already past, so skip checking against it */
                cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
        } else {
                cache = tp->recv_sack_cache;
                /* Skip empty blocks at the head of the cache */
                while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
                       !cache->end_seq)
                        cache++;
        }

        while (i < used_sacks) {
                u32 start_seq = sp[i].start_seq;
                u32 end_seq = sp[i].end_seq;
                bool dup_sack = (found_dup_sack && (i == first_sack_index));
                struct tcp_sack_block *next_dup = NULL;

                if (found_dup_sack && ((i + 1) == first_sack_index))
                        next_dup = &sp[i + 1];

                /* Skip too early cached blocks */
                while (tcp_sack_cache_ok(tp, cache) &&
                       !before(start_seq, cache->end_seq))
                        cache++;

                /* Can skip some work by looking recv_sack_cache? */
                if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
                    after(end_seq, cache->start_seq)) {

                        /* Head todo? */
                        if (before(start_seq, cache->start_seq)) {
                                skb = tcp_sacktag_skip(skb, sk, start_seq);
                                skb = tcp_sacktag_walk(skb, sk, next_dup,
                                                       state,
                                                       start_seq,
                                                       cache->start_seq,
                                                       dup_sack);
                        }

                        /* Rest of the block already fully processed? */
                        if (!after(end_seq, cache->end_seq))
                                goto advance_sp;

                        skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
                                                       state,
                                                       cache->end_seq);

                        /* ...tail remains todo... */
                        if (tcp_highest_sack_seq(tp) == cache->end_seq) {
                                /* ...but better entrypoint exists! */
                                skb = tcp_highest_sack(sk);
                                if (!skb)
                                        break;
                                cache++;
                                goto walk;
                        }

                        skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
                        /* Check overlap against next cached too (past this one already) */
                        cache++;
                        continue;
                }

                if (!before(start_seq, tcp_highest_sack_seq(tp))) {
                        skb = tcp_highest_sack(sk);
                        if (!skb)
                                break;
                }
                skb = tcp_sacktag_skip(skb, sk, start_seq);

walk:
                skb = tcp_sacktag_walk(skb, sk, next_dup, state,
                                       start_seq, end_seq, dup_sack);

advance_sp:
                i++;
        }

        /* Clear the head of the cache sack blocks so we can skip it next time */
        for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
                tp->recv_sack_cache[i].start_seq = 0;
                tp->recv_sack_cache[i].end_seq = 0;
        }
        for (j = 0; j < used_sacks; j++)
                tp->recv_sack_cache[i++] = sp[j];

        if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
                tcp_check_sack_reordering(sk, state->reord, 0);

        tcp_verify_left_out(tp);
out:

#if FASTRETRANS_DEBUG > 0
        WARN_ON((int)tp->sacked_out < 0);
        WARN_ON((int)tp->lost_out < 0);
        WARN_ON((int)tp->retrans_out < 0);
        WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
        return state->flag;
}

/* Limits sacked_out so that sum with lost_out isn't ever larger than
 * packets_out. Returns false if sacked_out adjustment wasn't necessary.
 */
static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
{
        u32 holes;

        holes = max(tp->lost_out, 1U);
        holes = min(holes, tp->packets_out);

        if ((tp->sacked_out + holes) > tp->packets_out) {
                tp->sacked_out = tp->packets_out - holes;
                return true;
        }
        return false;
}
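
/* Sanity example for the clamp above (hypothetical state): with
 * packets_out == 10, lost_out == 3 and an inflated sacked_out == 9,
 * holes == 3 and sacked_out is pulled back to 10 - 3 == 7 so that
 * "SACKed + lost" never exceeds what is actually outstanding. Non-SACK
 * (NewReno) connections emulate sacked_out from duplicate ACKs, which
 * is why it can drift and needs this bound.
 */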

/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only another reason could be bug in receiver TCP.
 */
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (!tcp_limit_reno_sacked(tp))
                return;

        tp->reordering = min_t(u32, tp->packets_out + addend,
                               sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
        tp->reord_seen++;
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}

/* Emulate SACKs for SACKless connection: account for a new dupack. */

static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
{
        if (num_dupack) {
                struct tcp_sock *tp = tcp_sk(sk);
                u32 prior_sacked = tp->sacked_out;
                s32 delivered;

                tp->sacked_out += num_dupack;
                tcp_check_reno_reordering(sk, 0);
                delivered = tp->sacked_out - prior_sacked;
                if (delivered > 0)
                        tcp_count_delivered(tp, delivered, ece_ack);
                tcp_verify_left_out(tp);
        }
}

/* Account for ACK, ACKing some data in Reno Recovery phase. */

static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (acked > 0) {
                /* One ACK acked up to 1 full segment of "emulated" SACK */
                tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
                                    ece_ack);
                if (acked - 1 >= tp->sacked_out)
                        tp->sacked_out = 0;
                else
                        tp->sacked_out -= acked - 1;
        }
        tcp_check_reno_reordering(sk, acked);
        tcp_verify_left_out(tp);
}

static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
        tp->sacked_out = 0;
}

void tcp_clear_retrans(struct tcp_sock *tp)
{
        tp->retrans_out = 0;
        tp->lost_out = 0;
        tp->undo_marker = 0;
        tp->undo_retrans = -1;
        tp->sacked_out = 0;
}

static inline void tcp_init_undo(struct tcp_sock *tp)
{
        tp->undo_marker = tp->snd_una;
        /* Retransmissions still in flight may cause D-SACKs later. */
        tp->undo_retrans = tp->retrans_out ? : -1;
}

static bool tcp_is_rack(const struct sock *sk)
{
        return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
}

/* Mark segments lost after a retransmission timeout: everything if the
 * SACK scoreboard is being reneged, otherwise only the segments RACK
 * considers timed out.
 */
static void tcp_timeout_mark_lost(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb, *head;
        bool is_reneg;			/* is receiver reneging on SACKs? */

        head = tcp_rtx_queue_head(sk);
        is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
        if (is_reneg) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
                tp->sacked_out = 0;
                /* Mark SACK reneging until we recover from this loss event. */
                tp->is_sack_reneg = 1;
        } else if (tcp_is_reno(tp)) {
                tcp_reset_reno_sack(tp);
        }

        skb = head;
        skb_rbtree_walk_from(skb) {
                if (is_reneg)
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
                else if (tcp_is_rack(sk) && skb != head &&
                         tcp_rack_skb_timeout(tp, skb, 0) > 0)
                        continue; /* Don't mark recently sent ones lost yet */
                tcp_mark_skb_lost(sk, skb);
        }
        tcp_verify_left_out(tp);
        tcp_clear_all_retrans_hints(tp);
}

/* Enter Loss state. */
void tcp_enter_loss(struct sock *sk)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;

        tcp_timeout_mark_lost(sk);

        /* Reduce ssthresh if it has not yet been made inside this window. */
        if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
            !after(tp->high_seq, tp->snd_una) ||
            (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
                tp->prior_ssthresh = tcp_current_ssthresh(sk);
                tp->prior_cwnd = tp->snd_cwnd;
                tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
                tcp_ca_event(sk, CA_EVENT_LOSS);
                tcp_init_undo(tp);
        }
        tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
        tp->snd_cwnd_cnt = 0;
        tp->snd_cwnd_stamp = tcp_jiffies32;

        /* Timeout in disordered state after receiving substantial DUPACKs
         * suggests that the degree of reordering is over-estimated.
         */
        if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
            tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
                tp->reordering = min_t(unsigned int, tp->reordering,
                                       net->ipv4.sysctl_tcp_reordering);
        tcp_set_ca_state(sk, TCP_CA_Loss);
        tp->high_seq = tp->snd_nxt;
        tcp_ecn_queue_cwr(tp);

        /* F-RTO (RFC 5682 sec 3.1 step 1) is used only on a fresh RTO (or
         * when recurring timeouts hit the same SND.UNA), and is disabled
         * during path MTU probing since the probe confuses its logic.
         */
        tp->frto = net->ipv4.sysctl_tcp_frto &&
                   (new_recovery || icsk->icsk_retransmits) &&
                   !inet_csk(sk)->icsk_mtup.probe_size;
}

/* If ACK arrived pointing to a remembered SACK, it means that our
 * remembered SACKs do not reflect real state of receiver i.e.
 * receiver _host_ is heavily congested (or buggy).
 *
 * To avoid big spurious retransmission bursts due to transient SACK
 * scoreboard oddities that look like reneging, we give the receiver a
 * little time (max(RTT/2, 10ms)) to send us some more ACKs that will
 * restore sanity to the SACK scoreboard. If the apparent reneging
 * persists until this RTO then we'll clear the SACK scoreboard.
 */
static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
        if (flag & FLAG_SACK_RENEGING) {
                struct tcp_sock *tp = tcp_sk(sk);
                unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
                                          msecs_to_jiffies(10));

                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          delay, TCP_RTO_MAX);
                return true;
        }
        return false;
}

/* Heuristics to calculate number of duplicate ACKs. There is no dupACKs
 * counter when SACK is enabled (without SACK, sacked_out is used for
 * that purpose).
 *
 * With reordering, holes may still be in flight, so RFC3517 recovery
 * uses pure sacked_out (total number of SACKed segments) even though
 * it violates the RFC that uses duplicate ACKs, often these are equal
 * but when e.g. out-of-window ACKs or packet duplication occurs,
 * they differ. Since neither occurs due to loss, TCP should really
 * ignore them.
 */
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
        return tp->sacked_out + 1;
}
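
/* Illustration (hypothetical): three segments SACKed above a hole give
 * tcp_dupack_heuristics() == 4, which tcp_time_to_recover() compares
 * against tp->reordering (default 3). So, like the classic three-dupACK
 * rule of RFC 5681, recovery triggers once enough later data is known
 * to have arrived while the hole remains.
 */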

/* Linux NewReno/SACK/ECN state machine.
 * --------------------------------------
 *
 * "Open"	Normal state, no dubious events, fast path.
 * "Disorder"	In all the respects it is "Open",
 *		but requires a bit more attention. It is entered when
 *		we see some SACKs or dupacks. It is split of "Open".
 * "CWR"	CWND was reduced due to some Congestion Notification event.
 *		It can be ECN, ICMP source quench, local device congestion.
 * "Recovery"	CWND was reduced, we are fast-retransmitting.
 * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
 *
 * The state transitions are driven by two questions answered for each
 * incoming ACK: does it advance snd_una (is it "good"), and does it
 * carry new SACK/dupack/ECE information (is it "dubious")? We enter
 * Recovery when the loss-detection heuristics below indicate that
 * enough of the outstanding window is missing, retransmit conservatively
 * under PRR, and return to Open (possibly undoing the cwnd reduction)
 * once snd_una passes high_seq or timestamps/D-SACKs prove that the
 * retransmissions were spurious.
 */
static bool tcp_time_to_recover(struct sock *sk, int flag)
{
        struct tcp_sock *tp = tcp_sk(sk);

        /* Trick#1: The loss is proven. */
        if (tp->lost_out)
                return true;

        /* Not-A-Trick#2: Classic rule... */
        if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
                return true;

        return false;
}

/* Detect loss in event "A" above by marking head of queue up as lost.
 * For RFC3517 SACK, a segment is considered lost if it
 * has at least tp->reordering SACKed segments above it; "packets" refers
 * to the maximum SACKed segments to pass before reaching this limit.
 */
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        int cnt;
        /* Use SACK to deduce losses of new sequences sent during recovery */
        const u32 loss_high = tp->snd_nxt;

        WARN_ON(packets > tp->packets_out);
        skb = tp->lost_skb_hint;
        if (skb) {
                /* Head already handled? */
                if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
                        return;
                cnt = tp->lost_cnt_hint;
        } else {
                skb = tcp_rtx_queue_head(sk);
                cnt = 0;
        }

        skb_rbtree_walk_from(skb) {
                /* TODO: do this better */
                /* this is not the most efficient way to do this... */
                tp->lost_skb_hint = skb;
                tp->lost_cnt_hint = cnt;

                if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
                        break;

                if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
                        cnt += tcp_skb_pcount(skb);

                if (cnt > packets)
                        break;

                tcp_skb_mark_lost(tp, skb);

                if (mark_head)
                        break;
        }
        tcp_verify_left_out(tp);
}

/* Account newly detected lost packet(s) */
static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (tcp_is_sack(tp)) {
                int sacked_upto = tp->sacked_out - tp->reordering;
                if (sacked_upto >= 0)
                        tcp_mark_head_lost(sk, sacked_upto, 0);
                else if (fast_rexmit)
                        tcp_mark_head_lost(sk, 1, 1);
        }
}

static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
{
        return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
               before(tp->rx_opt.rcv_tsecr, when);
}

/* skb is spurious retransmitted if the returned timestamp echo
 * reply is prior to the skb transmission time
 */
static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
                                     const struct sk_buff *skb)
{
        return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
               tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
}

/* Nothing was retransmitted or returned timestamp is less
 * than timestamp of the first retransmission.
 */
static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
        return tp->retrans_stamp &&
               tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
}
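
/* Undo-detection example (hypothetical timestamps): the first
 * retransmission is sent at retrans_stamp == 1000. If an incoming ACK
 * echoes tsecr == 990, the receiver must have generated it in response
 * to the *original* transmission, so the ACKed data was merely delayed,
 * not lost, and the congestion-window reduction can be undone - the
 * Eifel detection idea of RFC 3522.
 */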

/* Undo procedures. We use timestamps (the Eifel idea, RFC 3522) and
 * D-SACKs (RFC 3708) to detect that the network merely delayed or
 * duplicated packets rather than dropped them, and to revert the
 * unnecessary congestion-window reduction when a retransmission turns
 * out to have been spurious. undo_marker/undo_retrans record what would
 * have to be proven spurious for a full undo.
 */
static bool tcp_any_retrans_done(const struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;

        if (tp->retrans_out)
                return true;

        skb = tcp_rtx_queue_head(sk);
        if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
                return true;

        return false;
}

static void DBGUNDO(struct sock *sk, const char *msg)
{
#if FASTRETRANS_DEBUG > 1
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_sock *inet = inet_sk(sk);

        if (sk->sk_family == AF_INET) {
                pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
                         msg,
                         &inet->inet_daddr, ntohs(inet->inet_dport),
                         tp->snd_cwnd, tcp_left_out(tp),
                         tp->snd_ssthresh, tp->prior_ssthresh,
                         tp->packets_out);
        }
#if IS_ENABLED(CONFIG_IPV6)
        else if (sk->sk_family == AF_INET6) {
                pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
                         msg,
                         &sk->sk_v6_daddr, ntohs(inet->inet_dport),
                         tp->snd_cwnd, tcp_left_out(tp),
                         tp->snd_ssthresh, tp->prior_ssthresh,
                         tp->packets_out);
        }
#endif
#endif
}

static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (unmark_loss) {
                struct sk_buff *skb;

                skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                }
                tp->lost_out = 0;
                tcp_clear_all_retrans_hints(tp);
        }

        if (tp->prior_ssthresh) {
                const struct inet_connection_sock *icsk = inet_csk(sk);

                tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);

                if (tp->prior_ssthresh > tp->snd_ssthresh) {
                        tp->snd_ssthresh = tp->prior_ssthresh;
                        tcp_ecn_withdraw_cwr(tp);
                }
        }
        tp->snd_cwnd_stamp = tcp_jiffies32;
        tp->undo_marker = 0;
        tp->rack.advanced = 1;
}

static inline bool tcp_may_undo(const struct tcp_sock *tp)
{
        return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}

/* Try to fully undo the cwnd reduction when recovery completes. */
static bool tcp_try_undo_recovery(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (tcp_may_undo(tp)) {
                int mib_idx;

                /* Happy end! We did not retransmit anything
                 * or our original transmission succeeded.
                 */
                DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
                tcp_undo_cwnd_reduction(sk, false);
                if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
                        mib_idx = LINUX_MIB_TCPLOSSUNDO;
                else
                        mib_idx = LINUX_MIB_TCPFULLUNDO;

                NET_INC_STATS(sock_net(sk), mib_idx);
        } else if (tp->rack.reo_wnd_persist) {
                tp->rack.reo_wnd_persist--;
        }
        if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
                /* Hold old state until something *above* high_seq
                 * is ACKed. For Reno it is MUST to prevent false
                 * fast retransmits (RFC2582). SACK TCP is safe.
                 */
                if (!tcp_any_retrans_done(sk))
                        tp->retrans_stamp = 0;
                return true;
        }
        tcp_set_ca_state(sk, TCP_CA_Open);
        tp->is_sack_reneg = 0;
        return false;
}

/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
static bool tcp_try_undo_dsack(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (tp->undo_marker && !tp->undo_retrans) {
                tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
                                               tp->rack.reo_wnd_persist + 1);
                DBGUNDO(sk, "D-SACK");
                tcp_undo_cwnd_reduction(sk, false);
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
                return true;
        }
        return false;
}

/* Undo during loss recovery after partial ACK or using F-RTO. */
static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (frto_undo || tcp_may_undo(tp)) {
                tcp_undo_cwnd_reduction(sk, true);

                DBGUNDO(sk, "partial loss");
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
                if (frto_undo)
                        NET_INC_STATS(sock_net(sk),
                                      LINUX_MIB_TCPSPURIOUSRTOS);
                inet_csk(sk)->icsk_retransmits = 0;
                if (frto_undo || tcp_is_sack(tp)) {
                        tcp_set_ca_state(sk, TCP_CA_Open);
                        tp->is_sack_reneg = 0;
                }
                return true;
        }
        return false;
}

/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in
 * RFC 6937. It computes the number of packets to send (sndcnt) based on
 * packets newly delivered:
 *   1) If the packets in flight is larger than ssthresh, PRR spreads the
 *	cwnd reductions across a full RTT.
 *   2) Otherwise PRR uses packet conservation to send as much as delivered.
 *      But when retransmits are acked without further losses, PRR
 *      slow starts cwnd up to ssthresh to speed up the recovery.
 */
static void tcp_init_cwnd_reduction(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        tp->high_seq = tp->snd_nxt;
        tp->tlp_high_seq = 0;
        tp->snd_cwnd_cnt = 0;
        tp->prior_cwnd = tp->snd_cwnd;
        tp->prr_delivered = 0;
        tp->prr_out = 0;
        tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
        tcp_ecn_queue_cwr(tp);
}

void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int sndcnt = 0;
        int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);

        if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
                return;

        tp->prr_delivered += newly_acked_sacked;
        if (delta < 0) {
                u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
                               tp->prior_cwnd - 1;
                sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
        } else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
                   FLAG_RETRANS_DATA_ACKED) {
                sndcnt = min_t(int, delta,
                               max_t(int, tp->prr_delivered - tp->prr_out,
                                     newly_acked_sacked) + 1);
        } else {
                sndcnt = min(delta, newly_acked_sacked);
        }
        /* Force a fast retransmit upon entering fast recovery */
        sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
        tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}
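
/* PRR worked example (hypothetical state): prior_cwnd == 10,
 * snd_ssthresh == 5, and an ACK arrives while 8 packets are still in
 * flight (delta == -3). With prr_delivered == 4 and prr_out == 1,
 *   sndcnt = ceil(5 * 4 / 10) - 1 = 2 - 1 = 1
 * (the "+ prior_cwnd - 1" term implements the ceiling), so cwnd becomes
 * in_flight + 1 == 9: the window glides down toward ssthresh over one
 * RTT instead of collapsing in a single step.
 */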

static inline void tcp_end_cwnd_reduction(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (inet_csk(sk)->icsk_ca_ops->cong_control)
                return;

        /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
        if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
            (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
                tp->snd_cwnd = tp->snd_ssthresh;
                tp->snd_cwnd_stamp = tcp_jiffies32;
        }
        tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}

/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
void tcp_enter_cwr(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        tp->prior_ssthresh = 0;
        if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
                tp->undo_marker = 0;
                tcp_init_cwnd_reduction(sk);
                tcp_set_ca_state(sk, TCP_CA_CWR);
        }
}
EXPORT_SYMBOL(tcp_enter_cwr);

static void tcp_try_keep_open(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int state = TCP_CA_Open;

        if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
                state = TCP_CA_Disorder;

        if (inet_csk(sk)->icsk_ca_state != state) {
                tcp_set_ca_state(sk, state);
                tp->high_seq = tp->snd_nxt;
        }
}

static void tcp_try_to_open(struct sock *sk, int flag)
{
        struct tcp_sock *tp = tcp_sk(sk);

        tcp_verify_left_out(tp);

        if (!tcp_any_retrans_done(sk))
                tp->retrans_stamp = 0;

        if (flag & FLAG_ECE)
                tcp_enter_cwr(sk);

        if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
                tcp_try_keep_open(sk);
        }
}

static void tcp_mtup_probe_failed(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
        icsk->icsk_mtup.probe_size = 0;
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
}

static void tcp_mtup_probe_success(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        /* FIXME: breaks with very large cwnd */
        tp->prior_ssthresh = tcp_current_ssthresh(sk);
        tp->snd_cwnd = tp->snd_cwnd *
                       tcp_mss_to_mtu(sk, tp->mss_cache) /
                       icsk->icsk_mtup.probe_size;
        tp->snd_cwnd_cnt = 0;
        tp->snd_cwnd_stamp = tcp_jiffies32;
        tp->snd_ssthresh = tcp_current_ssthresh(sk);

        icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
        icsk->icsk_mtup.probe_size = 0;
        tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}

/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        unsigned int mss = tcp_current_mss(sk);

        skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
                if (tcp_skb_seglen(skb) > mss &&
                    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
                        if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
                                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                                tp->retrans_out -= tcp_skb_pcount(skb);
                        }
                        tcp_skb_mark_lost_uncond_verify(tp, skb);
                }
        }

        tcp_clear_retrans_hints_partial(tp);

        if (!tp->lost_out)
                return;

        if (tcp_is_reno(tp))
                tcp_limit_reno_sacked(tp);

        tcp_verify_left_out(tp);

        /* Don't muck with the congestion window here.
         * Reason is that we do not increase amount of _data_
         * in network, but units changed and effective
         * cwnd/ssthresh really reduced now.
         */
        if (icsk->icsk_ca_state != TCP_CA_Loss) {
                tp->high_seq = tp->snd_nxt;
                tp->snd_ssthresh = tcp_current_ssthresh(sk);
                tp->prior_ssthresh = 0;
                tp->undo_marker = 0;
                tcp_set_ca_state(sk, TCP_CA_Loss);
        }
        tcp_xmit_retransmit_queue(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);
2667
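/* Enter fast recovery: count the episode in the Reno or SACK recovery MIB,
 * arm the undo machinery, and (unless the loss is ECN-signalled, which
 * forbids undo) remember ssthresh before starting cwnd reduction.
 */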
2668void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2669{
2670 struct tcp_sock *tp = tcp_sk(sk);
2671 int mib_idx;
2672
2673 if (tcp_is_reno(tp))
2674 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2675 else
2676 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2677
2678 NET_INC_STATS(sock_net(sk), mib_idx);
2679
2680 tp->prior_ssthresh = 0;
2681 tcp_init_undo(tp);
2682
2683 if (!tcp_in_cwnd_reduction(sk)) {
2684 if (!ece_ack)
2685 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2686 tcp_init_cwnd_reduction(sk);
2687 }
2688 tcp_set_ca_state(sk, TCP_CA_Recovery);
2689}

/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
 * recovered or spurious. Otherwise retransmits more on partial ACKs.
 */
2694static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
2695 int *rexmit)
2696{
2697 struct tcp_sock *tp = tcp_sk(sk);
2698 bool recovered = !before(tp->snd_una, tp->high_seq);
2699
2700 if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
2701 tcp_try_undo_loss(sk, false))
2702 return;
2703
 if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
 /* Step 3.b. A timeout is spurious if not all data are
  * lost, i.e., never-retransmitted data are (s)acked.
  */
2708 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2709 tcp_try_undo_loss(sk, true))
2710 return;
2711
2712 if (after(tp->snd_nxt, tp->high_seq)) {
2713 if (flag & FLAG_DATA_SACKED || num_dupack)
2714 tp->frto = 0;
2715 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
 tp->high_seq = tp->snd_nxt;
 /* Step 2.b. Try send new data (but deferred until cwnd
  * is updated in tcp_ack()). Otherwise fall back to
  * the conventional recovery.
  */
2721 if (!tcp_write_queue_empty(sk) &&
2722 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2723 *rexmit = REXMIT_NEW;
2724 return;
2725 }
2726 tp->frto = 0;
2727 }
2728 }
2729
 if (recovered) {
 /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
2732 tcp_try_undo_recovery(sk);
2733 return;
2734 }
 if (tcp_is_reno(tp)) {
 /* A Reno DUPACK means new data in F-RTO step 2.b above are
  * delivered. Lower inflight to clock out (re)transmissions.
  */
2739 if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
2740 tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
2741 else if (flag & FLAG_SND_UNA_ADVANCED)
2742 tcp_reset_reno_sack(tp);
2743 }
2744 *rexmit = REXMIT_LOST;
2745}

/* Undo during fast recovery after partial ACK. */
2748static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
2749{
2750 struct tcp_sock *tp = tcp_sk(sk);
2751
 if (tp->undo_marker && tcp_packet_delayed(tp)) {
 /* Plain luck! Hole if filled with delayed
  * packet, rather than with a retransmit. Check reordering.
  */
 tcp_check_sack_reordering(sk, prior_snd_una, 1);

 /* We are getting evidence that the reordering degree is higher
  * than we realized. If there are no retransmits out then we
  * can undo. Otherwise we clock out new packets but do not
  * mark more packets lost or retransmit more.
  */
2763 if (tp->retrans_out)
2764 return true;
2765
2766 if (!tcp_any_retrans_done(sk))
2767 tp->retrans_stamp = 0;
2768
2769 DBGUNDO(sk, "partial recovery");
2770 tcp_undo_cwnd_reduction(sk, true);
2771 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2772 tcp_try_keep_open(sk);
2773 return true;
2774 }
2775 return false;
2776}
2777
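/* Mark packets lost with whichever detector is in effect: NewReno-style
 * dupack counting for non-SACK flows, or RACK's time-based marking. The
 * caller is flagged when RACK declares a retransmitted packet lost again.
 */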
2778static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2779{
2780 struct tcp_sock *tp = tcp_sk(sk);
2781
2782 if (tcp_rtx_queue_empty(sk))
2783 return;
2784
2785 if (unlikely(tcp_is_reno(tp))) {
2786 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2787 } else if (tcp_is_rack(sk)) {
2788 u32 prior_retrans = tp->retrans_out;
2789
2790 tcp_rack_mark_lost(sk);
2791 if (prior_retrans > tp->retrans_out)
2792 *ack_flag |= FLAG_LOST_RETRANS;
2793 }
2794}
2795
2796static bool tcp_force_fast_retransmit(struct sock *sk)
2797{
2798 struct tcp_sock *tp = tcp_sk(sk);
2799
2800 return after(tcp_highest_sack_seq(tp),
2801 tp->snd_una + tp->reordering * tp->mss_cache);
2802}

/* Process an event, which can update packets-in-flight not trivially.
 * Main goal of this function is to calculate new estimate for left_out,
 * taking into account both packets sitting in receiver's buffer and
 * packets lost by network.
 *
 * Besides that it updates the congestion state when packet loss or ECN
 * is detected. But it does not reduce the cwnd, it is done by the
 * congestion control later.
 *
 * It does _not_ decide what to send, it is made in function
 * tcp_xmit_retransmit_queue().
 */
2816static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2817 int num_dupack, int *ack_flag, int *rexmit)
2818{
2819 struct inet_connection_sock *icsk = inet_csk(sk);
2820 struct tcp_sock *tp = tcp_sk(sk);
2821 int fast_rexmit = 0, flag = *ack_flag;
2822 bool ece_ack = flag & FLAG_ECE;
2823 bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
2824 tcp_force_fast_retransmit(sk));
2825
2826 if (!tp->packets_out && tp->sacked_out)
2827 tp->sacked_out = 0;

 /* Now state machine starts.
  * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
2831 if (ece_ack)
2832 tp->prior_ssthresh = 0;

 /* B. In all the states check for reneging SACKs. */
2835 if (tcp_check_sack_reneging(sk, flag))
2836 return;

 /* C. Check consistency of the current state. */
2839 tcp_verify_left_out(tp);

 /* D. Check state exit conditions. State can be terminated
  *    when high_seq is ACKed. */
2843 if (icsk->icsk_ca_state == TCP_CA_Open) {
2844 WARN_ON(tp->retrans_out != 0);
2845 tp->retrans_stamp = 0;
2846 } else if (!before(tp->snd_una, tp->high_seq)) {
2847 switch (icsk->icsk_ca_state) {
2848 case TCP_CA_CWR:
 /* CWR is to be held something *above* high_seq
  * is ACKed for CWR bit to reach receiver. */
2851 if (tp->snd_una != tp->high_seq) {
2852 tcp_end_cwnd_reduction(sk);
2853 tcp_set_ca_state(sk, TCP_CA_Open);
2854 }
2855 break;
2856
2857 case TCP_CA_Recovery:
2858 if (tcp_is_reno(tp))
2859 tcp_reset_reno_sack(tp);
2860 if (tcp_try_undo_recovery(sk))
2861 return;
2862 tcp_end_cwnd_reduction(sk);
2863 break;
2864 }
2865 }

 /* E. Process state. */
2868 switch (icsk->icsk_ca_state) {
2869 case TCP_CA_Recovery:
2870 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2871 if (tcp_is_reno(tp))
2872 tcp_add_reno_sack(sk, num_dupack, ece_ack);
2873 } else {
2874 if (tcp_try_undo_partial(sk, prior_snd_una))
2875 return;
 /* Partial ACK arrived. Force fast retransmit. */
2877 do_lost = tcp_force_fast_retransmit(sk);
2878 }
2879 if (tcp_try_undo_dsack(sk)) {
2880 tcp_try_keep_open(sk);
2881 return;
2882 }
2883 tcp_identify_packet_loss(sk, ack_flag);
2884 break;
2885 case TCP_CA_Loss:
2886 tcp_process_loss(sk, flag, num_dupack, rexmit);
2887 tcp_identify_packet_loss(sk, ack_flag);
2888 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
2889 (*ack_flag & FLAG_LOST_RETRANS)))
2890 return;
2891
2892 fallthrough;
2893 default:
2894 if (tcp_is_reno(tp)) {
2895 if (flag & FLAG_SND_UNA_ADVANCED)
2896 tcp_reset_reno_sack(tp);
2897 tcp_add_reno_sack(sk, num_dupack, ece_ack);
2898 }
2899
2900 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2901 tcp_try_undo_dsack(sk);
2902
2903 tcp_identify_packet_loss(sk, ack_flag);
2904 if (!tcp_time_to_recover(sk, flag)) {
2905 tcp_try_to_open(sk, flag);
2906 return;
2907 }

 /* MTU probe failure: don't reduce cwnd */
2910 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2911 icsk->icsk_mtup.probe_size &&
2912 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2913 tcp_mtup_probe_failed(sk);
 /* Restores the reduction we did in tcp_mtup_probe() */
2915 tp->snd_cwnd++;
2916 tcp_simple_retransmit(sk);
2917 return;
2918 }

 /* Otherwise enter Recovery state */
2921 tcp_enter_recovery(sk, ece_ack);
2922 fast_rexmit = 1;
2923 }
2924
2925 if (!tcp_is_rack(sk) && do_lost)
2926 tcp_update_scoreboard(sk, fast_rexmit);
2927 *rexmit = REXMIT_LOST;
2928}
2929
2930static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2931{
2932 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2933 struct tcp_sock *tp = tcp_sk(sk);
2934
2935 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
 /* A possibly-delayed ACK with an RTT above the current minimum can
  * only inflate the estimate; skip it so rtt_min tracks genuine,
  * un-delayed samples.
  */
2940 return;
2941 }
2942 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
2943 rtt_us ? : jiffies_to_usecs(1));
2944}
2945
2946static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2947 long seq_rtt_us, long sack_rtt_us,
2948 long ca_rtt_us, struct rate_sample *rs)
2949{
2950 const struct tcp_sock *tp = tcp_sk(sk);

 /* Prefer RTT measured from ACK's timing to TS-ECR. This is because
  * broken middle-boxes or peers may corrupt TS-ECR fields. But
  * Karn's algorithm forbids taking RTT if some retransmitted data
  * is acked (RFC6298).
  */
2957 if (seq_rtt_us < 0)
2958 seq_rtt_us = sack_rtt_us;

 /* RTTM Rule: A TSecr value received in a segment is used to
  * update the averaged RTT measurement only if the segment
  * acknowledges some new data, i.e., only if it advances the
  * left edge of the send window.
  * See draft-ietf-tcplw-high-performance-00, section 3.3.
  */
2966 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2967 flag & FLAG_ACKED) {
2968 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
2969
2970 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
2971 if (!delta)
2972 delta = 1;
2973 seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
2974 ca_rtt_us = seq_rtt_us;
2975 }
2976 }
2977 rs->rtt_us = ca_rtt_us;
2978 if (seq_rtt_us < 0)
2979 return false;

 /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
  * always taken together with ACK, SACK, or TS-opts. Any negative
  * values will be skipped with the seq_rtt_us < 0 check above.
  */
2985 tcp_update_rtt_min(sk, ca_rtt_us, flag);
2986 tcp_rtt_estimator(sk, seq_rtt_us);
2987 tcp_set_rto(sk);

 /* RFC6298: only reset backoff on valid RTT measurement. */
2990 inet_csk(sk)->icsk_backoff = 0;
2991 return true;
2992}

/* Compute time elapsed between (last) SYNACK skb and the ACK */
2995void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
2996{
2997 struct rate_sample rs;
2998 long rtt_us = -1L;
2999
3000 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
3001 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
3002
3003 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
3004}
3005
3006
3007static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
3008{
3009 const struct inet_connection_sock *icsk = inet_csk(sk);
3010
3011 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
3012 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
3013}

/* Restart timer after forward progress on connection.
 * RFC2988 recommends to restart timer to now+rto.
 */
3018void tcp_rearm_rto(struct sock *sk)
3019{
3020 const struct inet_connection_sock *icsk = inet_csk(sk);
3021 struct tcp_sock *tp = tcp_sk(sk);

 /* If the retrans timer is currently being used by Fast Open
  * for SYN-ACK retrans purpose, stay put.
  */
3026 if (rcu_access_pointer(tp->fastopen_rsk))
3027 return;
3028
3029 if (!tp->packets_out) {
3030 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3031 } else {
3032 u32 rto = inet_csk(sk)->icsk_rto;
3033
3034 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
3035 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3036 s64 delta_us = tcp_rto_delta_us(sk);
 /* delta_us may not be positive if the socket is locked
  * when the retrans timer fires and is rescheduled.
  */
3040 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3041 }
3042 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3043 TCP_RTO_MAX);
3044 }
3045}

/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
3048static void tcp_set_xmit_timer(struct sock *sk)
3049{
3050 if (!tcp_schedule_loss_probe(sk, true))
3051 tcp_rearm_rto(sk);
3052}

/* If we get here, the whole TSO packet has not been acked. */
3055static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3056{
3057 struct tcp_sock *tp = tcp_sk(sk);
3058 u32 packets_acked;
3059
3060 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3061
3062 packets_acked = tcp_skb_pcount(skb);
3063 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3064 return 0;
3065 packets_acked -= tcp_skb_pcount(skb);
3066
3067 if (packets_acked) {
3068 BUG_ON(tcp_skb_pcount(skb) == 0);
3069 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3070 }
3071
3072 return packets_acked;
3073}
3074
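/* Report an ACK timestamp (SCM_TSTAMP_ACK) for an skb whose tskey falls
 * inside the newly ACKed range, preserving the tsorted list around the
 * upcall.
 */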
3075static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3076 u32 prior_snd_una)
3077{
3078 const struct skb_shared_info *shinfo;
3079
3080
3081 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3082 return;
3083
3084 shinfo = skb_shinfo(skb);
3085 if (!before(shinfo->tskey, prior_snd_una) &&
3086 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3087 tcp_skb_tsorted_save(skb) {
3088 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3089 } tcp_skb_tsorted_restore(skb);
3090 }
3091}

/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 */
3097static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
3098 u32 prior_snd_una,
3099 struct tcp_sacktag_state *sack, bool ece_ack)
3100{
3101 const struct inet_connection_sock *icsk = inet_csk(sk);
3102 u64 first_ackt, last_ackt;
3103 struct tcp_sock *tp = tcp_sk(sk);
3104 u32 prior_sacked = tp->sacked_out;
3105 u32 reord = tp->snd_nxt;
3106 struct sk_buff *skb, *next;
3107 bool fully_acked = true;
3108 long sack_rtt_us = -1L;
3109 long seq_rtt_us = -1L;
3110 long ca_rtt_us = -1L;
3111 u32 pkts_acked = 0;
3112 u32 last_in_flight = 0;
3113 bool rtt_update;
3114 int flag = 0;
3115
3116 first_ackt = 0;
3117
3118 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3119 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3120 const u32 start_seq = scb->seq;
3121 u8 sacked = scb->sacked;
3122 u32 acked_pcount;

 /* Determine how many packets and what bytes were acked, tso and else */
3125 if (after(scb->end_seq, tp->snd_una)) {
3126 if (tcp_skb_pcount(skb) == 1 ||
3127 !after(tp->snd_una, scb->seq))
3128 break;
3129
3130 acked_pcount = tcp_tso_acked(sk, skb);
3131 if (!acked_pcount)
3132 break;
3133 fully_acked = false;
3134 } else {
3135 acked_pcount = tcp_skb_pcount(skb);
3136 }
3137
3138 if (unlikely(sacked & TCPCB_RETRANS)) {
3139 if (sacked & TCPCB_SACKED_RETRANS)
3140 tp->retrans_out -= acked_pcount;
3141 flag |= FLAG_RETRANS_DATA_ACKED;
3142 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3143 last_ackt = tcp_skb_timestamp_us(skb);
3144 WARN_ON_ONCE(last_ackt == 0);
3145 if (!first_ackt)
3146 first_ackt = last_ackt;
3147
3148 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3149 if (before(start_seq, reord))
3150 reord = start_seq;
3151 if (!after(scb->end_seq, tp->high_seq))
3152 flag |= FLAG_ORIG_SACK_ACKED;
3153 }
3154
3155 if (sacked & TCPCB_SACKED_ACKED) {
3156 tp->sacked_out -= acked_pcount;
3157 } else if (tcp_is_sack(tp)) {
3158 tcp_count_delivered(tp, acked_pcount, ece_ack);
3159 if (!tcp_skb_spurious_retrans(tp, skb))
3160 tcp_rack_advance(tp, sacked, scb->end_seq,
3161 tcp_skb_timestamp_us(skb));
3162 }
3163 if (sacked & TCPCB_LOST)
3164 tp->lost_out -= acked_pcount;
3165
3166 tp->packets_out -= acked_pcount;
3167 pkts_acked += acked_pcount;
3168 tcp_rate_skb_delivered(sk, skb, sack->rate);
3169

 /* Initial outgoing SYN's get put onto the write_queue
  * just like anything else we transmit.  It is not
  * true data, and if we misinform our callers that
  * this ACK acks real data, we will erroneously exit
  * connection startup slow start one packet too
  * quickly.  This is severely frowned upon behavior.
  */
3177 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3178 flag |= FLAG_DATA_ACKED;
3179 } else {
3180 flag |= FLAG_SYN_ACKED;
3181 tp->retrans_stamp = 0;
3182 }
3183
3184 if (!fully_acked)
3185 break;
3186
3187 tcp_ack_tstamp(sk, skb, prior_snd_una);
3188
3189 next = skb_rb_next(skb);
3190 if (unlikely(skb == tp->retransmit_skb_hint))
3191 tp->retransmit_skb_hint = NULL;
3192 if (unlikely(skb == tp->lost_skb_hint))
3193 tp->lost_skb_hint = NULL;
3194 tcp_highest_sack_replace(sk, skb, next);
3195 tcp_rtx_queue_unlink_and_free(skb, sk);
3196 }
3197
3198 if (!skb)
3199 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3200
3201 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3202 tp->snd_up = tp->snd_una;
3203
3204 if (skb) {
3205 tcp_ack_tstamp(sk, skb, prior_snd_una);
3206 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
3207 flag |= FLAG_SACK_RENEGING;
3208 }
3209
3210 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3211 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3212 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3213
3214 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3215 last_in_flight && !prior_sacked && fully_acked &&
3216 sack->rate->prior_delivered + 1 == tp->delivered &&
3217 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
 /* Conservatively mark a delayed ACK. It's typically
  * from a lone runt packet over the round trip to
  * a receiver w/o out-of-order or CE events.
  */
3222 flag |= FLAG_ACK_MAYBE_DELAYED;
3223 }
3224 }
3225 if (sack->first_sackt) {
3226 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3227 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3228 }
3229 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3230 ca_rtt_us, sack->rate);
3231
3232 if (flag & FLAG_ACKED) {
3233 flag |= FLAG_SET_XMIT_TIMER;
3234 if (unlikely(icsk->icsk_mtup.probe_size &&
3235 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3236 tcp_mtup_probe_success(sk);
3237 }
3238
3239 if (tcp_is_reno(tp)) {
3240 tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);

 /* If any of the cumulatively ACKed segments was
  * retransmitted, non-SACK case cannot confirm that
  * progress was due to original transmission due to
  * lack of TCPCB_SACKED_ACKED bits even if some of
  * the packets may have been never retransmitted.
  */
3248 if (flag & FLAG_RETRANS_DATA_ACKED)
3249 flag &= ~FLAG_ORIG_SACK_ACKED;
3250 } else {
3251 int delta;

 /* Non-retransmitted hole got filled? That's reordering */
3254 if (before(reord, prior_fack))
3255 tcp_check_sack_reordering(sk, reord, 0);
3256
3257 delta = prior_sacked - tp->sacked_out;
3258 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3259 }
3260 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3261 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
3262 tcp_skb_timestamp_us(skb))) {
 /* Do not re-arm RTO if the sack RTT is measured from data sent
  * after when the head was last (re)transmitted. Otherwise the
  * timeout may continue to extend in loss recovery.
  */
3267 flag |= FLAG_SET_XMIT_TIMER;
3268 }
3269
3270 if (icsk->icsk_ca_ops->pkts_acked) {
3271 struct ack_sample sample = { .pkts_acked = pkts_acked,
3272 .rtt_us = sack->rate->rtt_us,
3273 .in_flight = last_in_flight };
3274
3275 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3276 }
3277
3278#if FASTRETRANS_DEBUG > 0
3279 WARN_ON((int)tp->sacked_out < 0);
3280 WARN_ON((int)tp->lost_out < 0);
3281 WARN_ON((int)tp->retrans_out < 0);
3282 if (!tp->packets_out && tcp_is_sack(tp)) {
3283 icsk = inet_csk(sk);
3284 if (tp->lost_out) {
3285 pr_debug("Leak l=%u %d\n",
3286 tp->lost_out, icsk->icsk_ca_state);
3287 tp->lost_out = 0;
3288 }
3289 if (tp->sacked_out) {
3290 pr_debug("Leak s=%u %d\n",
3291 tp->sacked_out, icsk->icsk_ca_state);
3292 tp->sacked_out = 0;
3293 }
3294 if (tp->retrans_out) {
3295 pr_debug("Leak r=%u %d\n",
3296 tp->retrans_out, icsk->icsk_ca_state);
3297 tp->retrans_out = 0;
3298 }
3299 }
3300#endif
3301 return flag;
3302}
3303
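/* A window probe was ACKed: if a usable window opened, clear the probe
 * timer and backoff; otherwise re-arm probe0 with its backed-off timeout.
 */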
3304static void tcp_ack_probe(struct sock *sk)
3305{
3306 struct inet_connection_sock *icsk = inet_csk(sk);
3307 struct sk_buff *head = tcp_send_head(sk);
3308 const struct tcp_sock *tp = tcp_sk(sk);

 /* Was it a usable window open? */
3311 if (!head)
3312 return;
3313 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3314 icsk->icsk_backoff = 0;
3315 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 /* Socket must be waked up by subsequent tcp_data_snd_check().
  * This function is not for random using!
  */
3319 } else {
3320 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3321
3322 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3323 when, TCP_RTO_MAX);
3324 }
3325}
3326
3327static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3328{
3329 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3330 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3331}

/* Decide whether to run the increase function of congestion control. */
3334static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3335{
 /* If reordering is high then always grow cwnd whenever data is
  * delivered regardless of its seq number. Otherwise check its state:
  * only ACKs making forward progress (data cumulatively acked or
  * sacked) may grow cwnd.
  */
3342 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3343 return flag & FLAG_FORWARD_PROGRESS;
3344
3345 return flag & FLAG_DATA_ACKED;
3346}

/* The "ultimate" congestion control function that aims to replace the rigid
 * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction).
 * It's called toward the end of processing an ACK with precise rate
 * information. All transmission or retransmission are delayed afterwards.
 */
3353static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3354 int flag, const struct rate_sample *rs)
3355{
3356 const struct inet_connection_sock *icsk = inet_csk(sk);
3357
3358 if (icsk->icsk_ca_ops->cong_control) {
3359 icsk->icsk_ca_ops->cong_control(sk, rs);
3360 return;
3361 }
3362
3363 if (tcp_in_cwnd_reduction(sk)) {
3364
3365 tcp_cwnd_reduction(sk, acked_sacked, flag);
3366 } else if (tcp_may_raise_cwnd(sk, flag)) {
3367
3368 tcp_cong_avoid(sk, ack, acked_sacked);
3369 }
3370 tcp_update_pacing_rate(sk);
3371}

/* Check that window update is acceptable.
 * The function assumes that snd_una<=ack<=snd_nxt.
 */
3376static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3377 const u32 ack, const u32 ack_seq,
3378 const u32 nwin)
3379{
3380 return after(ack, tp->snd_una) ||
3381 after(ack_seq, tp->snd_wl1) ||
3382 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3383}

/* If we update tp->snd_una, also update tp->bytes_acked */
3386static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3387{
3388 u32 delta = ack - tp->snd_una;
3389
3390 sock_owned_by_me((struct sock *)tp);
3391 tp->bytes_acked += delta;
3392 tp->snd_una = ack;
3393}

/* If we update tp->rcv_nxt, also update tp->bytes_received */
3396static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3397{
3398 u32 delta = seq - tp->rcv_nxt;
3399
3400 sock_owned_by_me((struct sock *)tp);
3401 tp->bytes_received += delta;
3402 WRITE_ONCE(tp->rcv_nxt, seq);
3403}

/* Update our send window.
 *
 * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
 * and in FreeBSD. NetBSD's one is even worse.) is wrong.
 */
3410static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3411 u32 ack_seq)
3412{
3413 struct tcp_sock *tp = tcp_sk(sk);
3414 int flag = 0;
3415 u32 nwin = ntohs(tcp_hdr(skb)->window);
3416
3417 if (likely(!tcp_hdr(skb)->syn))
3418 nwin <<= tp->rx_opt.snd_wscale;
3419
3420 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3421 flag |= FLAG_WIN_UPDATE;
3422 tcp_update_wl(tp, ack_seq);
3423
3424 if (tp->snd_wnd != nwin) {
3425 tp->snd_wnd = nwin;
3426
3427
3428
3429
3430 tp->pred_flags = 0;
3431 tcp_fast_path_check(sk);
3432
3433 if (!tcp_write_queue_empty(sk))
3434 tcp_slow_start_after_idle_check(sk);
3435
3436 if (nwin > tp->max_window) {
3437 tp->max_window = nwin;
3438 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3439 }
3440 }
3441 }
3442
3443 tcp_snd_una_update(tp, ack);
3444
3445 return flag;
3446}
3447
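/* Shared helper for out-of-window rate limiting: allow at most one event
 * per sysctl_tcp_invalid_ratelimit interval, counting suppressed events in
 * the supplied MIB entry.
 */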
3448static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3449 u32 *last_oow_ack_time)
3450{
3451 if (*last_oow_ack_time) {
3452 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3453
3454 if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
3455 NET_INC_STATS(net, mib_idx);
3456 return true;
3457 }
3458 }
3459
3460 *last_oow_ack_time = tcp_jiffies32;
3461
3462 return false;
3463}

/* Return true if we're currently rate-limiting out-of-window ACKs and
 * thus shouldn't send a dupack right now. We rate-limit dupacks in
 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
 * attacks that send repeated SYNs or ACKs for the same connection. To
 * do this, we do not send a duplicate SYNACK or ACK if the remote
 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
 */
3472bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3473 int mib_idx, u32 *last_oow_ack_time)
3474{
3475
3476 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3477 !tcp_hdr(skb)->syn)
3478 return false;
3479
3480 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3481}

/* RFC 5961 7 [ACK Throttling] */
3484static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3485{
3486
3487 static u32 challenge_timestamp;
3488 static unsigned int challenge_count;
3489 struct tcp_sock *tp = tcp_sk(sk);
3490 struct net *net = sock_net(sk);
3491 u32 count, now;
3492
3493
3494 if (__tcp_oow_rate_limited(net,
3495 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3496 &tp->last_oow_ack_time))
3497 return;

 /* Then check host-wide RFC 5961 rate limit. */
3500 now = jiffies / HZ;
3501 if (now != challenge_timestamp) {
3502 u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
3503 u32 half = (ack_limit + 1) >> 1;
3504
3505 challenge_timestamp = now;
3506 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
3507 }
3508 count = READ_ONCE(challenge_count);
3509 if (count > 0) {
3510 WRITE_ONCE(challenge_count, count - 1);
3511 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3512 tcp_send_ack(sk);
3513 }
3514}
3515
3516static void tcp_store_ts_recent(struct tcp_sock *tp)
3517{
3518 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3519 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3520}
3521
3522static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3523{
3524 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3525
3526
3527
3528
3529
3530
3531
3532 if (tcp_paws_check(&tp->rx_opt, 0))
3533 tcp_store_ts_recent(tp);
3534 }
3535}

/* This routine deals with acks during a TLP episode and ends an episode by
 * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
 */
3540static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3541{
3542 struct tcp_sock *tp = tcp_sk(sk);
3543
3544 if (before(ack, tp->tlp_high_seq))
3545 return;
3546
3547 if (!tp->tlp_retrans) {
3548
3549 tp->tlp_high_seq = 0;
3550 } else if (flag & FLAG_DSACKING_ACK) {
3551
3552 tp->tlp_high_seq = 0;
3553 } else if (after(ack, tp->tlp_high_seq)) {
3554
3555
3556
3557 tcp_init_cwnd_reduction(sk);
3558 tcp_set_ca_state(sk, TCP_CA_CWR);
3559 tcp_end_cwnd_reduction(sk);
3560 tcp_try_keep_open(sk);
3561 NET_INC_STATS(sock_net(sk),
3562 LINUX_MIB_TCPLOSSPROBERECOVERY);
3563 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3564 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3565
3566 tp->tlp_high_seq = 0;
3567 }
3568}
3569
3570static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3571{
3572 const struct inet_connection_sock *icsk = inet_csk(sk);
3573
3574 if (icsk->icsk_ca_ops->in_ack_event)
3575 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3576}

/* Congestion control has updated the cwnd already. So if we're in
 * loss recovery then now we do any new sends (for FRTO) or
 * retransmits (for CA_Loss or CA_recovery) that make sense.
 */
3582static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3583{
3584 struct tcp_sock *tp = tcp_sk(sk);
3585
3586 if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
3587 return;
3588
3589 if (unlikely(rexmit == REXMIT_NEW)) {
3590 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3591 TCP_NAGLE_OFF);
3592 if (after(tp->snd_nxt, tp->high_seq))
3593 return;
3594 tp->frto = 0;
3595 }
3596 tcp_xmit_retransmit_queue(sk);
3597}

/* Returns the number of packets newly acked or sacked by the current ACK */
3600static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3601{
3602 const struct net *net = sock_net(sk);
3603 struct tcp_sock *tp = tcp_sk(sk);
3604 u32 delivered;
3605
3606 delivered = tp->delivered - prior_delivered;
3607 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3608 if (flag & FLAG_ECE)
3609 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3610
3611 return delivered;
3612}

/* This routine deals with incoming acks, but not outgoing ones. */
3615static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3616{
3617 struct inet_connection_sock *icsk = inet_csk(sk);
3618 struct tcp_sock *tp = tcp_sk(sk);
3619 struct tcp_sacktag_state sack_state;
3620 struct rate_sample rs = { .prior_delivered = 0 };
3621 u32 prior_snd_una = tp->snd_una;
3622 bool is_sack_reneg = tp->is_sack_reneg;
3623 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3624 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3625 int num_dupack = 0;
3626 int prior_packets = tp->packets_out;
3627 u32 delivered = tp->delivered;
3628 u32 lost = tp->lost;
3629 int rexmit = REXMIT_NONE;
3630 u32 prior_fack;
3631
3632 sack_state.first_sackt = 0;
3633 sack_state.rate = &rs;
3634 sack_state.sack_delivered = 0;

 /* We very likely will need to access rtx queue. */
3637 prefetch(sk->tcp_rtx_queue.rb_node);

 /* If the ack is older than previous acks
  * then we can probably ignore it.
  */
3642 if (before(ack, prior_snd_una)) {
 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3644 if (before(ack, prior_snd_una - tp->max_window)) {
3645 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3646 tcp_send_challenge_ack(sk, skb);
3647 return -1;
3648 }
3649 goto old_ack;
3650 }

 /* If the ack includes data we haven't sent yet, discard
  * this segment (RFC793 Section 3.9).
  */
3655 if (after(ack, tp->snd_nxt))
3656 return -1;
3657
3658 if (after(ack, prior_snd_una)) {
3659 flag |= FLAG_SND_UNA_ADVANCED;
3660 icsk->icsk_retransmits = 0;
3661
3662#if IS_ENABLED(CONFIG_TLS_DEVICE)
3663 if (static_branch_unlikely(&clean_acked_data_enabled.key))
3664 if (icsk->icsk_clean_acked)
3665 icsk->icsk_clean_acked(sk, ack);
3666#endif
3667 }
3668
3669 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3670 rs.prior_in_flight = tcp_packets_in_flight(tp);

 /* ts_recent update must be made after we are sure that the packet
  * is in window.
  */
3675 if (flag & FLAG_UPDATE_TS_RECENT)
3676 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3677
3678 if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
3679 FLAG_SND_UNA_ADVANCED) {
 /* Window is constant, pure forward advance.
  * No more checks are required.
  * Note, we use the fact that SND.UNA>=SND.WL2.
  */
3684 tcp_update_wl(tp, ack_seq);
3685 tcp_snd_una_update(tp, ack);
3686 flag |= FLAG_WIN_UPDATE;
3687
3688 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3689
3690 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3691 } else {
3692 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3693
3694 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3695 flag |= FLAG_DATA;
3696 else
3697 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3698
3699 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3700
3701 if (TCP_SKB_CB(skb)->sacked)
3702 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3703 &sack_state);
3704
3705 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3706 flag |= FLAG_ECE;
3707 ack_ev_flags |= CA_ACK_ECE;
3708 }
3709
3710 if (sack_state.sack_delivered)
3711 tcp_count_delivered(tp, sack_state.sack_delivered,
3712 flag & FLAG_ECE);
3713
3714 if (flag & FLAG_WIN_UPDATE)
3715 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3716
3717 tcp_in_ack_event(sk, ack_ev_flags);
3718 }

 /* This is a deviation from RFC3168 since it states that:
  * "When the TCP data sender is ready to set the CWR bit after reducing
  * the congestion window, it SHOULD set the CWR bit only on the first
  * new data packet that it transmits."
  * We accept CWR on pure ACKs to be more robust
  * with widely-deployed TCP stacks that differ in CWR behavior.
  */
3727 tcp_ecn_accept_cwr(sk, skb);

 /* We passed data and got it acked, remove any soft error
  * log. Something worked...
  */
3732 sk->sk_err_soft = 0;
3733 icsk->icsk_probes_out = 0;
3734 tp->rcv_tstamp = tcp_jiffies32;
3735 if (!prior_packets)
3736 goto no_queue;

 /* See if we can take anything off of the retransmit queue. */
3739 flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
3740 flag & FLAG_ECE);
3741
3742 tcp_rack_update_reo_wnd(sk, &rs);
3743
3744 if (tp->tlp_high_seq)
3745 tcp_process_tlp_ack(sk, ack, flag);
3746
3747 if (flag & FLAG_SET_XMIT_TIMER)
3748 tcp_set_xmit_timer(sk);
3749
3750 if (tcp_ack_is_dubious(sk, flag)) {
3751 if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
3752 num_dupack = 1;
3753
3754 if (!(flag & FLAG_DATA))
3755 num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
3756 }
3757 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3758 &rexmit);
3759 }
3760
3761 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3762 sk_dst_confirm(sk);
3763
3764 delivered = tcp_newly_delivered(sk, delivered, flag);
3765 lost = tp->lost - lost;
3766 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3767 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3768 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3769 tcp_xmit_recovery(sk, rexmit);
3770 return 1;
3771
3772no_queue:
 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3774 if (flag & FLAG_DSACKING_ACK) {
3775 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3776 &rexmit);
3777 tcp_newly_delivered(sk, delivered, flag);
3778 }

 /* Even with nothing newly acked, this ACK may open the peer's
  * window; adjust or clear the zero-window probe timer accordingly.
  */
3783 tcp_ack_probe(sk);
3784
3785 if (tp->tlp_high_seq)
3786 tcp_process_tlp_ack(sk, ack, flag);
3787 return 1;
3788
3789old_ack:
 /* If data was SACKed, tag it and see if we should send more data.
  * If data was DSACKed, see if we can undo a cwnd reduction.
  */
3793 if (TCP_SKB_CB(skb)->sacked) {
3794 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3795 &sack_state);
3796 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3797 &rexmit);
3798 tcp_newly_delivered(sk, delivered, flag);
3799 tcp_xmit_recovery(sk, rexmit);
3800 }
3801
3802 return 0;
3803}
3804
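/* Parse a Fast Open cookie option (RFC 7413, or its experimental
 * predecessor). Only SYN segments may carry one; valid lengths are 0 (a
 * cookie request) or an even size within the min/max cookie bounds, and
 * anything else records len = -1 so the cookie is ignored.
 */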
3805static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3806 bool syn, struct tcp_fastopen_cookie *foc,
3807 bool exp_opt)
3808{
 /* Valid only in SYN or SYN-ACK with an even length. */
3810 if (!foc || !syn || len < 0 || (len & 1))
3811 return;
3812
3813 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3814 len <= TCP_FASTOPEN_COOKIE_MAX)
3815 memcpy(foc->val, cookie, len);
3816 else if (len != 0)
3817 len = -1;
3818 foc->len = len;
3819 foc->exp = exp_opt;
3820}
3821
3822static void smc_parse_options(const struct tcphdr *th,
3823 struct tcp_options_received *opt_rx,
3824 const unsigned char *ptr,
3825 int opsize)
3826{
3827#if IS_ENABLED(CONFIG_SMC)
3828 if (static_branch_unlikely(&tcp_have_smc)) {
3829 if (th->syn && !(opsize & 1) &&
3830 opsize >= TCPOLEN_EXP_SMC_BASE &&
3831 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
3832 opt_rx->smc_ok = 1;
3833 }
3834#endif
3835}

/* Try to parse the MSS option from the TCP header. Return 0 on failure,
 * clamped value on success.
 */
3840static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
3841{
3842 const unsigned char *ptr = (const unsigned char *)(th + 1);
3843 int length = (th->doff * 4) - sizeof(struct tcphdr);
3844 u16 mss = 0;
3845
3846 while (length > 0) {
3847 int opcode = *ptr++;
3848 int opsize;
3849
3850 switch (opcode) {
3851 case TCPOPT_EOL:
3852 return mss;
3853 case TCPOPT_NOP:
3854 length--;
3855 continue;
3856 default:
3857 if (length < 2)
3858 return mss;
3859 opsize = *ptr++;
3860 if (opsize < 2)
3861 return mss;
3862 if (opsize > length)
3863 return mss;
3864 if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
3865 u16 in_mss = get_unaligned_be16(ptr);
3866
3867 if (in_mss) {
3868 if (user_mss && user_mss < in_mss)
3869 in_mss = user_mss;
3870 mss = in_mss;
3871 }
3872 }
3873 ptr += opsize - 2;
3874 length -= opsize;
3875 }
3876 }
3877 return mss;
3878}

/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
 */
3884void tcp_parse_options(const struct net *net,
3885 const struct sk_buff *skb,
3886 struct tcp_options_received *opt_rx, int estab,
3887 struct tcp_fastopen_cookie *foc)
3888{
3889 const unsigned char *ptr;
3890 const struct tcphdr *th = tcp_hdr(skb);
3891 int length = (th->doff * 4) - sizeof(struct tcphdr);
3892
3893 ptr = (const unsigned char *)(th + 1);
3894 opt_rx->saw_tstamp = 0;
3895
3896 while (length > 0) {
3897 int opcode = *ptr++;
3898 int opsize;
3899
3900 switch (opcode) {
3901 case TCPOPT_EOL:
3902 return;
3903 case TCPOPT_NOP:
3904 length--;
3905 continue;
3906 default:
3907 if (length < 2)
3908 return;
3909 opsize = *ptr++;
3910 if (opsize < 2)
3911 return;
3912 if (opsize > length)
3913 return;
3914 switch (opcode) {
3915 case TCPOPT_MSS:
3916 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3917 u16 in_mss = get_unaligned_be16(ptr);
3918 if (in_mss) {
3919 if (opt_rx->user_mss &&
3920 opt_rx->user_mss < in_mss)
3921 in_mss = opt_rx->user_mss;
3922 opt_rx->mss_clamp = in_mss;
3923 }
3924 }
3925 break;
3926 case TCPOPT_WINDOW:
3927 if (opsize == TCPOLEN_WINDOW && th->syn &&
3928 !estab && net->ipv4.sysctl_tcp_window_scaling) {
3929 __u8 snd_wscale = *(__u8 *)ptr;
3930 opt_rx->wscale_ok = 1;
3931 if (snd_wscale > TCP_MAX_WSCALE) {
3932 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
3933 __func__,
3934 snd_wscale,
3935 TCP_MAX_WSCALE);
3936 snd_wscale = TCP_MAX_WSCALE;
3937 }
3938 opt_rx->snd_wscale = snd_wscale;
3939 }
3940 break;
3941 case TCPOPT_TIMESTAMP:
3942 if ((opsize == TCPOLEN_TIMESTAMP) &&
3943 ((estab && opt_rx->tstamp_ok) ||
3944 (!estab && net->ipv4.sysctl_tcp_timestamps))) {
3945 opt_rx->saw_tstamp = 1;
3946 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3947 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3948 }
3949 break;
3950 case TCPOPT_SACK_PERM:
3951 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3952 !estab && net->ipv4.sysctl_tcp_sack) {
3953 opt_rx->sack_ok = TCP_SACK_SEEN;
3954 tcp_sack_reset(opt_rx);
3955 }
3956 break;
3957
3958 case TCPOPT_SACK:
3959 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3960 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3961 opt_rx->sack_ok) {
3962 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3963 }
3964 break;
3965#ifdef CONFIG_TCP_MD5SIG
3966 case TCPOPT_MD5SIG:
 /*
  * The MD5 Hash has already been
  * checked (see tcp_v{4,6}_do_rcv()).
  */
3971 break;
3972#endif
3973 case TCPOPT_FASTOPEN:
3974 tcp_parse_fastopen_option(
3975 opsize - TCPOLEN_FASTOPEN_BASE,
3976 ptr, th->syn, foc, false);
3977 break;
3978
3979 case TCPOPT_EXP:
 /* Fast Open option shares code 254 using a
  * 16 bits magic number.
  */
3983 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
3984 get_unaligned_be16(ptr) ==
3985 TCPOPT_FASTOPEN_MAGIC)
3986 tcp_parse_fastopen_option(opsize -
3987 TCPOLEN_EXP_FASTOPEN_BASE,
3988 ptr + 2, th->syn, foc, true);
3989 else
3990 smc_parse_options(th, opt_rx, ptr,
3991 opsize);
3992 break;
3993
3994 }
3995 ptr += opsize-2;
3996 length -= opsize;
3997 }
3998 }
3999}
4000EXPORT_SYMBOL(tcp_parse_options);
4001
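/* Fast path: when timestamps are the only option they occupy one aligned
 * 12-byte block (NOP, NOP, kind, length, TSval, TSecr), so a single 32-bit
 * compare recognizes the prefix and the two values can be read directly.
 */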
4002static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
4003{
4004 const __be32 *ptr = (const __be32 *)(th + 1);
4005
4006 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
4007 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
4008 tp->rx_opt.saw_tstamp = 1;
4009 ++ptr;
4010 tp->rx_opt.rcv_tsval = ntohl(*ptr);
4011 ++ptr;
4012 if (*ptr)
4013 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
4014 else
4015 tp->rx_opt.rcv_tsecr = 0;
4016 return true;
4017 }
4018 return false;
4019}

/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */
4024static bool tcp_fast_parse_options(const struct net *net,
4025 const struct sk_buff *skb,
4026 const struct tcphdr *th, struct tcp_sock *tp)
4027{
 /* In the spirit of fast parsing, compare doff directly to constant
  * values.  Because equality is used, short doff can be ignored here.
  */
4031 if (th->doff == (sizeof(*th) / 4)) {
4032 tp->rx_opt.saw_tstamp = 0;
4033 return false;
4034 } else if (tp->rx_opt.tstamp_ok &&
4035 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
4036 if (tcp_parse_aligned_timestamp(tp, th))
4037 return true;
4038 }
4039
4040 tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
4041 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
4042 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
4043
4044 return true;
4045}
4046
4047#ifdef CONFIG_TCP_MD5SIG
/*
 * Parse MD5 Signature option
 */
4051const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
4052{
4053 int length = (th->doff << 2) - sizeof(*th);
4054 const u8 *ptr = (const u8 *)(th + 1);
4055
4056
4057 while (length >= TCPOLEN_MD5SIG) {
4058 int opcode = *ptr++;
4059 int opsize;
4060
4061 switch (opcode) {
4062 case TCPOPT_EOL:
4063 return NULL;
4064 case TCPOPT_NOP:
4065 length--;
4066 continue;
4067 default:
4068 opsize = *ptr++;
4069 if (opsize < 2 || opsize > length)
4070 return NULL;
4071 if (opcode == TCPOPT_MD5SIG)
4072 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
4073 }
4074 ptr += opsize - 2;
4075 length -= opsize;
4076 }
4077 return NULL;
4078}
4079EXPORT_SYMBOL(tcp_parse_md5sig_option);
4080#endif

/* PAWS as specified is broken for pure ACKs.
 *
 * It is not fatal: if an ACK does _not_ change critical state (sequence
 * numbers or window) it can still safely pass through the stack. The
 * predicate below verifies that the segment is a pure, in-order duplicate
 * ACK usable for nothing but congestion handling, and that its timestamp
 * is no more than about one RTO older than ts_recent, which is as stale
 * as a reordered retransmission's timestamp can legitimately be.
 */
4105static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4106{
4107 const struct tcp_sock *tp = tcp_sk(sk);
4108 const struct tcphdr *th = tcp_hdr(skb);
4109 u32 seq = TCP_SKB_CB(skb)->seq;
4110 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4111
4112 return (
4113 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4114
4115
4116 ack == tp->snd_una &&
4117
4118
4119 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4120
4121
4122 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4123}
4124
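/* PAWS check (RFC 7323): discard a segment carrying an old timestamp
 * unless tcp_disordered_ack() shows it is a harmless pure duplicate ACK.
 */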
4125static inline bool tcp_paws_discard(const struct sock *sk,
4126 const struct sk_buff *skb)
4127{
4128 const struct tcp_sock *tp = tcp_sk(sk);
4129
4130 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4131 !tcp_disordered_ack(sk, skb);
4132}

/* Check segment sequence number for validity.
 *
 * Segment controls are considered valid, if the segment
 * fits to the window after truncation to the window. Acceptability
 * of data (and SYN, FIN, of course) is checked separately.
 * See tcp_data_queue(), for example.
 *
 * Also, controls (RST is main one) are accepted using RCV.WUP instead
 * of RCV.NXT. Peer still did not advance his SND.UNA when we
 * delayed ACK, so that hisSND.UNA<=ourRCV.WUP.
 * (borrowed from freebsd)
 */
4147static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4148{
4149 return !before(end_seq, tp->rcv_wup) &&
4150 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4151}

/* When we get a reset we do this. */
4154void tcp_reset(struct sock *sk)
4155{
4156 trace_tcp_receive_reset(sk);

 /* We want the right error as BSD sees it (and indeed as we do). */
4159 switch (sk->sk_state) {
4160 case TCP_SYN_SENT:
4161 sk->sk_err = ECONNREFUSED;
4162 break;
4163 case TCP_CLOSE_WAIT:
4164 sk->sk_err = EPIPE;
4165 break;
4166 case TCP_CLOSE:
4167 return;
4168 default:
4169 sk->sk_err = ECONNRESET;
4170 }
4171
4172 smp_wmb();
4173
4174 tcp_write_queue_purge(sk);
4175 tcp_done(sk);
4176
4177 if (!sock_flag(sk, SOCK_DEAD))
4178 sk->sk_error_report(sk);
4179}

/*
 *	Process the FIN bit. This now behaves as it is supposed to work
 *	and the FIN takes effect when it is validly part of sequence
 *	space. Not before when we get holes.
 *
 *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 *	(and thence onto LAST-ACK and finally, CLOSE, we never enter
 *	TIME-WAIT)
 *
 *	If we are in FINWAIT-1, a received FIN indicates simultaneous
 *	close and we go into CLOSING (and later onto TIME-WAIT)
 *
 *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 */
4195void tcp_fin(struct sock *sk)
4196{
4197 struct tcp_sock *tp = tcp_sk(sk);
4198
4199 inet_csk_schedule_ack(sk);
4200
4201 sk->sk_shutdown |= RCV_SHUTDOWN;
4202 sock_set_flag(sk, SOCK_DONE);
4203
4204 switch (sk->sk_state) {
4205 case TCP_SYN_RECV:
4206 case TCP_ESTABLISHED:
4207
4208 tcp_set_state(sk, TCP_CLOSE_WAIT);
4209 inet_csk_enter_pingpong_mode(sk);
4210 break;
4211
4212 case TCP_CLOSE_WAIT:
4213 case TCP_CLOSING:
4214
4215
4216
4217 break;
4218 case TCP_LAST_ACK:
4219
4220 break;
4221
4222 case TCP_FIN_WAIT1:
4223
4224
4225
4226
4227 tcp_send_ack(sk);
4228 tcp_set_state(sk, TCP_CLOSING);
4229 break;
4230 case TCP_FIN_WAIT2:
4231
4232 tcp_send_ack(sk);
4233 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4234 break;
4235 default:
4236
4237
4238
4239 pr_err("%s: Impossible, sk->sk_state=%d\n",
4240 __func__, sk->sk_state);
4241 break;
4242 }
4243
4244
4245
4246
4247 skb_rbtree_purge(&tp->out_of_order_queue);
4248 if (tcp_is_sack(tp))
4249 tcp_sack_reset(&tp->rx_opt);
4250 sk_mem_reclaim(sk);
4251
4252 if (!sock_flag(sk, SOCK_DEAD)) {
4253 sk->sk_state_change(sk);
4254
4255
4256 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4257 sk->sk_state == TCP_CLOSE)
4258 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4259 else
4260 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4261 }
4262}
4263
4264static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4265 u32 end_seq)
4266{
4267 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4268 if (before(seq, sp->start_seq))
4269 sp->start_seq = seq;
4270 if (after(end_seq, sp->end_seq))
4271 sp->end_seq = end_seq;
4272 return true;
4273 }
4274 return false;
4275}
4276
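/* Record a D-SACK block (RFC 2883) advertising receipt of duplicate data,
 * bumping the "old" or "out-of-order" D-SACK MIB counter as appropriate.
 */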
4277static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4278{
4279 struct tcp_sock *tp = tcp_sk(sk);
4280
4281 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4282 int mib_idx;
4283
4284 if (before(seq, tp->rcv_nxt))
4285 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4286 else
4287 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4288
4289 NET_INC_STATS(sock_net(sk), mib_idx);
4290
4291 tp->rx_opt.dsack = 1;
4292 tp->duplicate_sack[0].start_seq = seq;
4293 tp->duplicate_sack[0].end_seq = end_seq;
4294 }
4295}
4296
4297static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4298{
4299 struct tcp_sock *tp = tcp_sk(sk);
4300
4301 if (!tp->rx_opt.dsack)
4302 tcp_dsack_set(sk, seq, end_seq);
4303 else
4304 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4305}
4306
4307static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
4308{
4309
4310
4311
4312
4313
4314 if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) {
4315 sk_rethink_txhash(sk);
4316 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
4317 }
4318}
4319
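/* Send a duplicate ACK for an out-of-window or partly duplicate segment,
 * entering quickack mode and, when D-SACK is enabled, describing exactly
 * which span arrived twice.
 */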
4320static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4321{
4322 struct tcp_sock *tp = tcp_sk(sk);
4323
4324 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4325 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4326 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4327 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4328
4329 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4330 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4331
4332 tcp_rcv_spurious_retrans(sk, skb);
4333 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4334 end_seq = tp->rcv_nxt;
4335 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4336 }
4337 }
4338
4339 tcp_send_ack(sk);
4340}

/* These routines update the SACK block as out-of-order packets arrive or
 * in-order packets close up the sequence space.
 */
4345static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4346{
4347 int this_sack;
4348 struct tcp_sack_block *sp = &tp->selective_acks[0];
4349 struct tcp_sack_block *swalk = sp + 1;
4350
4351
4352
4353
4354 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4355 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4356 int i;
4357
4358
4359
4360
4361 tp->rx_opt.num_sacks--;
4362 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4363 sp[i] = sp[i + 1];
4364 continue;
4365 }
4366 this_sack++, swalk++;
4367 }
4368}
4369
4370static void tcp_sack_compress_send_ack(struct sock *sk)
4371{
4372 struct tcp_sock *tp = tcp_sk(sk);
4373
4374 if (!tp->compressed_ack)
4375 return;
4376
4377 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
4378 __sock_put(sk);
4379
4380
4381
4382
4383
4384 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
4385 tp->compressed_ack - 1);
4386
4387 tp->compressed_ack = 0;
4388 tcp_send_ack(sk);
4389}
4390

/* Reasonable amount of sack blocks included in TCP SACK option
 * The max is 4, but this becomes 3 if TCP timestamps are there.
 * Given that SACK packets might be lost, be conservative and use 2.
 */
4395#define TCP_SACK_BLOCKS_EXPECTED 2
4396
4397static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4398{
4399 struct tcp_sock *tp = tcp_sk(sk);
4400 struct tcp_sack_block *sp = &tp->selective_acks[0];
4401 int cur_sacks = tp->rx_opt.num_sacks;
4402 int this_sack;
4403
4404 if (!cur_sacks)
4405 goto new_sack;
4406
4407 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4408 if (tcp_sack_extend(sp, seq, end_seq)) {
4409 if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
4410 tcp_sack_compress_send_ack(sk);
4411
4412 for (; this_sack > 0; this_sack--, sp--)
4413 swap(*sp, *(sp - 1));
4414 if (cur_sacks > 1)
4415 tcp_sack_maybe_coalesce(tp);
4416 return;
4417 }
4418 }
4419
4420 if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
4421 tcp_sack_compress_send_ack(sk);
4422
4423
4424
4425
4426
4427
4428
4429 if (this_sack >= TCP_NUM_SACKS) {
4430 this_sack--;
4431 tp->rx_opt.num_sacks--;
4432 sp--;
4433 }
4434 for (; this_sack > 0; this_sack--, sp--)
4435 *sp = *(sp - 1);
4436
4437new_sack:
4438
4439 sp->start_seq = seq;
4440 sp->end_seq = end_seq;
4441 tp->rx_opt.num_sacks++;
4442}

/* RCV.NXT advances, some SACKs should be eaten. */
4446static void tcp_sack_remove(struct tcp_sock *tp)
4447{
4448 struct tcp_sack_block *sp = &tp->selective_acks[0];
4449 int num_sacks = tp->rx_opt.num_sacks;
4450 int this_sack;
4451
4452
4453 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4454 tp->rx_opt.num_sacks = 0;
4455 return;
4456 }
4457
4458 for (this_sack = 0; this_sack < num_sacks;) {
4459
4460 if (!before(tp->rcv_nxt, sp->start_seq)) {
4461 int i;
4462
4463
4464 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4465
4466
4467 for (i = this_sack+1; i < num_sacks; i++)
4468 tp->selective_acks[i-1] = tp->selective_acks[i];
4469 num_sacks--;
4470 continue;
4471 }
4472 this_sack++;
4473 sp++;
4474 }
4475 tp->rx_opt.num_sacks = num_sacks;
4476}

/**
 * tcp_try_coalesce - try to merge skb to prior one
 * @sk: socket
 * @to: prior buffer
 * @from: buffer to add in queue
 * @fragstolen: pointer to boolean
 *
 * Before queueing skb @from after @to, try to merge them
 * to reduce overall memory use and queue lengths, if cost is small.
 * Packets in ofo or receive queues can stay a long time.
 * Better try to coalesce them right now to avoid future collapses.
 * Returns true if caller should free @from instead of queueing it
 */
4491static bool tcp_try_coalesce(struct sock *sk,
4492 struct sk_buff *to,
4493 struct sk_buff *from,
4494 bool *fragstolen)
4495{
4496 int delta;
4497
4498 *fragstolen = false;
4499
4500
4501 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4502 return false;
4503
4504 if (!mptcp_skb_can_collapse(to, from))
4505 return false;
4506
4507#ifdef CONFIG_TLS_DEVICE
4508 if (from->decrypted != to->decrypted)
4509 return false;
4510#endif
4511
4512 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4513 return false;
4514
4515 atomic_add(delta, &sk->sk_rmem_alloc);
4516 sk_mem_charge(sk, delta);
4517 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4518 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4519 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4520 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4521
4522 if (TCP_SKB_CB(from)->has_rxtstamp) {
4523 TCP_SKB_CB(to)->has_rxtstamp = true;
4524 to->tstamp = from->tstamp;
4525 skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
4526 }
4527
4528 return true;
4529}
4530
4531static bool tcp_ooo_try_coalesce(struct sock *sk,
4532 struct sk_buff *to,
4533 struct sk_buff *from,
4534 bool *fragstolen)
4535{
4536 bool res = tcp_try_coalesce(sk, to, from, fragstolen);
4537
4538
4539 if (res) {
4540 u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
4541 max_t(u16, 1, skb_shinfo(from)->gso_segs);
4542
4543 skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
4544 }
4545 return res;
4546}
4547
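/* Account the dropped skb in sk_drops before freeing it. */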
4548static void tcp_drop(struct sock *sk, struct sk_buff *skb)
4549{
4550 sk_drops_add(sk, skb);
4551 __kfree_skb(skb);
4552}

/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */
4557static void tcp_ofo_queue(struct sock *sk)
4558{
4559 struct tcp_sock *tp = tcp_sk(sk);
4560 __u32 dsack_high = tp->rcv_nxt;
4561 bool fin, fragstolen, eaten;
4562 struct sk_buff *skb, *tail;
4563 struct rb_node *p;
4564
4565 p = rb_first(&tp->out_of_order_queue);
4566 while (p) {
4567 skb = rb_to_skb(p);
4568 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4569 break;
4570
4571 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4572 __u32 dsack = dsack_high;
4573 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4574 dsack_high = TCP_SKB_CB(skb)->end_seq;
4575 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4576 }
4577 p = rb_next(p);
4578 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4579
4580 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4581 tcp_drop(sk, skb);
4582 continue;
4583 }
4584
4585 tail = skb_peek_tail(&sk->sk_receive_queue);
4586 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4587 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4588 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4589 if (!eaten)
4590 __skb_queue_tail(&sk->sk_receive_queue, skb);
4591 else
4592 kfree_skb_partial(skb, fragstolen);
4593
4594 if (unlikely(fin)) {
4595 tcp_fin(sk);
4596
4597
4598
4599 break;
4600 }
4601 }
4602}
4603
4604static bool tcp_prune_ofo_queue(struct sock *sk);
4605static int tcp_prune_queue(struct sock *sk);
4606
4607static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4608 unsigned int size)
4609{
4610 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4611 !sk_rmem_schedule(sk, skb, size)) {
4612
4613 if (tcp_prune_queue(sk) < 0)
4614 return -1;
4615
4616 while (!sk_rmem_schedule(sk, skb, size)) {
4617 if (!tcp_prune_ofo_queue(sk))
4618 return -1;
4619 }
4620 }
4621 return 0;
4622}
4623
4624static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4625{
4626 struct tcp_sock *tp = tcp_sk(sk);
4627 struct rb_node **p, *parent;
4628 struct sk_buff *skb1;
4629 u32 seq, end_seq;
4630 bool fragstolen;
4631
4632 tcp_ecn_check_ce(sk, skb);
4633
4634 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4635 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4636 sk->sk_data_ready(sk);
4637 tcp_drop(sk, skb);
4638 return;
4639 }
4640
4641
4642 tp->pred_flags = 0;
4643 inet_csk_schedule_ack(sk);
4644
4645 tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
4646 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4647 seq = TCP_SKB_CB(skb)->seq;
4648 end_seq = TCP_SKB_CB(skb)->end_seq;
4649
4650 p = &tp->out_of_order_queue.rb_node;
4651 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4652
4653 if (tcp_is_sack(tp)) {
4654 tp->rx_opt.num_sacks = 1;
4655 tp->selective_acks[0].start_seq = seq;
4656 tp->selective_acks[0].end_seq = end_seq;
4657 }
4658 rb_link_node(&skb->rbnode, NULL, p);
4659 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4660 tp->ooo_last_skb = skb;
4661 goto end;
4662 }
4663
4664
4665
4666
4667 if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
4668 skb, &fragstolen)) {
4669coalesce_done:
4670
4671
4672
4673 if (tcp_is_sack(tp))
4674 tcp_grow_window(sk, skb);
4675 kfree_skb_partial(skb, fragstolen);
4676 skb = NULL;
4677 goto add_sack;
4678 }
4679
4680 if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
4681 parent = &tp->ooo_last_skb->rbnode;
4682 p = &parent->rb_right;
4683 goto insert;
4684 }
4685
4686
4687 parent = NULL;
4688 while (*p) {
4689 parent = *p;
4690 skb1 = rb_to_skb(parent);
4691 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4692 p = &parent->rb_left;
4693 continue;
4694 }
4695 if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4696 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4697
4698 NET_INC_STATS(sock_net(sk),
4699 LINUX_MIB_TCPOFOMERGE);
4700 tcp_drop(sk, skb);
4701 skb = NULL;
4702 tcp_dsack_set(sk, seq, end_seq);
4703 goto add_sack;
4704 }
4705 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4706
4707 tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
4708 } else {
4709
4710
4711
4712 rb_replace_node(&skb1->rbnode, &skb->rbnode,
4713 &tp->out_of_order_queue);
4714 tcp_dsack_extend(sk,
4715 TCP_SKB_CB(skb1)->seq,
4716 TCP_SKB_CB(skb1)->end_seq);
4717 NET_INC_STATS(sock_net(sk),
4718 LINUX_MIB_TCPOFOMERGE);
4719 tcp_drop(sk, skb1);
4720 goto merge_right;
4721 }
4722 } else if (tcp_ooo_try_coalesce(sk, skb1,
4723 skb, &fragstolen)) {
4724 goto coalesce_done;
4725 }
4726 p = &parent->rb_right;
4727 }
4728insert:
4729
4730 rb_link_node(&skb->rbnode, parent, p);
4731 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4732
4733merge_right:
4734
4735 while ((skb1 = skb_rb_next(skb)) != NULL) {
4736 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4737 break;
4738 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4739 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4740 end_seq);
4741 break;
4742 }
4743 rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
4744 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4745 TCP_SKB_CB(skb1)->end_seq);
4746 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4747 tcp_drop(sk, skb1);
4748 }
4749
4750 if (!skb1)
4751 tp->ooo_last_skb = skb;
4752
4753add_sack:
4754 if (tcp_is_sack(tp))
4755 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4756end:
4757 if (skb) {
4758
4759
4760
4761 if (tcp_is_sack(tp))
4762 tcp_grow_window(sk, skb);
4763 skb_condense(skb);
4764 skb_set_owner_r(skb, sk);
4765 }
4766}
4767
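/* Queue skb on the receive queue, coalescing with the tail when cheap;
 * returns nonzero if the caller should free the merged skb itself.
 */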
4768static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
4769 bool *fragstolen)
4770{
4771 int eaten;
4772 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4773
4774 eaten = (tail &&
4775 tcp_try_coalesce(sk, tail,
4776 skb, fragstolen)) ? 1 : 0;
4777 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4778 if (!eaten) {
4779 __skb_queue_tail(&sk->sk_receive_queue, skb);
4780 skb_set_owner_r(skb, sk);
4781 }
4782 return eaten;
4783}
4784
4785int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4786{
4787 struct sk_buff *skb;
4788 int err = -ENOMEM;
4789 int data_len = 0;
4790 bool fragstolen;
4791
4792 if (size == 0)
4793 return 0;
4794
4795 if (size > PAGE_SIZE) {
4796 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4797
4798 data_len = npages << PAGE_SHIFT;
4799 size = data_len + (size & ~PAGE_MASK);
4800 }
4801 skb = alloc_skb_with_frags(size - data_len, data_len,
4802 PAGE_ALLOC_COSTLY_ORDER,
4803 &err, sk->sk_allocation);
4804 if (!skb)
4805 goto err;
4806
4807 skb_put(skb, size - data_len);
4808 skb->data_len = data_len;
4809 skb->len = size;
4810
4811 if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4812 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4813 goto err_free;
4814 }
4815
4816 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4817 if (err)
4818 goto err_free;
4819
4820 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4821 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4822 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4823
4824 if (tcp_queue_rcv(sk, skb, &fragstolen)) {
4825 WARN_ON_ONCE(fragstolen);
4826 __kfree_skb(skb);
4827 }
4828 return size;
4829
4830err_free:
4831 kfree_skb(skb);
4832err:
4833 return err;
4834
4835}
4836
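/* Wake the reader only when it is worth it: at least sk_rcvlowat bytes
 * are available, we are under receive-memory pressure, or the socket is
 * already marked done (FIN/error).
 */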
4837void tcp_data_ready(struct sock *sk)
4838{
4839 const struct tcp_sock *tp = tcp_sk(sk);
4840 int avail = tp->rcv_nxt - tp->copied_seq;
4841
4842 if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
4843 !sock_flag(sk, SOCK_DONE))
4844 return;
4845
4846 sk->sk_data_ready(sk);
4847}
4848
4849static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4850{
4851 struct tcp_sock *tp = tcp_sk(sk);
4852 bool fragstolen;
4853 int eaten;
4854
4855 if (sk_is_mptcp(sk))
4856 mptcp_incoming_options(sk, skb, &tp->rx_opt);
4857
4858 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
4859 __kfree_skb(skb);
4860 return;
4861 }
4862 skb_dst_drop(skb);
4863 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4864
4865 tp->rx_opt.dsack = 0;

 /*  Queue data for delivery to the user.
  *  Packets in sequence go to the receive queue.
  *  Out of sequence packets to the out_of_order_queue.
  */
4871 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4872 if (tcp_receive_window(tp) == 0) {
4873 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4874 goto out_of_window;
4875 }

 /* Ok. In sequence. In window. */
4878queue_and_out:
4879 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4880 sk_forced_mem_schedule(sk, skb->truesize);
4881 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4882 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4883 sk->sk_data_ready(sk);
4884 goto drop;
4885 }
4886
4887 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
4888 if (skb->len)
4889 tcp_event_data_recv(sk, skb);
4890 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4891 tcp_fin(sk);
4892
4893 if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4894 tcp_ofo_queue(sk);

 /* RFC5681. 4.2. SHOULD send immediate ACK, when
  * gap in queue is filled.
  */
4899 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4900 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
4901 }
4902
4903 if (tp->rx_opt.num_sacks)
4904 tcp_sack_remove(tp);
4905
4906 tcp_fast_path_check(sk);
4907
4908 if (eaten > 0)
4909 kfree_skb_partial(skb, fragstolen);
4910 if (!sock_flag(sk, SOCK_DEAD))
4911 tcp_data_ready(sk);
4912 return;
4913 }
4914
4915 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4916 tcp_rcv_spurious_retrans(sk, skb);
 /* A retransmit, 2nd most common case.  Force an immediate ack. */
4918 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4919 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4920
4921out_of_window:
4922 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4923 inet_csk_schedule_ack(sk);
4924drop:
4925 tcp_drop(sk, skb);
4926 return;
4927 }
4928
 /* Out of window. F.e. zero window probe. */
4930 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4931 goto out_of_window;
4932
4933 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 /* Partial packet, seq < rcv_next < end_seq */
4935 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4936
 /* If window is closed, drop tail of packet. But after
  * remembering D-SACK for its head made in previous line.
  */
4940 if (!tcp_receive_window(tp)) {
4941 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4942 goto out_of_window;
4943 }
4944 goto queue_and_out;
4945 }
4946
4947 tcp_data_queue_ofo(sk, skb);
4948}
4949
4950static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
4951{
4952 if (list)
4953 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
4954
4955 return skb_rb_next(skb);
4956}
4957
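/* Unlink skb from its queue (linked list or rbtree), free it, and return
 * its successor; used while collapsing queues to reclaim memory.
 */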
4958static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4959 struct sk_buff_head *list,
4960 struct rb_root *root)
4961{
4962 struct sk_buff *next = tcp_skb_next(skb, list);
4963
4964 if (list)
4965 __skb_unlink(skb, list);
4966 else
4967 rb_erase(&skb->rbnode, root);
4968
4969 __kfree_skb(skb);
4970 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4971
4972 return next;
4973}
4974
4975
4976void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
4977{
4978 struct rb_node **p = &root->rb_node;
4979 struct rb_node *parent = NULL;
4980 struct sk_buff *skb1;
4981
4982 while (*p) {
4983 parent = *p;
4984 skb1 = rb_to_skb(parent);
4985 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
4986 p = &parent->rb_left;
4987 else
4988 p = &parent->rb_right;
4989 }
4990 rb_link_node(&skb->rbnode, parent, p);
4991 rb_insert_color(&skb->rbnode, root);
4992}

/* Collapse contiguous sequence of skbs head..tail with
 * sequence numbers start..end.
 *
 * If tail is NULL, this means until the end of the queue.
 *
 * Segments with FIN/SYN are not collapsed (only because this
 * simplifies code)
 */
5002static void
5003tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
5004 struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
5005{
5006 struct sk_buff *skb = head, *n;
5007 struct sk_buff_head tmp;
5008 bool end_of_skbs;
5009
5010
5011
5012
5013restart:
5014 for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
5015 n = tcp_skb_next(skb, list);
5016
5017
5018 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
5019 skb = tcp_collapse_one(sk, skb, list, root);
5020 if (!skb)
5021 break;
5022 goto restart;
5023 }
5024
5025
5026
5027
5028
5029
5030 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
5031 (tcp_win_from_space(sk, skb->truesize) > skb->len ||
5032 before(TCP_SKB_CB(skb)->seq, start))) {
5033 end_of_skbs = false;
5034 break;
5035 }
5036
5037 if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
5038 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
5039 end_of_skbs = false;
5040 break;
5041 }
5042
5043
5044 start = TCP_SKB_CB(skb)->end_seq;
5045 }
5046 if (end_of_skbs ||
5047 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
5048 return;
5049
5050 __skb_queue_head_init(&tmp);
5051
5052 while (before(start, end)) {
5053 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
5054 struct sk_buff *nskb;
5055
5056 nskb = alloc_skb(copy, GFP_ATOMIC);
5057 if (!nskb)
5058 break;
5059
5060 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
5061#ifdef CONFIG_TLS_DEVICE
5062 nskb->decrypted = skb->decrypted;
5063#endif
5064 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
5065 if (list)
5066 __skb_queue_before(list, skb, nskb);
5067 else
5068 __skb_queue_tail(&tmp, nskb);
5069 skb_set_owner_r(nskb, sk);
5070 mptcp_skb_ext_move(nskb, skb);
5071
5072
5073 while (copy > 0) {
5074 int offset = start - TCP_SKB_CB(skb)->seq;
5075 int size = TCP_SKB_CB(skb)->end_seq - start;
5076
5077 BUG_ON(offset < 0);
5078 if (size > 0) {
5079 size = min(copy, size);
5080 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
5081 BUG();
5082 TCP_SKB_CB(nskb)->end_seq += size;
5083 copy -= size;
5084 start += size;
5085 }
5086 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
5087 skb = tcp_collapse_one(sk, skb, list, root);
5088 if (!skb ||
5089 skb == tail ||
5090 !mptcp_skb_can_collapse(nskb, skb) ||
5091 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
5092 goto end;
5093#ifdef CONFIG_TLS_DEVICE
5094 if (skb->decrypted != nskb->decrypted)
5095 goto end;
5096#endif
5097 }
5098 }
5099 }
5100end:
5101 skb_queue_walk_safe(&tmp, skb, n)
5102 tcp_rbtree_insert(root, skb);
5103}
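
/* Illustrative sketch (not part of the kernel build): the copy loop above
 * repacks overlapping receive-queue segments into freshly allocated, tightly
 * sized buffers. The stand-alone user-space program below, written for this
 * document with hypothetical types, mimics that repacking for an array of
 * byte ranges; it is a sketch of the idea, not of the skb machinery.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHUNK 8	/* stand-in for SKB_MAX_ORDER(0, 0) */

struct seg { uint32_t seq; uint32_t len; const char *data; };

int main(void)
{
	/* Two overlapping segments covering seq 100..115. */
	struct seg segs[] = {
		{ 100, 10, "ABCDEFGHIJ" },
		{ 105, 10, "FGHIJKLMNO" },
	};
	uint32_t start = 100, end = 115;
	unsigned int i = 0;

	while (start < end) {
		char nbuf[CHUNK + 1] = "";
		uint32_t copy = end - start < CHUNK ? end - start : CHUNK;
		uint32_t filled = 0;

		while (filled < copy && i < 2) {
			struct seg *s = &segs[i];
			uint32_t off = start - s->seq;	/* offset into segment */
			uint32_t size = s->seq + s->len - start;

			if (size > copy - filled)
				size = copy - filled;
			memcpy(nbuf + filled, s->data + off, size);
			filled += size;
			start += size;
			if (start >= s->seq + s->len)
				i++;	/* segment fully consumed: "collapse" it */
		}
		printf("chunk: %s\n", nbuf);	/* ABCDEFGH, then IJKLMNO */
	}
	return 0;
}
#endif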

/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
 * and tcp_collapse() them until all the queue is collapsed.
 */
static void tcp_collapse_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 range_truesize, sum_tiny = 0;
	struct sk_buff *skb, *head;
	u32 start, end;

	skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
	if (!skb) {
		tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
		return;
	}
	start = TCP_SKB_CB(skb)->seq;
	end = TCP_SKB_CB(skb)->end_seq;
	range_truesize = skb->truesize;

	for (head = skb;;) {
		skb = skb_rb_next(skb);

		/* Range is terminated when we see a gap or when
		 * we are at the queue end.
		 */
		if (!skb ||
		    after(TCP_SKB_CB(skb)->seq, end) ||
		    before(TCP_SKB_CB(skb)->end_seq, start)) {
			/* Do not attempt collapsing tiny skbs */
			if (range_truesize != head->truesize ||
			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
					     head, skb, start, end);
			} else {
				sum_tiny += range_truesize;
				if (sum_tiny > sk->sk_rcvbuf >> 3)
					return;
			}
			goto new_range;
		}

		range_truesize += skb->truesize;
		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
			start = TCP_SKB_CB(skb)->seq;
		if (after(TCP_SKB_CB(skb)->end_seq, end))
			end = TCP_SKB_CB(skb)->end_seq;
	}
}
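
/* Illustrative sketch (not part of the kernel build): the loop above grows a
 * [start, end) range while successive out-of-order skbs overlap it, and cuts
 * a new range at the first gap. The stand-alone user-space program below,
 * written for this document, applies the same rule to a sorted array of
 * (seq, end_seq) pairs.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

struct range { uint32_t seq, end_seq; };

int main(void)
{
	/* Sorted by seq; [10,20) and [15,25) overlap, [40,50) starts a gap. */
	struct range q[] = { {10, 20}, {15, 25}, {40, 50} };
	uint32_t start = q[0].seq, end = q[0].end_seq;
	unsigned int i;

	for (i = 1; i < 3; i++) {
		if ((int32_t)(q[i].seq - end) > 0) {	/* gap: emit range */
			printf("collapse [%u, %u)\n", start, end);
			start = q[i].seq;
			end = q[i].end_seq;
			continue;
		}
		if ((int32_t)(q[i].end_seq - end) > 0)	/* overlap: extend */
			end = q[i].end_seq;
	}
	printf("collapse [%u, %u)\n", start, end);	/* [10,25), then [40,50) */
	return 0;
}
#endif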

/*
 * Clean the out-of-order queue to make room.
 * We drop high sequences packets to :
 * 1) Let a chance for holes to be filled.
 * 2) not add too big latencies if thousands of packets sit there.
 *    (But if application shrinks SO_RCVBUF, we could still end up
 *     freeing whole queue here)
 * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
 *
 * Return true if queue has shrunk.
 */
static bool tcp_prune_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct rb_node *node, *prev;
	int goal;

	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
		return false;

	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
	goal = sk->sk_rcvbuf >> 3;
	node = &tp->ooo_last_skb->rbnode;
	do {
		prev = rb_prev(node);
		rb_erase(node, &tp->out_of_order_queue);
		goal -= rb_to_skb(node)->truesize;
		tcp_drop(sk, rb_to_skb(node));
		if (!prev || goal <= 0) {
			sk_mem_reclaim(sk);
			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
			    !tcp_under_memory_pressure(sk))
				break;
			goal = sk->sk_rcvbuf >> 3;
		}
		node = prev;
	} while (node);
	tp->ooo_last_skb = rb_to_skb(prev);

	/* Reset SACK state.  A conforming SACK implementation will
	 * do the same at a timeout based retransmit.  When a connection
	 * is in a sad state like this, we care only about integrity
	 * of the connection not performance.
	 */
	if (tp->rx_opt.sack_ok)
		tcp_sack_reset(&tp->rx_opt);
	return true;
}

/* Reduce allocated memory if we can, trying to get
 * the socket within its memory limits again.
 *
 * Return less than zero if we should start dropping frames
 * until the socket owning process reads some of the data
 * to stabilize the situation.
 */
static int tcp_prune_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		tcp_clamp_window(sk);
	else if (tcp_under_memory_pressure(sk))
		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	tcp_collapse_ofo_queue(sk);
	if (!skb_queue_empty(&sk->sk_receive_queue))
		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
			     skb_peek(&sk->sk_receive_queue),
			     NULL,
			     tp->copied_seq, tp->rcv_nxt);
	sk_mem_reclaim(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	/* Collapsing did not help, destructive actions follow.
	 * This must not ever occur.
	 */
	tcp_prune_ofo_queue(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	/* If we are really being abused, tell the caller to silently
	 * drop receive data on the floor.  It will get retransmitted
	 * and hopefully then we'll have sufficient space.
	 */
	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);

	/* Massive buffer overcommit. */
	tp->pred_flags = 0;
	return -1;
}

static bool tcp_should_expand_sndbuf(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* If the user specified a specific send buffer setting, do
	 * not modify it.
	 */
	if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
		return false;

	/* If we are under global TCP memory pressure, do not expand.  */
	if (tcp_under_memory_pressure(sk))
		return false;

	/* If we are under soft global TCP memory pressure, do not expand.  */
	if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
		return false;

	/* If we filled the congestion window, do not expand.  */
	if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
		return false;

	return true;
}

/* When incoming ACK allowed to free some skb from write_queue,
 * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
 * on the exit from tcp input handler.
 */
static void tcp_new_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_should_expand_sndbuf(sk)) {
		tcp_sndbuf_expand(sk);
		tp->snd_cwnd_stamp = tcp_jiffies32;
	}

	sk->sk_write_space(sk);
}

static void tcp_check_space(struct sock *sk)
{
	if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
		sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
		/* pairs with tcp_poll() */
		smp_mb();
		if (sk->sk_socket &&
		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
			tcp_new_space(sk);
			if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
				tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
		}
	}
}

static inline void tcp_data_snd_check(struct sock *sk)
{
	tcp_push_pending_frames(sk);
	tcp_check_space(sk);
}

/*
 * Check if sending an ack is needed.
 */
static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long rtt, delay;

	    /* More than one full frame received... */
	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
	     /* ... and right edge of window advances far enough.
	      * (tcp_recvmsg() will send ACK otherwise).
	      * If application uses SO_RCVLOWAT, we want send ack now if
	      * we have not received enough bytes to satisfy the condition.
	      */
	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
	    /* We ACK each frame or... */
	    tcp_in_quickack_mode(sk) ||
	    /* Protocol state mandates a one-time immediate ACK */
	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
send_now:
		tcp_send_ack(sk);
		return;
	}

	if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
		tcp_send_delayed_ack(sk);
		return;
	}

	if (!tcp_is_sack(tp) ||
	    tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
		goto send_now;

	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
		tp->dup_ack_counter = 0;
	}
	if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
		tp->dup_ack_counter++;
		goto send_now;
	}
	tp->compressed_ack++;
	if (hrtimer_is_queued(&tp->compressed_ack_timer))
		return;

	/* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
	rtt = tp->rcv_rtt_est.rtt_us;
	if (tp->srtt_us && tp->srtt_us < rtt)
		rtt = tp->srtt_us;

	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
		      rtt * (NSEC_PER_USEC >> 3) / 20);
	sock_hold(sk);
	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
			       sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
			       HRTIMER_MODE_REL_PINNED_SOFT);
}
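
/* Illustrative sketch (not part of the kernel build): the compressed-ack
 * timer above arms for roughly 5 % of the RTT, capped by the
 * tcp_comp_sack_delay_ns sysctl. The rtt_us fields are stored left-shifted
 * by 3 (units of 1/8 us), hence the (NSEC_PER_USEC >> 3) factor. The
 * stand-alone user-space program below, written for this document, redoes
 * that arithmetic with an assumed comp_sack_delay_ns cap of 1 ms.
 */
#if 0
#include <stdio.h>

#define NSEC_PER_USEC	1000UL

int main(void)
{
	unsigned long rtt = 10000UL << 3;	/* 10 ms RTT, stored in us << 3 */
	unsigned long cap_ns = 1000000UL;	/* assumed sysctl cap: 1 ms */
	unsigned long delay;

	/* (rtt_us << 3) * (1000 >> 3) / 20 == rtt_us * 1000 / 20 == 5 % of RTT in ns */
	delay = rtt * (NSEC_PER_USEC >> 3) / 20;
	if (delay > cap_ns)
		delay = cap_ns;
	printf("arm compressed-ack timer for %lu ns\n", delay);
	/* prints 500000: 5 % of 10 ms, under the 1 ms cap */
	return 0;
}
#endif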

static inline void tcp_ack_snd_check(struct sock *sk)
{
	if (!inet_csk_ack_scheduled(sk)) {
		/* We sent a data segment already. */
		return;
	}
	__tcp_ack_snd_check(sk, 1);
}

/*
 *	This routine is only called when we have urgent data
 *	signalled. Its the 'slow' part of tcp_urg. It could be
 *	moved inline now as tcp_urg is only called from one
 *	place. We handle URGent data wrong. We have to - as
 *	BSD still doesn't use the correction from RFC961.
 *	For 1003.1g we should support a new option TCP_STDURG to permit
 *	urgent behaviour sometimes.
 */
static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 ptr = ntohs(th->urg_ptr);

	if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
		ptr--;
	ptr += ntohl(th->seq);

	/* Ignore urgent data that we've already seen and read. */
	if (after(tp->copied_seq, ptr))
		return;

	/* Do not replay urg ptr.
	 *
	 * NOTE: interesting situation not covered by specs.
	 * Misbehaving sender may send urg ptr, pointing to segment,
	 * which we already have in ofo queue. We are not able to fetch
	 * such data and will stay in TCP_URG_NOTYET until will be eaten
	 * by recvmsg(). Seems, we are not obliged to handle such wicked
	 * situations. But it is worth to think about possibility of some
	 * DoSes using some hypothetical application level deadlock.
	 */
	if (before(ptr, tp->rcv_nxt))
		return;

	/* Do we already have a newer (or equal) urgent pointer? */
	if (tp->urg_data && !after(ptr, tp->urg_seq))
		return;

	/* Tell the world about our new urgent pointer. */
	sk_send_sigurg(sk);

	/* We may be adding urgent data when the last byte read was
	 * urgent. To do this requires some care. We cannot just ignore
	 * tp->copied_seq since we would read the last urgent byte again
	 * as data, nor can we alter copied_seq until this data arrives
	 * or we break the semantics of SIOCATMARK (and thus sockatmark()).
	 *
	 * So if the one-byte mark at copied_seq has not been consumed yet
	 * and a new urgent pointer arrives, step over the stale mark (and
	 * free its skb if that empties it) so the application does not
	 * read the old urgent byte as ordinary data.
	 */
	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
		tp->copied_seq++;
		if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			__kfree_skb(skb);
		}
	}

	tp->urg_data = TCP_URG_NOTYET;
	WRITE_ONCE(tp->urg_seq, ptr);

	/* Disable header prediction. */
	tp->pred_flags = 0;
}

/* This is the 'fast' part of urgent handling. */
static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Check if we get a new urgent pointer - normally not. */
	if (th->urg)
		tcp_check_urg(sk, th);

	/* Do we wait for any urgent data? - normally not... */
	if (tp->urg_data == TCP_URG_NOTYET) {
		u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
			  th->syn;

		/* Is the urgent pointer pointing into this packet? */
		if (ptr < skb->len) {
			u8 tmp;
			if (skb_copy_bits(skb, ptr, &tmp, 1))
				BUG();
			tp->urg_data = TCP_URG_VALID | tmp;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_data_ready(sk);
		}
	}
}
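
/* Illustrative sketch (not part of the kernel build): tcp_urg() locates the
 * urgent byte inside an skb that still carries the TCP header, so the offset
 * is urg_seq - seq plus the header length (doff * 4), minus one if the SYN
 * flag consumed a sequence number. The stand-alone user-space program below,
 * written for this document, redoes that offset computation.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t seg_seq = 1000;	/* sequence number of this segment */
	uint32_t urg_seq = 1003;	/* sequence number of the urgent byte */
	uint32_t doff = 5;		/* header length in 32-bit words (20 bytes) */
	uint32_t syn = 0;		/* SYN occupies one sequence number */
	uint32_t skb_len = 20 + 10;	/* header + 10 payload bytes */

	uint32_t ptr = urg_seq - seg_seq + doff * 4 - syn;

	if (ptr < skb_len)
		printf("urgent byte at skb offset %u (payload offset %u)\n",
		       ptr, ptr - doff * 4);	/* offsets 23 and 3 */
	return 0;
}
#endif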

/* Accept RST for rcv_nxt - 1 after a FIN.
 * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
 * FIN is sent followed by a RST packet. The RST is sent with the same
 * sequence number as the FIN, and thus according to RFC 5961 a challenge
 * ACK should be sent. However, Mac OSX rejects the challenge ACK and
 * resends the RST, so accept such a RST in the FIN-induced closing
 * states instead.
 */
static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
			(1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
					       TCPF_CLOSING));
}

/* Does PAWS and seqno based validation of an incoming segment, flags will
 * indicate whether a challenge ACK or a reset is warranted.
 */
static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
				  const struct tcphdr *th, int syn_inerr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	bool rst_seq_match = false;

	/* RFC1323: H1. Apply PAWS check first. */
	if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
	    tp->rx_opt.saw_tstamp &&
	    tcp_paws_discard(sk, skb)) {
		if (!th->rst) {
			NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
			if (!tcp_oow_rate_limited(sock_net(sk), skb,
						  LINUX_MIB_TCPACKSKIPPEDPAWS,
						  &tp->last_oow_ack_time))
				tcp_send_dupack(sk, skb);
			goto discard;
		}
		/* Reset is accepted even if it did not pass PAWS. */
	}

	/* Step 1: check sequence number */
	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		/* RFC793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST
		 * bit is set, if so drop the segment and return)".
		 */
		if (!th->rst) {
			if (th->syn)
				goto syn_challenge;
			if (!tcp_oow_rate_limited(sock_net(sk), skb,
						  LINUX_MIB_TCPACKSKIPPEDSEQ,
						  &tp->last_oow_ack_time))
				tcp_send_dupack(sk, skb);
		} else if (tcp_reset_check(sk, skb)) {
			tcp_reset(sk);
		}
		goto discard;
	}

	/* Step 2: check RST bit */
	if (th->rst) {
		/* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a
		 * FIN and SACK too if available):
		 * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or
		 * the right-most SACK block,
		 * then
		 *     RESET the connection
		 * else
		 *     Send a challenge ACK
		 */
		if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
		    tcp_reset_check(sk, skb)) {
			rst_seq_match = true;
		} else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
			struct tcp_sack_block *sp = &tp->selective_acks[0];
			int max_sack = sp[0].end_seq;
			int this_sack;

			for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
			     ++this_sack) {
				max_sack = after(sp[this_sack].end_seq,
						 max_sack) ?
					sp[this_sack].end_seq : max_sack;
			}

			if (TCP_SKB_CB(skb)->seq == max_sack)
				rst_seq_match = true;
		}

		if (rst_seq_match)
			tcp_reset(sk);
		else {
			/* Disable TFO if RST is out-of-order
			 * and no data has been received
			 * for current active TFO socket
			 */
			if (tp->syn_fastopen && !tp->data_segs_in &&
			    sk->sk_state == TCP_ESTABLISHED)
				tcp_fastopen_active_disable(sk);
			tcp_send_challenge_ack(sk, skb);
		}
		goto discard;
	}

	/* step 3: check security and precedence [ignored] */

	/* step 4: Check for a SYN
	 * RFC 5961 4.2 : Send a challenge ack
	 */
	if (th->syn) {
syn_challenge:
		if (syn_inerr)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
		tcp_send_challenge_ack(sk, skb);
		goto discard;
	}

	return true;

discard:
	tcp_drop(sk, skb);
	return false;
}
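
/* Illustrative sketch (not part of the kernel build): the RFC 5961 3.2 logic
 * above resets only when the RST's sequence number is exactly rcv_nxt (or a
 * FIN/SACK edge case); any other in-window RST merely provokes a challenge
 * ACK. The stand-alone user-space program below, written for this document,
 * condenses that decision for the SACK-less case.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static const char *rst_action(uint32_t seq, uint32_t rcv_nxt)
{
	if (seq == rcv_nxt)
		return "reset connection";
	return "send challenge ACK";	/* in-window but not an exact match */
}

int main(void)
{
	printf("%s\n", rst_action(5000, 5000));	/* reset connection */
	printf("%s\n", rst_action(5100, 5000));	/* send challenge ACK */
	return 0;
}
#endif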

/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path. The fast path is
 *	disabled when:
 *	- A zero window was announced from us - zero window probing
 *	  is only handled properly in the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags)
 *	- Data is sent in both directions. Fast path only supports pure senders
 *	  or pure receivers (this means either the sequence number or the ack
 *	  value must stay constant)
 *	- Unexpected TCP option.
 *
 *	When these conditions are not satisfied it drops into a standard
 *	receive procedure patterned after RFC793 to handle all cases.
 *	The first three cases are guaranteed by proper pred_flags setting,
 *	the rest is checked inline. Fast processing is turned on in
 *	tcp_data_queue when everything is OK.
 */
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = (const struct tcphdr *)skb->data;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int len = skb->len;

	/* TCP congestion window tracking */
	trace_tcp_probe(sk, skb);

	tcp_mstamp_refresh(tp);
	if (unlikely(!sk->sk_rx_dst))
		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
	/*
	 *	Header prediction.
	 *	The code loosely follows the one in the famous
	 *	"30 instruction TCP receive" Van Jacobson mail.
	 *
	 *	Van's trick is to deposit buffers into socket queue
	 *	on a device interrupt, to call tcp_recv function
	 *	on the receive process context and checksum and copy
	 *	the buffer to user space. smart...
	 *
	 *	Our current scheme is not silly either but we take the
	 *	extra cost of the net_bh soft interrupt processing...
	 *	We do checksum and copy also but from device to kernel.
	 */

	tp->rx_opt.saw_tstamp = 0;

	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header_prediction is to be made
	 *	'S' will always be tp->tcp_header_len >> 2
	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
	 *	turn it off	(when there are holes in the receive
	 *	 space for instance)
	 *	PSH flag is ignored.
	 */

	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
		int tcp_header_len = tp->tcp_header_len;

		/* Timestamp header prediction: tcp_header_len
		 * is automatically equal to th->doff*4 due to pred_flags
		 * match.
		 */

		/* Check timestamp */
		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
			/* No? Slow path! */
			if (!tcp_parse_aligned_timestamp(tp, th))
				goto slow_path;

			/* If PAWS failed, check it more carefully in slow path */
			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
				goto slow_path;

			/* DO NOT update ts_recent here, if checksum fails
			 * and timestamp was corrupted part, it will result
			 * in a hung connection since we will drop all
			 * future packets due to the PAWS test.
			 */
		}

		if (len <= tcp_header_len) {
			/* Bulk data transfer: sender */
			if (len == tcp_header_len) {
				/* Predicted packet is in window by definition.
				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
				 * Hence, check seq<=rcv_wup reduces to:
				 */
				if (tcp_header_len ==
				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				/* We know that such packets are checksummed
				 * on entry.
				 */
				tcp_ack(sk, skb, 0);
				__kfree_skb(skb);
				tcp_data_snd_check(sk);
				/* When receiving pure ack in fast path, update
				 * last ts ecr directly instead of calling
				 * tcp_rcv_rtt_measure_ts()
				 */
				tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
				return;
			} else { /* Header too small */
				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
				goto discard;
			}
		} else {
			int eaten = 0;
			bool fragstolen = false;

			if (tcp_checksum_complete(skb))
				goto csum_error;

			if ((int)skb->truesize > sk->sk_forward_alloc)
				goto step5;

			/* Predicted packet is in window by definition.
			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
			 * Hence, check seq<=rcv_wup reduces to:
			 */
			if (tcp_header_len ==
			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
			    tp->rcv_nxt == tp->rcv_wup)
				tcp_store_ts_recent(tp);

			tcp_rcv_rtt_measure_ts(sk, skb);

			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);

			/* Bulk data transfer: receiver */
			__skb_pull(skb, tcp_header_len);
			eaten = tcp_queue_rcv(sk, skb, &fragstolen);

			tcp_event_data_recv(sk, skb);

			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
				/* Well, only one small jumplet in fast path... */
				tcp_ack(sk, skb, FLAG_DATA);
				tcp_data_snd_check(sk);
				if (!inet_csk_ack_scheduled(sk))
					goto no_ack;
			}

			__tcp_ack_snd_check(sk, 0);
no_ack:
			if (eaten)
				kfree_skb_partial(skb, fragstolen);
			tcp_data_ready(sk);
			return;
		}
	}

slow_path:
	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_error;

	if (!th->ack && !th->rst && !th->syn)
		goto discard;

	/*
	 *	Standard slow path.
	 */

	if (!tcp_validate_incoming(sk, skb, th, 1))
		return;

step5:
	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
		goto discard;

	tcp_rcv_rtt_measure_ts(sk, skb);

	/* Process urgent data. */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	tcp_data_queue(sk, skb);

	tcp_data_snd_check(sk);
	tcp_ack_snd_check(sk);
	return;

csum_error:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);

discard:
	tcp_drop(sk, skb);
}
EXPORT_SYMBOL(tcp_rcv_established);
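
/* Illustrative sketch (not part of the kernel build): pred_flags is matched
 * against the third 32-bit word of the TCP header (data offset, flags and
 * window), with reserved bits and PSH masked out. The stand-alone user-space
 * program below, written for this document, builds that word the way
 * __tcp_fast_path_on() does - header length packed into the top four bits,
 * the ACK flag, and the expected window in the low 16 bits - working in host
 * byte order for simplicity.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tcp_header_len = 32;	/* 20 bytes + 12 for aligned timestamps */
	uint32_t snd_wnd = 0xffff;	/* expected raw window field */
	uint32_t flag_ack = 0x00100000;	/* ACK bit within the flag word */

	/* doff is header_len / 4 placed in bits 31..28, i.e. len << 26 */
	uint32_t pred_flags = (tcp_header_len << 26) | flag_ack | snd_wnd;

	printf("pred_flags = 0x%08x\n", pred_flags);	/* 0x8010ffff */
	return 0;
}
#endif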

void tcp_init_transfer(struct sock *sk, int bpf_op)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_mtup_init(sk);
	icsk->icsk_af_ops->rebuild_header(sk);
	tcp_init_metrics(sk);

	/* Initialize the congestion window to start the transfer.
	 * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
	 * retransmitted. In light of RFC6298 more aggressive 1sec
	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
	 * retransmits are possibly lost.
	 */
	if (tp->total_retrans > 1 && tp->undo_marker)
		tp->snd_cwnd = 1;
	else
		tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
	tp->snd_cwnd_stamp = tcp_jiffies32;

	tcp_call_bpf(sk, bpf_op, 0, NULL);
	tcp_init_congestion_control(sk);
	tcp_init_buffer_space(sk);
}

void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_set_state(sk, TCP_ESTABLISHED);
	icsk->icsk_ack.lrcvtime = tcp_jiffies32;

	if (skb) {
		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
		security_inet_conn_established(sk, skb);
		sk_mark_napi_id(sk, skb);
	}

	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);

	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
	tp->lsndtime = tcp_jiffies32;

	if (sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

	if (!tp->rx_opt.snd_wscale)
		__tcp_fast_path_on(tp, tp->snd_wnd);
	else
		tp->pred_flags = 0;
}

static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
				    struct tcp_fastopen_cookie *cookie)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
	bool syn_drop = false;

	if (mss == tp->rx_opt.user_mss) {
		struct tcp_options_received opt;

		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
		tcp_clear_options(&opt);
		opt.user_mss = opt.mss_clamp = 0;
		tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
		mss = opt.mss_clamp;
	}

	if (!tp->syn_fastopen) {
		/* Ignore an unsolicited cookie */
		cookie->len = -1;
	} else if (tp->total_retrans) {
		/* SYN timed out and the SYN-ACK neither has a cookie nor
		 * acknowledges data. Presumably the remote received only
		 * the retransmitted (regular) SYNs: either the original
		 * SYN-data or the corresponding SYN-ACK was dropped.
		 */
		syn_drop = (cookie->len < 0 && data);
	} else if (cookie->len < 0 && !tp->syn_data) {
		/* We requested a cookie but didn't get it. If we did not use
		 * the (old) exp opt format then try so next time (try_exp=1).
		 * Otherwise we go back to use the RFC7413 opt (try_exp=2).
		 */
		try_exp = tp->syn_fastopen_exp ? 2 : 1;
	}

	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);

	if (data) { /* Retransmit unacked data in SYN */
		if (tp->total_retrans)
			tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
		else
			tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
		skb_rbtree_walk_from(data) {
			if (__tcp_retransmit_skb(sk, data, 1))
				break;
		}
		tcp_rearm_rto(sk);
		NET_INC_STATS(sock_net(sk),
				LINUX_MIB_TCPFASTOPENACTIVEFAIL);
		return true;
	}
	tp->syn_data_acked = tp->syn_data;
	if (tp->syn_data_acked) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
		/* SYN-data is counted as two separate packets in tcp_ack() */
		if (tp->delivered > 1)
			--tp->delivered;
	}

	tcp_fastopen_add_skb(sk, synack);

	return false;
}

static void smc_check_reset_syn(struct tcp_sock *tp)
{
#if IS_ENABLED(CONFIG_SMC)
	if (static_branch_unlikely(&tcp_have_smc)) {
		if (tp->syn_smc && !tp->rx_opt.smc_ok)
			tp->syn_smc = 0;
	}
#endif
}

static void tcp_try_undo_spurious_syn(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 syn_stamp;

	/* undo_marker is set when SYN or SYNACK times out. The timeout is
	 * spurious if the ACK's timestamp option echo value matches the
	 * original SYN timestamp.
	 */
	syn_stamp = tp->retrans_stamp;
	if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
	    syn_stamp == tp->rx_opt.rcv_tsecr)
		tp->undo_marker = 0;
}

static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 const struct tcphdr *th)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int saved_clamp = tp->rx_opt.mss_clamp;
	bool fastopen_fail;

	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
		tp->rx_opt.rcv_tsecr -= tp->tsoffset;

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
		if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
		    after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
			/* Previous FIN/ACK or RST/ACK might be ignored. */
			if (icsk->icsk_retransmits == 0)
				inet_csk_reset_xmit_timer(sk,
						ICSK_TIME_RETRANS,
						TCP_TIMEOUT_MIN, TCP_RTO_MAX);
			goto reset_and_undo;
		}

		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp(tp))) {
			NET_INC_STATS(sock_net(sk),
					LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}

		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset" drop the segment enter CLOSED state
		 *    delete TCB and return."
		 */
		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */

		tcp_ecn_rcv_synack(tp, th);

		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
		tcp_try_undo_spurious_syn(sk);
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);

		if (!tp->rx_opt.wscale_ok) {
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized.
		 */
		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);

		smc_check_reset_syn(tp);

		smp_mb();

		tcp_finish_connect(sk, skb);

		fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
				tcp_rcv_fastopen_synack(sk, skb, &foc);

		if (!sock_flag(sk, SOCK_DEAD)) {
			sk->sk_state_change(sk);
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
		}
		if (fastopen_fail)
			return -1;
		if (sk->sk_write_pending ||
		    icsk->icsk_accept_queue.rskq_defer_accept ||
		    inet_csk_in_pingpong_mode(sk)) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);
			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			tcp_drop(sk, skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment */

	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd    = ntohs(th->window);
		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		tcp_ecn_rcv_syn(tp, th);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		tcp_send_synack(sk);
#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this (except that we must
		 * either change tcp_recvmsg() to prevent it from returning data
		 * before 3WHS completes per RFC793, or employ TCP Fast Open).
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * So, discard the packet for sanity; uncomment this return
		 * to process the data instead.
		 */
		return -1;
#else
		goto discard;
#endif
	}
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}
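
/* Illustrative sketch (not part of the kernel build): in SYN-SENT the ACK is
 * acceptable only if it falls in (snd_una, snd_nxt], i.e. it acknowledges
 * our SYN (and possibly Fast Open data) and nothing we have not sent. The
 * stand-alone user-space program below, written for this document, checks
 * that window with wraparound-safe comparisons.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static int after32(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq2 - seq1) < 0;
}

static int ack_acceptable(uint32_t ack_seq, uint32_t snd_una, uint32_t snd_nxt)
{
	return after32(ack_seq, snd_una) && !after32(ack_seq, snd_nxt);
}

int main(void)
{
	uint32_t iss = 4000;		/* initial send sequence (snd_una) */
	uint32_t snd_nxt = iss + 1;	/* SYN consumed one sequence number */

	printf("%d\n", ack_acceptable(iss + 1, iss, snd_nxt));	/* 1: ACKs our SYN */
	printf("%d\n", ack_acceptable(iss, iss, snd_nxt));	/* 0: SEG.ACK =< ISS */
	printf("%d\n", ack_acceptable(iss + 2, iss, snd_nxt));	/* 0: beyond SND.NXT */
	return 0;
}
#endif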

static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
	struct request_sock *req;

	/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
	 * undo. If peer SACKs triggered fast recovery, we can't undo here.
	 */
	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
		tcp_try_undo_loss(sk, false);

	/* Reset rtx states to prevent spurious retransmits_timed_out() */
	tcp_sk(sk)->retrans_stamp = 0;
	inet_csk(sk)->icsk_retransmits = 0;

	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
	 * we no longer need req so release it.
	 */
	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
					lockdep_sock_is_held(sk));
	reqsk_fastopen_remove(sk, req, false);

	/* Re-arm the timer because data may have been sent out.
	 * This is similar to the regular data transmission case
	 * when new data has just been ack'ed.
	 *
	 * (TFO) - we could try to be more aggressive and
	 * retransmitting any data sooner based on when they
	 * are sent out.
	 */
	tcp_rearm_rto(sk);
}

/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcphdr *th = tcp_hdr(skb);
	struct request_sock *req;
	int queued = 0;
	bool acceptable;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (th->fin)
				goto discard;
			/* It is possible that we process SYN packets from backlog,
			 * so we need to make sure to disable BH and RCU right there.
			 */
			rcu_read_lock();
			local_bh_disable();
			acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
			local_bh_enable();
			rcu_read_unlock();

			if (!acceptable)
				return 1;
			consume_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:
		tp->rx_opt.saw_tstamp = 0;
		tcp_mstamp_refresh(tp);
		queued = tcp_rcv_synsent_state_process(sk, skb, th);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	tcp_mstamp_refresh(tp);
	tp->rx_opt.saw_tstamp = 0;
	req = rcu_dereference_protected(tp->fastopen_rsk,
					lockdep_sock_is_held(sk));
	if (req) {
		bool req_stolen;

		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
		    sk->sk_state != TCP_FIN_WAIT1);

		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
			goto discard;
	}

	if (!th->ack && !th->rst && !th->syn)
		goto discard;

	if (!tcp_validate_incoming(sk, skb, th, 0))
		return 0;

	/* step 5: check the ACK field */
	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
				      FLAG_UPDATE_TS_RECENT |
				      FLAG_NO_CHALLENGE_ACK) > 0;

	if (!acceptable) {
		if (sk->sk_state == TCP_SYN_RECV)
			return 1;	/* send one RST */
		tcp_send_challenge_ack(sk, skb);
		goto discard;
	}
	switch (sk->sk_state) {
	case TCP_SYN_RECV:
		tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
		if (!tp->srtt_us)
			tcp_synack_rtt_meas(sk, req);

		if (req) {
			tcp_rcv_synrecv_state_fastopen(sk);
		} else {
			tcp_try_undo_spurious_syn(sk);
			tp->retrans_stamp = 0;
			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
			WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
		}
		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);
		sk->sk_state_change(sk);

		/* Note, that this wakeup is only for marginal crossed SYN case.
		 * Passively open sockets are not waked up, because
		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
		 */
		if (sk->sk_socket)
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);

		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

		if (tp->rx_opt.tstamp_ok)
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
			tcp_update_pacing_rate(sk);

		/* Prevent spurious tcp_cwnd_restart() on first data packet */
		tp->lsndtime = tcp_jiffies32;

		tcp_initialize_rcv_mss(sk);
		tcp_fast_path_on(tp);
		break;

	case TCP_FIN_WAIT1: {
		int tmo;

		if (req)
			tcp_rcv_synrecv_state_fastopen(sk);

		if (tp->snd_una != tp->write_seq)
			break;

		tcp_set_state(sk, TCP_FIN_WAIT2);
		sk->sk_shutdown |= SEND_SHUTDOWN;

		sk_dst_confirm(sk);

		if (!sock_flag(sk, SOCK_DEAD)) {
			/* Wake up lingering close() */
			sk->sk_state_change(sk);
			break;
		}

		if (tp->linger2 < 0) {
			tcp_done(sk);
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
			return 1;
		}
		if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
		    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
			/* Receive out of order FIN after close() */
			if (tp->syn_fastopen && th->fin)
				tcp_fastopen_active_disable(sk);
			tcp_done(sk);
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
			return 1;
		}

		tmo = tcp_fin_time(sk);
		if (tmo > TCP_TIMEWAIT_LEN) {
			inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
		} else if (th->fin || sock_owned_by_user(sk)) {
			/* Bad case. We could lose such FIN otherwise.
			 * It is not a big problem, but it looks confusing
			 * and not so rare event. We still can lose it now,
			 * if it spins in bh_lock_sock(), but it is really
			 * marginal case.
			 */
			inet_csk_reset_keepalive_timer(sk, tmo);
		} else {
			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
			goto discard;
		}
		break;
	}

	case TCP_CLOSING:
		if (tp->snd_una == tp->write_seq) {
			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
			goto discard;
		}
		break;

	case TCP_LAST_ACK:
		if (tp->snd_una == tp->write_seq) {
			tcp_update_metrics(sk);
			tcp_done(sk);
			goto discard;
		}
		break;
	}

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
			if (sk_is_mptcp(sk))
				mptcp_incoming_options(sk, skb, &tp->rx_opt);
			break;
		}
		fallthrough;
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		fallthrough;
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data could move socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		tcp_drop(sk, skb);
	}
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);

static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	if (family == AF_INET)
		net_dbg_ratelimited("drop open request from %pI4/%u\n",
				    &ireq->ir_rmt_addr, port);
#if IS_ENABLED(CONFIG_IPV6)
	else if (family == AF_INET6)
		net_dbg_ratelimited("drop open request from %pI6/%u\n",
				    &ireq->ir_v6_rmt_addr, port);
#endif
}

/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
 *
 * If we receive a SYN packet with these bits set, it means a
 * network is playing bad games with TOS bits. In order to
 * avoid possible false congestion notifications, we disable
 * TCP ECN negotiation.
 *
 * Exception: tcp_ca wants ECN. This is required for DCTCP
 * congestion control: Linux DCTCP asserts ECT on all packets,
 * including SYN, which is most optimal solution; however,
 * others, such as FreeBSD do not.
 *
 * Exception: at least one of the reserved bits of the TCP header (th->res1)
 * is set, indicating the use of a future TCP extension (such as AccECN). See
 * RFC8311 4.3, which updates RFC3168 to allow the development of such
 * extensions.
 */
static void tcp_ecn_create_request(struct request_sock *req,
				   const struct sk_buff *skb,
				   const struct sock *listen_sk,
				   const struct dst_entry *dst)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct net *net = sock_net(listen_sk);
	bool th_ecn = th->ece && th->cwr;
	bool ect, ecn_ok;
	u32 ecn_ok_dst;

	if (!th_ecn)
		return;

	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
	ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;

	if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
	    tcp_bpf_ca_needs_ecn((struct sock *)req))
		inet_rsk(req)->ecn_ok = 1;
}

static void tcp_openreq_init(struct request_sock *req,
			     const struct tcp_options_received *rx_opt,
			     struct sk_buff *skb, const struct sock *sk)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	req->rsk_rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
	tcp_rsk(req)->snt_synack = 0;
	tcp_rsk(req)->last_oow_ack_time = 0;
	req->mss = rx_opt->mss_clamp;
	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
	ireq->tstamp_ok = rx_opt->tstamp_ok;
	ireq->sack_ok = rx_opt->sack_ok;
	ireq->snd_wscale = rx_opt->snd_wscale;
	ireq->wscale_ok = rx_opt->wscale_ok;
	ireq->acked = 0;
	ireq->ecn_ok = 0;
	ireq->ir_rmt_port = tcp_hdr(skb)->source;
	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
	ireq->ir_mark = inet_request_mark(sk, skb);
#if IS_ENABLED(CONFIG_SMC)
	ireq->smc_ok = rx_opt->smc_ok;
#endif
}

struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
				      struct sock *sk_listener,
				      bool attach_listener)
{
	struct request_sock *req = reqsk_alloc(ops, sk_listener,
					       attach_listener);

	if (req) {
		struct inet_request_sock *ireq = inet_rsk(req);

		ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
		ireq->pktopts = NULL;
#endif
		atomic64_set(&ireq->ir_cookie, 0);
		ireq->ireq_state = TCP_NEW_SYN_RECV;
		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
		ireq->ireq_family = sk_listener->sk_family;
	}

	return req;
}
EXPORT_SYMBOL(inet_reqsk_alloc);

/*
 * Return true if a syncookie should be sent
 */
static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct net *net = sock_net(sk);

#ifdef CONFIG_SYN_COOKIES
	if (net->ipv4.sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	if (!queue->synflood_warned &&
	    net->ipv4.sysctl_tcp_syncookies != 2 &&
	    xchg(&queue->synflood_warned, 1) == 0)
		net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
				     proto, sk->sk_num, msg);

	return want_cookie;
}

static void tcp_reqsk_record_syn(const struct sock *sk,
				 struct request_sock *req,
				 const struct sk_buff *skb)
{
	if (tcp_sk(sk)->save_syn) {
		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
		u32 *copy;

		copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
		if (copy) {
			/* Length-prefixed copy of the network and TCP headers
			 * of the SYN, later exposed via TCP_SAVED_SYN.
			 */
			copy[0] = len;
			memcpy(&copy[1], skb_network_header(skb), len);
			req->saved_syn = copy;
		}
	}
}
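
/* Illustrative sketch (not part of the kernel build): saved_syn is a plain
 * length-prefixed buffer - one u32 holding the header length, followed by
 * the raw network + TCP headers. The stand-alone user-space program below,
 * written for this document, builds and reads back such a buffer.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const uint8_t headers[] = { 0x45, 0x00, 0x00, 0x3c };	/* fake header bytes */
	uint32_t len = sizeof(headers);
	uint32_t *copy;

	/* One u32 of length, then the header bytes, exactly as above. */
	copy = malloc(sizeof(uint32_t) + len);
	if (!copy)
		return 1;
	copy[0] = len;
	memcpy(&copy[1], headers, len);

	printf("saved %u header bytes, first byte 0x%02x\n",
	       copy[0], ((uint8_t *)&copy[1])[0]);	/* 4 bytes, 0x45 */
	free(copy);
	return 0;
}
#endif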

/* If a SYN cookie is required and supported, returns a clamped MSS value to be
 * used for SYN cookie generation.
 */
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
			  const struct tcp_request_sock_ops *af_ops,
			  struct sock *sk, struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u16 mss;

	if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
	    !inet_csk_reqsk_queue_is_full(sk))
		return 0;

	if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
		return 0;

	if (sk_acceptq_is_full(sk)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		return 0;
	}

	mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
	if (!mss)
		mss = af_ops->mss_clamp;

	return mss;
}
EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);

int tcp_conn_request(struct request_sock_ops *rsk_ops,
		     const struct tcp_request_sock_ops *af_ops,
		     struct sock *sk, struct sk_buff *skb)
{
	struct tcp_fastopen_cookie foc = { .len = -1 };
	__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct sock *fastopen_sk = NULL;
	struct request_sock *req;
	bool want_cookie = false;
	struct dst_entry *dst;
	struct flowi fl;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
		if (!want_cookie)
			goto drop;
	}

	if (sk_acceptq_is_full(sk)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
	if (!req)
		goto drop;

	req->syncookie = want_cookie;
	tcp_rsk(req)->af_specific = af_ops;
	tcp_rsk(req)->ts_off = 0;
#if IS_ENABLED(CONFIG_MPTCP)
	tcp_rsk(req)->is_mptcp = 0;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = af_ops->mss_clamp;
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
			  want_cookie ? NULL : &foc);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	if (IS_ENABLED(CONFIG_SMC) && want_cookie)
		tmp_opt.smc_ok = 0;

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb, sk);
	inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;

	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
	inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);

	af_ops->init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (tmp_opt.tstamp_ok)
		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);

	dst = af_ops->route_req(sk, &fl, req);
	if (!dst)
		goto drop_and_free;

	if (!want_cookie && !isn) {
		/* Kill the following clause, if you dislike this way. */
		if (!net->ipv4.sysctl_tcp_syncookies &&
		    (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
		     (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
		    !tcp_peer_is_proven(req, dst)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
				    rsk_ops->family);
			goto drop_and_release;
		}

		isn = af_ops->init_seq(skb);
	}

	tcp_ecn_create_request(req, skb, sk, dst);

	if (want_cookie) {
		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
		if (!tmp_opt.tstamp_ok)
			inet_rsk(req)->ecn_ok = 0;
	}

	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->txhash = net_tx_rndhash();
	tcp_openreq_init_rwin(req, sk, dst);
	sk_rx_queue_set(req_to_sk(req), skb);
	if (!want_cookie) {
		tcp_reqsk_record_syn(sk, req, skb);
		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
	}
	if (fastopen_sk) {
		af_ops->send_synack(fastopen_sk, dst, &fl, req,
				    &foc, TCP_SYNACK_FASTOPEN);
		/* Add the child socket directly into the accept queue */
		if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
			reqsk_fastopen_remove(fastopen_sk, req, false);
			bh_unlock_sock(fastopen_sk);
			sock_put(fastopen_sk);
			goto drop_and_free;
		}
		sk->sk_data_ready(sk);
		bh_unlock_sock(fastopen_sk);
		sock_put(fastopen_sk);
	} else {
		tcp_rsk(req)->tfo_listener = false;
		if (!want_cookie)
			inet_csk_reqsk_queue_hash_add(sk, req,
				tcp_timeout_init((struct sock *)req));
		af_ops->send_synack(sk, dst, &fl, req, &foc,
				    !want_cookie ? TCP_SYNACK_NORMAL :
						   TCP_SYNACK_COOKIE);
		if (want_cookie) {
			reqsk_free(req);
			return 0;
		}
	}
	reqsk_put(req);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	__reqsk_free(req);
drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_conn_request);

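/* Illustrative sketch (not part of the kernel build): without syncookies,
 * tcp_conn_request() refuses SYNs from unproven peers once the last quarter
 * of the SYN backlog is occupied. The stand-alone user-space program below,
 * written for this document, evaluates that occupancy check for an assumed
 * max_syn_backlog of 128.
 */
#if 0
#include <stdio.h>

static int reserve_last_quarter(int max_backlog, int queue_len, int peer_proven)
{
	/* Fewer than a quarter of the slots left, and peer not remembered
	 * as alive: drop the request.
	 */
	return max_backlog - queue_len < (max_backlog >> 2) && !peer_proven;
}

int main(void)
{
	printf("%d\n", reserve_last_quarter(128, 100, 0));	/* 1: drop */
	printf("%d\n", reserve_last_quarter(128, 50, 0));	/* 0: accept */
	printf("%d\n", reserve_last_quarter(128, 100, 1));	/* 0: proven peer */
	return 0;
}
#endif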