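/*
 * TCP input path helpers: delayed-ACK sizing, receive buffer autotuning,
 * RTT estimation, SACK scoreboard tagging and congestion-state transitions.
 * (Descriptive comment added where the original file header was stripped.)
 */
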
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>

int sysctl_tcp_max_orphans __read_mostly = NR_FILE;

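/* Flags describing properties of an incoming ACK, accumulated into the
 * "flag" word that tcp_ack() and its helpers pass around.
 */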
#define FLAG_DATA		0x01
#define FLAG_WIN_UPDATE		0x02
#define FLAG_DATA_ACKED		0x04
#define FLAG_RETRANS_DATA_ACKED	0x08
#define FLAG_SYN_ACKED		0x10
#define FLAG_DATA_SACKED	0x20
#define FLAG_ECE		0x40
#define FLAG_LOST_RETRANS	0x80
#define FLAG_SLOWPATH		0x100
#define FLAG_ORIG_SACK_ACKED	0x200
#define FLAG_SND_UNA_ADVANCED	0x400
#define FLAG_DSACKING_ACK	0x800
#define FLAG_SET_XMIT_TIMER	0x1000
#define FLAG_SACK_RENEGING	0x2000
#define FLAG_UPDATE_TS_RECENT	0x4000
#define FLAG_NO_CHALLENGE_ACK	0x8000
#define FLAG_ACK_MAYBE_DELAYED	0x10000

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0
#define REXMIT_LOST	1
#define REXMIT_NEW	2

#if IS_ENABLED(CONFIG_TLS_DEVICE)
static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);

void clean_acked_data_enable(struct inet_connection_sock *icsk,
			     void (*cad)(struct sock *sk, u32 ack_seq))
{
	icsk->icsk_clean_acked = cad;
	static_branch_deferred_inc(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_enable);

void clean_acked_data_disable(struct inet_connection_sock *icsk)
{
	static_branch_slow_dec_deferred(&clean_acked_data_enabled);
	icsk->icsk_clean_acked = NULL;
}
EXPORT_SYMBOL_GPL(clean_acked_data_disable);

void clean_acked_data_flush(void)
{
	static_key_deferred_flush(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_flush);
#endif

static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
			     unsigned int len)
{
	static bool __once __read_mostly;

	if (!__once) {
		struct net_device *dev;

		__once = true;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
		if (!dev || len >= dev->mtu)
			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
				dev ? dev->name : "Unknown driver");
		rcu_read_unlock();
	}
}

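/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */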
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
					       tcp_sk(sk)->advmss);

		if (unlikely(len > icsk->icsk_ack.rcv_mss +
				   MAX_TCP_OPTION_SPACE))
			tcp_gro_dev_warn(sk, skb, len);
	} else {
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	quickacks = min(quickacks, max_quickacks);
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = quickacks;
}

void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_incr_quickack(sk, max_quickacks);
	inet_csk_exit_pingpong_mode(sk);
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}
EXPORT_SYMBOL(tcp_enter_quickack_mode);

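/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive (not in pingpong mode).
 */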
static bool tcp_in_quickack_mode(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);

	return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}

static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr) {
		tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;

		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}
}

static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}

static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode(sk, 2);
		break;
	case INET_ECN_CE:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);

		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
			tcp_enter_quickack_mode(sk, 2);
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		}
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	default:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	}
}

static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
		__tcp_ecn_check_ce(sk, skb);
}

static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return true;
	return false;
}

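/* Sender-side buffer tuning: make sure sk_sndbuf can hold at least a
 * congestion window (plus reordering slack) worth of fully-built skbs.
 */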
static void tcp_sndbuf_expand(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
	int sndmem, per_mss;
	u32 nr_segs;

	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
		  MAX_TCP_HEADER +
		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	per_mss = roundup_pow_of_two(per_mss) +
		  SKB_DATA_ALIGN(sizeof(struct sk_buff));

	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);

	sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
	sndmem *= nr_segs * per_mss;

	if (sk->sk_sndbuf < sndmem)
		sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}

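/* Slow part of check #2 in tcp_grow_window(): decide by how much to raise
 * rcv_ssthresh for a segment whose true memory cost exceeds its payload,
 * while staying within the rcvbuf limit.
 */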
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int room;

	room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;

	if (room > 0 && !tcp_under_memory_pressure(sk)) {
		int incr;

		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			incr = max_t(int, incr, 2 * skb->len);
			tp->rcv_ssthresh += min(room, incr);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

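/* Initialize receive-side buffer accounting and the window clamp when the
 * connection enters established state.
 */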
void tcp_init_buffer_space(struct sock *sk)
{
	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_sndbuf_expand(sk);

	tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
	tcp_mstamp_refresh(tp);
	tp->rcvq_space.time = tp->tcp_mstamp;
	tp->rcvq_space.seq = tp->copied_seq;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> tcp_app_win),
					       4 * tp->advmss);
	}

	if (tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_jiffies32;
}

static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct net *net = sock_net(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !tcp_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
				    net->ipv4.sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

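/* Initialize the RCV_MSS guess used for delayed-ACK decisions: start from
 * min(advmss, mss_cache), cap it at half the receive window and at
 * TCP_MSS_DEFAULT, and never go below TCP_MIN_MSS.
 */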
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);

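/* Receiver-side RTT estimator, used only for receive buffer autotuning.
 * win_dep == 0 applies a standard EWMA update (timestamp samples);
 * win_dep != 0 conservatively tracks the minimum of the window-based samples.
 */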
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt_us;
	long m = sample;

	if (new_sample != 0) {
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else {
			m <<= 3;
			if (m < new_sample)
				new_sample = m;
		}
	} else {
		new_sample = m << 3;
	}

	tp->rcv_rtt_est.rtt_us = new_sample;
}

static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
	u32 delta_us;

	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
	if (!delta_us)
		delta_us = 1;
	tcp_rcv_rtt_update(tp, delta_us, 1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tp->tcp_mstamp;
}

static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
					  const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
		return;
	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;

	if (TCP_SKB_CB(skb)->end_seq -
	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
		u32 delta_us;

		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
			if (!delta)
				delta = 1;
			delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
			tcp_rcv_rtt_update(tp, delta_us, 0);
		}
	}
}

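/* Adjust receive buffer and window clamp based on how much data the
 * application consumed over the last RTT, so the advertised window can
 * keep up with the receive rate (DRS - Dynamic Right-Sizing).
 */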
void tcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 copied;
	int time;

	trace_tcp_rcv_space_adjust(sk);

	tcp_mstamp_refresh(tp);
	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
		return;

	copied = tp->copied_seq - tp->rcvq_space.seq;
	if (copied <= tp->rcvq_space.space)
		goto new_measure;

	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
		int rcvmem, rcvbuf;
		u64 rcvwin, grow;

		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;

		grow = rcvwin * (copied - tp->rcvq_space.space);
		do_div(grow, tp->rcvq_space.space);
		rcvwin += (grow << 1);

		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
		while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
			rcvmem += 128;

		do_div(rcvwin, tp->advmss);
		rcvbuf = min_t(u64, rcvwin * rcvmem,
			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
		if (rcvbuf > sk->sk_rcvbuf) {
			sk->sk_rcvbuf = rcvbuf;

			tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
		}
	}
	tp->rcvq_space.space = copied;

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tp->tcp_mstamp;
}

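/* Called for every data segment queued for receive: updates the delayed-ACK
 * machinery (ATO estimation, quick-ACK mode), checks ECN CE marks and
 * possibly grows the receive window.
 */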
static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 now;

	inet_csk_schedule_ack(sk);

	tcp_measure_rcv_mss(sk, skb);

	tcp_rcv_rtt_measure(tp);

	now = tcp_jiffies32;

	if (!icsk->icsk_ack.ato) {
		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		int m = now - icsk->icsk_ack.lrcvtime;

		if (m <= TCP_ATO_MIN / 2) {
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
		} else if (m < icsk->icsk_ack.ato) {
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
			if (icsk->icsk_ack.ato > icsk->icsk_rto)
				icsk->icsk_ack.ato = icsk->icsk_rto;
		} else if (m > icsk->icsk_rto) {
			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
			sk_mem_reclaim(sk);
		}
	}
	icsk->icsk_ack.lrcvtime = now;

	tcp_ecn_check_ce(sk, skb);

	if (skb->len >= 128)
		tcp_grow_window(sk, skb);
}

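/* Jacobson/Karels RTT estimator: maintain srtt (scaled by 8), mdev and
 * rttvar (scaled by 4) from each new RTT measurement in microseconds.
 */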
static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt_us;
	u32 srtt = tp->srtt_us;

	if (srtt != 0) {
		m -= (srtt >> 3);
		srtt += m;
		if (m < 0) {
			m = -m;
			m -= (tp->mdev_us >> 2);

			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev_us >> 2);
		}
		tp->mdev_us += m;
		if (tp->mdev_us > tp->mdev_max_us) {
			tp->mdev_max_us = tp->mdev_us;
			if (tp->mdev_max_us > tp->rttvar_us)
				tp->rttvar_us = tp->mdev_max_us;
		}
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max_us < tp->rttvar_us)
				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max_us = tcp_rto_min_us(sk);

			tcp_bpf_rtt(sk);
		}
	} else {
		srtt = m << 3;
		tp->mdev_us = m << 1;
		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
		tp->mdev_max_us = tp->rttvar_us;
		tp->rtt_seq = tp->snd_nxt;

		tcp_bpf_rtt(sk);
	}
	tp->srtt_us = max(1U, srtt);
}

static void tcp_update_pacing_rate(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u64 rate;

	rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);

	if (tp->snd_cwnd < tp->snd_ssthresh / 2)
		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
	else
		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;

	rate *= max(tp->snd_cwnd, tp->packets_out);

	if (likely(tp->srtt_us))
		do_div(rate, tp->srtt_us);

	WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
					     sk->sk_max_pacing_rate));
}

static void tcp_set_rto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);

	tcp_bound_rto(sk);
}

__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
{
	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

	if (!cwnd)
		cwnd = TCP_INIT_CWND;
	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}

static void tcp_dsack_seen(struct tcp_sock *tp)
{
	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
	tp->rack.dsack_seen = 1;
	tp->dsack_dups++;
}

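/* Update tp->reordering when a SACK reveals that data below the highest
 * SACKed sequence was merely reordered (delivered late) rather than lost.
 */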
static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
				      const int ts)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const u32 mss = tp->mss_cache;
	u32 fack, metric;

	fack = tcp_highest_sack_seq(tp);
	if (!before(low_seq, fack))
		return;

	metric = fack - low_seq;
	if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
			 tp->reordering,
			 0,
			 tp->sacked_out,
			 tp->undo_marker ? tp->undo_retrans : 0);
#endif
		tp->reordering = min_t(u32, (metric + mss - 1) / mss,
				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
	}

	tp->reord_seen++;
	NET_INC_STATS(sock_net(sk),
		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}

static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (!tp->retransmit_skb_hint ||
	    before(TCP_SKB_CB(skb)->seq,
		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
		tp->retransmit_skb_hint = skb;
}

static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
	__u8 sacked = TCP_SKB_CB(skb)->sacked;

	if (!(sacked & TCPCB_LOST) ||
	    ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
		tp->lost += tcp_skb_pcount(skb);
}

static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
		tcp_verify_retransmit_hint(tp, skb);

		tp->lost_out += tcp_skb_pcount(skb);
		tcp_sum_lost(tp, skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
	}
}

void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
{
	tcp_verify_retransmit_hint(tp, skb);

	tcp_sum_lost(tp, skb);
	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
		tp->lost_out += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
	}
}

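/* Sanity-check an incoming (D)SACK block against snd_una/snd_nxt and, for
 * D-SACKs, against the undo window, so that forged or stale blocks cannot
 * corrupt the SACK scoreboard.
 */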
static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
				   u32 start_seq, u32 end_seq)
{
	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
		return false;

	if (!before(start_seq, tp->snd_nxt))
		return false;

	if (after(start_seq, tp->snd_una))
		return true;

	if (!is_dsack || !tp->undo_marker)
		return false;

	if (after(end_seq, tp->snd_una))
		return false;

	if (!before(start_seq, tp->undo_marker))
		return true;

	if (!after(end_seq, tp->undo_marker))
		return false;

	return !before(start_seq, end_seq - tp->max_window);
}

static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
			    struct tcp_sack_block_wire *sp, int num_sacks,
			    u32 prior_snd_una)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
	bool dup_sack = false;

	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
		dup_sack = true;
		tcp_dsack_seen(tp);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
	} else if (num_sacks > 1) {
		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);

		if (!after(end_seq_0, end_seq_1) &&
		    !before(start_seq_0, start_seq_1)) {
			dup_sack = true;
			tcp_dsack_seen(tp);
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPDSACKOFORECV);
		}
	}

	if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
	    !after(end_seq_0, prior_snd_una) &&
	    after(end_seq_0, tp->undo_marker))
		tp->undo_retrans--;

	return dup_sack;
}

struct tcp_sacktag_state {
	u32	reord;
	u64	first_sackt;
	u64	last_sackt;
	struct rate_sample *rate;
	int	flag;
	unsigned int mss_now;
};

static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
				 u32 start_seq, u32 end_seq)
{
	int err;
	bool in_sack;
	unsigned int pkt_len;
	unsigned int mss;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (tcp_skb_pcount(skb) > 1 && !in_sack &&
	    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
		mss = tcp_skb_mss(skb);
		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);

		if (!in_sack) {
			pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				pkt_len = mss;
		} else {
			pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				return -EINVAL;
		}

		if (pkt_len > mss) {
			unsigned int new_len = (pkt_len / mss) * mss;
			if (!in_sack && new_len < pkt_len)
				new_len += mss;
			pkt_len = new_len;
		}

		if (pkt_len >= skb->len && !in_sack)
			return 0;

		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
				   pkt_len, mss, GFP_ATOMIC);
		if (err < 0)
			return err;
	}

	return in_sack;
}

static u8 tcp_sacktag_one(struct sock *sk,
			  struct tcp_sacktag_state *state, u8 sacked,
			  u32 start_seq, u32 end_seq,
			  int dup_sack, int pcount,
			  u64 xmit_time)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (tp->undo_marker && tp->undo_retrans > 0 &&
		    after(end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if ((sacked & TCPCB_SACKED_ACKED) &&
		    before(start_seq, state->reord))
			state->reord = start_seq;
	}

	if (!after(end_seq, tp->snd_una))
		return sacked;

	if (!(sacked & TCPCB_SACKED_ACKED)) {
		tcp_rack_advance(tp, sacked, end_seq, xmit_time);

		if (sacked & TCPCB_SACKED_RETRANS) {
			if (sacked & TCPCB_LOST) {
				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
				tp->lost_out -= pcount;
				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
				if (before(start_seq,
					   tcp_highest_sack_seq(tp)) &&
				    before(start_seq, state->reord))
					state->reord = start_seq;

				if (!after(end_seq, tp->high_seq))
					state->flag |= FLAG_ORIG_SACK_ACKED;
				if (state->first_sackt == 0)
					state->first_sackt = xmit_time;
				state->last_sackt = xmit_time;
			}

			if (sacked & TCPCB_LOST) {
				sacked &= ~TCPCB_LOST;
				tp->lost_out -= pcount;
			}
		}

		sacked |= TCPCB_SACKED_ACKED;
		state->flag |= FLAG_DATA_SACKED;
		tp->sacked_out += pcount;
		tp->delivered += pcount;

		if (tp->lost_skb_hint &&
		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
			tp->lost_cnt_hint += pcount;
	}

	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
		sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= pcount;
	}

	return sacked;
}

static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
			    struct sk_buff *skb,
			    struct tcp_sacktag_state *state,
			    unsigned int pcount, int shifted, int mss,
			    bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq = TCP_SKB_CB(skb)->seq;
	u32 end_seq = start_seq + shifted;

	BUG_ON(!pcount);

	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
			start_seq, end_seq, dup_sack, pcount,
			tcp_skb_timestamp_us(skb));
	tcp_rate_skb_delivered(sk, skb, state->rate);

	if (skb == tp->lost_skb_hint)
		tp->lost_cnt_hint += pcount;

	TCP_SKB_CB(prev)->end_seq += shifted;
	TCP_SKB_CB(skb)->seq += shifted;

	tcp_skb_pcount_add(prev, pcount);
	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
	tcp_skb_pcount_add(skb, -pcount);

	if (!TCP_SKB_CB(prev)->tcp_gso_size)
		TCP_SKB_CB(prev)->tcp_gso_size = mss;

	if (tcp_skb_pcount(skb) <= 1)
		TCP_SKB_CB(skb)->tcp_gso_size = 0;

	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);

	if (skb->len > 0) {
		BUG_ON(!tcp_skb_pcount(skb));
		NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
		return false;
	}

	if (skb == tp->retransmit_skb_hint)
		tp->retransmit_skb_hint = prev;
	if (skb == tp->lost_skb_hint) {
		tp->lost_skb_hint = prev;
		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
	}

	TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
	TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		TCP_SKB_CB(prev)->end_seq++;

	if (skb == tcp_highest_sack(sk))
		tcp_advance_highest_sack(sk, skb);

	tcp_skb_collapse_tstamp(prev, skb);
	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;

	tcp_rtx_queue_unlink_and_free(skb, sk);

	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);

	return true;
}

static int tcp_skb_seglen(const struct sk_buff *skb)
{
	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
}

static int skb_can_shift(const struct sk_buff *skb)
{
	return !skb_headlen(skb) && skb_is_nonlinear(skb);
}

int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
		  int pcount, int shiftlen)
{
	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
		return 0;
	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
		return 0;
	return skb_shift(to, from, shiftlen);
}

static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
					  struct tcp_sacktag_state *state,
					  u32 start_seq, u32 end_seq,
					  bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev;
	int mss;
	int pcount = 0;
	int len;
	int in_sack;

	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
		goto fallback;
	if (!skb_can_shift(skb))
		goto fallback;

	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		goto fallback;

	prev = skb_rb_prev(skb);
	if (!prev)
		goto fallback;

	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
		goto fallback;

	if (!tcp_skb_can_collapse_to(prev))
		goto fallback;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (in_sack) {
		len = skb->len;
		pcount = tcp_skb_pcount(skb);
		mss = tcp_skb_seglen(skb);

		if (mss != tcp_skb_seglen(prev))
			goto fallback;
	} else {
		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
			goto noop;

		if (tcp_skb_pcount(skb) <= 1)
			goto noop;

		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
		if (!in_sack) {
			goto fallback;
		}

		len = end_seq - TCP_SKB_CB(skb)->seq;
		BUG_ON(len < 0);
		BUG_ON(len > skb->len);

		mss = tcp_skb_mss(skb);

		if (mss != tcp_skb_seglen(prev))
			goto fallback;

		if (len == mss) {
			pcount = 1;
		} else if (len < mss) {
			goto noop;
		} else {
			pcount = len / mss;
			len = pcount * mss;
		}
	}

	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
		goto fallback;

	if (!tcp_skb_shift(prev, skb, pcount, len))
		goto fallback;
	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
		goto out;

	skb = skb_rb_next(prev);
	if (!skb)
		goto out;

	if (!skb_can_shift(skb) ||
	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
	    (mss != tcp_skb_seglen(skb)))
		goto out;

	len = skb->len;
	pcount = tcp_skb_pcount(skb);
	if (tcp_skb_shift(prev, skb, pcount, len))
		tcp_shifted_skb(sk, prev, skb, state, pcount,
				len, mss, 0);

out:
	return prev;

noop:
	return skb;

fallback:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
	return NULL;
}

static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					bool dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	skb_rbtree_walk_from(skb) {
		int in_sack = 0;
		bool dup_sack = dup_sack_in;

		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		if (next_dup &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = true;
		}

		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp) {
				if (tmp != skb) {
					skb = tmp;
					continue;
				}

				in_sack = 0;
			} else {
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked =
				tcp_sacktag_one(sk,
						state,
						TCP_SKB_CB(skb)->sacked,
						TCP_SKB_CB(skb)->seq,
						TCP_SKB_CB(skb)->end_seq,
						dup_sack,
						tcp_skb_pcount(skb),
						tcp_skb_timestamp_us(skb));
			tcp_rate_skb_delivered(sk, skb, state->rate);
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
				list_del_init(&skb->tcp_tsorted_anchor);

			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}
	}
	return skb;
}

static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
{
	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
	struct sk_buff *skb;

	while (*p) {
		parent = *p;
		skb = rb_to_skb(parent);
		if (before(seq, TCP_SKB_CB(skb)->seq)) {
			p = &parent->rb_left;
			continue;
		}
		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
			p = &parent->rb_right;
			continue;
		}
		return skb;
	}
	return NULL;
}

static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
					u32 skip_to_seq)
{
	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
		return skb;

	return tcp_sacktag_bsearch(sk, skip_to_seq);
}

static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
						struct sock *sk,
						struct tcp_sack_block *next_dup,
						struct tcp_sacktag_state *state,
						u32 skip_to_seq)
{
	if (!next_dup)
		return skb;

	if (before(next_dup->start_seq, skip_to_seq)) {
		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
		skb = tcp_sacktag_walk(skb, sk, NULL, state,
				       next_dup->start_seq, next_dup->end_seq,
				       1);
	}

	return skb;
}

static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
{
	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
}

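/* Walk the retransmit queue and tag skbs covered by the SACK blocks in this
 * ACK, reusing recv_sack_cache from the previous ACK to skip ranges that
 * were already processed.
 */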
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			u32 prior_snd_una, struct tcp_sacktag_state *state)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const unsigned char *ptr = (skb_transport_header(ack_skb) +
				    TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct sk_buff *skb;
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
	bool found_dup_sack = false;
	int i, j;
	int first_sack_index;

	state->flag = 0;
	state->reord = tp->snd_nxt;

	if (!tp->sacked_out)
		tcp_highest_sack_reset(sk);

	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una);
	if (found_dup_sack) {
		state->flag |= FLAG_DSACKING_ACK;
		tp->delivered++;
	}

	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	used_sacks = 0;
	first_sack_index = 0;
	for (i = 0; i < num_sacks; i++) {
		bool dup_sack = !i && found_dup_sack;

		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

		if (!tcp_is_sackblock_valid(tp, dup_sack,
					    sp[used_sacks].start_seq,
					    sp[used_sacks].end_seq)) {
			int mib_idx;

			if (dup_sack) {
				if (!tp->undo_marker)
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
				else
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
			} else {
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(sp[used_sacks].end_seq, tp->snd_una))
					continue;
				mib_idx = LINUX_MIB_TCPSACKDISCARD;
			}

			NET_INC_STATS(sock_net(sk), mib_idx);
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		if (!after(sp[used_sacks].end_seq, prior_snd_una))
			continue;

		used_sacks++;
	}

	for (i = used_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
				swap(sp[j], sp[j + 1]);

				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}

	state->mss_now = tcp_current_mss(sk);
	skb = NULL;
	i = 0;

	if (!tp->sacked_out) {
		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
	} else {
		cache = tp->recv_sack_cache;

		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
		       !cache->end_seq)
			cache++;
	}

	while (i < used_sacks) {
		u32 start_seq = sp[i].start_seq;
		u32 end_seq = sp[i].end_seq;
		bool dup_sack = (found_dup_sack && (i == first_sack_index));
		struct tcp_sack_block *next_dup = NULL;

		if (found_dup_sack && ((i + 1) == first_sack_index))
			next_dup = &sp[i + 1];

		while (tcp_sack_cache_ok(tp, cache) &&
		       !before(start_seq, cache->end_seq))
			cache++;

		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
		    after(end_seq, cache->start_seq)) {

			if (before(start_seq, cache->start_seq)) {
				skb = tcp_sacktag_skip(skb, sk, start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
						       state,
						       start_seq,
						       cache->start_seq,
						       dup_sack);
			}

			if (!after(end_seq, cache->end_seq))
				goto advance_sp;

			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
						       state,
						       cache->end_seq);

			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
				skb = tcp_highest_sack(sk);
				if (!skb)
					break;
				cache++;
				goto walk;
			}

			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);

			cache++;
			continue;
		}

		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
			skb = tcp_highest_sack(sk);
			if (!skb)
				break;
		}
		skb = tcp_sacktag_skip(skb, sk, start_seq);

walk:
		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
				       start_seq, end_seq, dup_sack);

advance_sp:
		i++;
	}

	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}
	for (j = 0; j < used_sacks; j++)
		tp->recv_sack_cache[i++] = sp[j];

	if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
		tcp_check_sack_reordering(sk, state->reord, 0);

	tcp_verify_left_out(tp);
out:

#if FASTRETRANS_DEBUG > 0
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
	return state->flag;
}

static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
{
	u32 holes;

	holes = max(tp->lost_out, 1U);
	holes = min(holes, tp->packets_out);

	if ((tp->sacked_out + holes) > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		return true;
	}
	return false;
}

static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_limit_reno_sacked(tp))
		return;

	tp->reordering = min_t(u32, tp->packets_out + addend,
			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
	tp->reord_seen++;
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}

static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
{
	if (num_dupack) {
		struct tcp_sock *tp = tcp_sk(sk);
		u32 prior_sacked = tp->sacked_out;
		s32 delivered;

		tp->sacked_out += num_dupack;
		tcp_check_reno_reordering(sk, 0);
		delivered = tp->sacked_out - prior_sacked;
		if (delivered > 0)
			tp->delivered += delivered;
		tcp_verify_left_out(tp);
	}
}

static void tcp_remove_reno_sacks(struct sock *sk, int acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (acked > 0) {
		tp->delivered += max_t(int, acked - tp->sacked_out, 1);
		if (acked - 1 >= tp->sacked_out)
			tp->sacked_out = 0;
		else
			tp->sacked_out -= acked - 1;
	}
	tcp_check_reno_reordering(sk, acked);
	tcp_verify_left_out(tp);
}

static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}

void tcp_clear_retrans(struct tcp_sock *tp)
{
	tp->retrans_out = 0;
	tp->lost_out = 0;
	tp->undo_marker = 0;
	tp->undo_retrans = -1;
	tp->sacked_out = 0;
}

static inline void tcp_init_undo(struct tcp_sock *tp)
{
	tp->undo_marker = tp->snd_una;
	tp->undo_retrans = tp->retrans_out ? : -1;
}

static bool tcp_is_rack(const struct sock *sk)
{
	return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
}

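/* On an RTO, mark packets lost: if the head of the retransmit queue was
 * SACKed, treat it as SACK reneging and clear all SACK tags; otherwise,
 * with RACK enabled, skip skbs whose RACK timeout has not yet elapsed.
 */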
static void tcp_timeout_mark_lost(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb, *head;
	bool is_reneg;

	head = tcp_rtx_queue_head(sk);
	is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
	if (is_reneg) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
		tp->sacked_out = 0;

		tp->is_sack_reneg = 1;
	} else if (tcp_is_reno(tp)) {
		tcp_reset_reno_sack(tp);
	}

	skb = head;
	skb_rbtree_walk_from(skb) {
		if (is_reneg)
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
		else if (tcp_is_rack(sk) && skb != head &&
			 tcp_rack_skb_timeout(tp, skb, 0) > 0)
			continue;
		tcp_mark_skb_lost(sk, skb);
	}
	tcp_verify_left_out(tp);
	tcp_clear_all_retrans_hints(tp);
}

void tcp_enter_loss(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;

	tcp_timeout_mark_lost(sk);

	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
	    !after(tp->high_seq, tp->snd_una) ||
	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tp->prior_cwnd = tp->snd_cwnd;
		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tcp_ca_event(sk, CA_EVENT_LOSS);
		tcp_init_undo(tp);
	}
	tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_jiffies32;

	if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
	    tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
		tp->reordering = min_t(unsigned int, tp->reordering,
				       net->ipv4.sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	tcp_ecn_queue_cwr(tp);

	tp->frto = net->ipv4.sysctl_tcp_frto &&
		   (new_recovery || icsk->icsk_retransmits) &&
		   !inet_csk(sk)->icsk_mtup.probe_size;
}

static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
	if (flag & FLAG_SACK_RENEGING) {
		struct tcp_sock *tp = tcp_sk(sk);
		unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
					  msecs_to_jiffies(10));

		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  delay, TCP_RTO_MAX);
		return true;
	}
	return false;
}

static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
	return tp->sacked_out + 1;
}

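/* Decide whether to enter fast retransmit/recovery: either something was
 * already marked lost, or (without RACK) the classic dupACK/SACK count
 * exceeds the reordering threshold.
 */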
static bool tcp_time_to_recover(struct sock *sk, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->lost_out)
		return true;

	if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
		return true;

	return false;
}

static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt, oldcnt, lost;
	unsigned int mss;
	const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;

	WARN_ON(packets > tp->packets_out);
	skb = tp->lost_skb_hint;
	if (skb) {
		if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
			return;
		cnt = tp->lost_cnt_hint;
	} else {
		skb = tcp_rtx_queue_head(sk);
		cnt = 0;
	}

	skb_rbtree_walk_from(skb) {
		tp->lost_skb_hint = skb;
		tp->lost_cnt_hint = cnt;

		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
			break;

		oldcnt = cnt;
		if (tcp_is_reno(tp) ||
		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
			cnt += tcp_skb_pcount(skb);

		if (cnt > packets) {
			if (tcp_is_sack(tp) ||
			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
			    (oldcnt >= packets))
				break;

			mss = tcp_skb_mss(skb);
			lost = (packets - oldcnt) * mss;
			if (lost < skb->len &&
			    tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
					 lost, mss, GFP_ATOMIC) < 0)
				break;
			cnt = packets;
		}

		tcp_skb_mark_lost(tp, skb);

		if (mark_head)
			break;
	}
	tcp_verify_left_out(tp);
}

static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_is_sack(tp)) {
		int sacked_upto = tp->sacked_out - tp->reordering;
		if (sacked_upto >= 0)
			tcp_mark_head_lost(sk, sacked_upto, 0);
		else if (fast_rexmit)
			tcp_mark_head_lost(sk, 1, 1);
	}
}

static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
{
	return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
	       before(tp->rx_opt.rcv_tsecr, when);
}

static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
				     const struct sk_buff *skb)
{
	return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
	       tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
}

static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
	return tp->retrans_stamp &&
	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
}

static bool tcp_any_retrans_done(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (tp->retrans_out)
		return true;

	skb = tcp_rtx_queue_head(sk);
	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
		return true;

	return false;
}

static void DBGUNDO(struct sock *sk, const char *msg)
{
#if FASTRETRANS_DEBUG > 1
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (sk->sk_family == AF_INET) {
		pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &inet->inet_daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (sk->sk_family == AF_INET6) {
		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
			 msg,
			 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
			 tp->snd_cwnd, tcp_left_out(tp),
			 tp->snd_ssthresh, tp->prior_ssthresh,
			 tp->packets_out);
	}
#endif
#endif
}

static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (unmark_loss) {
		struct sk_buff *skb;

		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		}
		tp->lost_out = 0;
		tcp_clear_all_retrans_hints(tp);
	}

	if (tp->prior_ssthresh) {
		const struct inet_connection_sock *icsk = inet_csk(sk);

		tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);

		if (tp->prior_ssthresh > tp->snd_ssthresh) {
			tp->snd_ssthresh = tp->prior_ssthresh;
			tcp_ecn_withdraw_cwr(tp);
		}
	}
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->undo_marker = 0;
	tp->rack.advanced = 1;
}

static inline bool tcp_may_undo(const struct tcp_sock *tp)
{
	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}

static bool tcp_try_undo_recovery(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_may_undo(tp)) {
		int mib_idx;

		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
		tcp_undo_cwnd_reduction(sk, false);
		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
			mib_idx = LINUX_MIB_TCPLOSSUNDO;
		else
			mib_idx = LINUX_MIB_TCPFULLUNDO;

		NET_INC_STATS(sock_net(sk), mib_idx);
	} else if (tp->rack.reo_wnd_persist) {
		tp->rack.reo_wnd_persist--;
	}
	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
		if (!tcp_any_retrans_done(sk))
			tp->retrans_stamp = 0;
		return true;
	}
	tcp_set_ca_state(sk, TCP_CA_Open);
	tp->is_sack_reneg = 0;
	return false;
}

static bool tcp_try_undo_dsack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->undo_marker && !tp->undo_retrans) {
		tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
					       tp->rack.reo_wnd_persist + 1);
		DBGUNDO(sk, "D-SACK");
		tcp_undo_cwnd_reduction(sk, false);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
		return true;
	}
	return false;
}

static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (frto_undo || tcp_may_undo(tp)) {
		tcp_undo_cwnd_reduction(sk, true);

		DBGUNDO(sk, "partial loss");
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
		if (frto_undo)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPSPURIOUSRTOS);
		inet_csk(sk)->icsk_retransmits = 0;
		if (frto_undo || tcp_is_sack(tp)) {
			tcp_set_ca_state(sk, TCP_CA_Open);
			tp->is_sack_reneg = 0;
		}
		return true;
	}
	return false;
}

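/* Congestion window reduction using the Proportional Rate Reduction (PRR)
 * algorithm (RFC 6937): tcp_init_cwnd_reduction() snapshots state when
 * CWR/Recovery starts, and tcp_cwnd_reduction() paces cwnd down toward
 * ssthresh as data is ACKed or SACKed.
 */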
static void tcp_init_cwnd_reduction(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->high_seq = tp->snd_nxt;
	tp->tlp_high_seq = 0;
	tp->snd_cwnd_cnt = 0;
	tp->prior_cwnd = tp->snd_cwnd;
	tp->prr_delivered = 0;
	tp->prr_out = 0;
	tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
	tcp_ecn_queue_cwr(tp);
}

void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int sndcnt = 0;
	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);

	if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
		return;

	tp->prr_delivered += newly_acked_sacked;
	if (delta < 0) {
		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
			       tp->prior_cwnd - 1;
		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
	} else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
		   FLAG_RETRANS_DATA_ACKED) {
		sndcnt = min_t(int, delta,
			       max_t(int, tp->prr_delivered - tp->prr_out,
				     newly_acked_sacked) + 1);
	} else {
		sndcnt = min(delta, newly_acked_sacked);
	}

	sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}

static inline void tcp_end_cwnd_reduction(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (inet_csk(sk)->icsk_ca_ops->cong_control)
		return;

	if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
	    (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
		tp->snd_cwnd = tp->snd_ssthresh;
		tp->snd_cwnd_stamp = tcp_jiffies32;
	}
	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}

void tcp_enter_cwr(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->prior_ssthresh = 0;
	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
		tp->undo_marker = 0;
		tcp_init_cwnd_reduction(sk);
		tcp_set_ca_state(sk, TCP_CA_CWR);
	}
}
EXPORT_SYMBOL(tcp_enter_cwr);

static void tcp_try_keep_open(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int state = TCP_CA_Open;

	if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
		state = TCP_CA_Disorder;

	if (inet_csk(sk)->icsk_ca_state != state) {
		tcp_set_ca_state(sk, state);
		tp->high_seq = tp->snd_nxt;
	}
}

static void tcp_try_to_open(struct sock *sk, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_verify_left_out(tp);

	if (!tcp_any_retrans_done(sk))
		tp->retrans_stamp = 0;

	if (flag & FLAG_ECE)
		tcp_enter_cwr(sk);

	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
		tcp_try_keep_open(sk);
	}
}

static void tcp_mtup_probe_failed(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
	icsk->icsk_mtup.probe_size = 0;
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
}

static void tcp_mtup_probe_success(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	tp->prior_ssthresh = tcp_current_ssthresh(sk);
	tp->snd_cwnd = tp->snd_cwnd *
		       tcp_mss_to_mtu(sk, tp->mss_cache) /
		       icsk->icsk_mtup.probe_size;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->snd_ssthresh = tcp_current_ssthresh(sk);

	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
	icsk->icsk_mtup.probe_size = 0;
	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}

void tcp_simple_retransmit(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int mss = tcp_current_mss(sk);

	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
		if (tcp_skb_seglen(skb) > mss &&
		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out -= tcp_skb_pcount(skb);
			}
			tcp_skb_mark_lost_uncond_verify(tp, skb);
		}
	}

	tcp_clear_retrans_hints_partial(tp);

	if (!tp->lost_out)
		return;

	if (tcp_is_reno(tp))
		tcp_limit_reno_sacked(tp);

	tcp_verify_left_out(tp);

	if (icsk->icsk_ca_state != TCP_CA_Loss) {
		tp->high_seq = tp->snd_nxt;
		tp->snd_ssthresh = tcp_current_ssthresh(sk);
		tp->prior_ssthresh = 0;
		tp->undo_marker = 0;
		tcp_set_ca_state(sk, TCP_CA_Loss);
	}
	tcp_xmit_retransmit_queue(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);

void tcp_enter_recovery(struct sock *sk, bool ece_ack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int mib_idx;

	if (tcp_is_reno(tp))
		mib_idx = LINUX_MIB_TCPRENORECOVERY;
	else
		mib_idx = LINUX_MIB_TCPSACKRECOVERY;

	NET_INC_STATS(sock_net(sk), mib_idx);

	tp->prior_ssthresh = 0;
	tcp_init_undo(tp);

	if (!tcp_in_cwnd_reduction(sk)) {
		if (!ece_ack)
			tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tcp_init_cwnd_reduction(sk);
	}
	tcp_set_ca_state(sk, TCP_CA_Recovery);
}

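/* Process an ACK received while in CA_Loss: handle F-RTO (RFC 5682) undo of
 * spurious RTOs and decide whether to retransmit lost data or send new data.
 */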
2663static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
2664 int *rexmit)
2665{
2666 struct tcp_sock *tp = tcp_sk(sk);
2667 bool recovered = !before(tp->snd_una, tp->high_seq);
2668
2669 if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) &&
2670 tcp_try_undo_loss(sk, false))
2671 return;
2672
2673 if (tp->frto) {
2674 /* Step 3.b. A timeout is spurious if not all data are
2675  * lost, i.e., never-retransmitted data are (s)acked.
2676  */
2677 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2678 tcp_try_undo_loss(sk, true))
2679 return;
2680
2681 if (after(tp->snd_nxt, tp->high_seq)) {
2682 if (flag & FLAG_DATA_SACKED || num_dupack)
2683 tp->frto = 0;
2684 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2685 tp->high_seq = tp->snd_nxt;
2686 /* Step 2.b. Try to send new data (but deferred until cwnd
2687  * is updated in tcp_ack()); otherwise fall back to
2688  * fast retransmit (step 2.c. in RFC 5682).
2689  */
2690 if (!tcp_write_queue_empty(sk) &&
2691 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2692 *rexmit = REXMIT_NEW;
2693 return;
2694 }
2695 tp->frto = 0;
2696 }
2697 }
2698
2699 if (recovered) {
2700 /* F-RTO RFC 5682 sec 3.1 step 2.a and the first part of step 3.a */
2701 tcp_try_undo_recovery(sk);
2702 return;
2703 }
2704 if (tcp_is_reno(tp)) {
2705 /* A Reno DUPACK means new data in the F-RTO step 2.b above are
2706  * delivered. Lower inflight to clock out (re)transmissions.
2707  */
2708 if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
2709 tcp_add_reno_sack(sk, num_dupack);
2710 else if (flag & FLAG_SND_UNA_ADVANCED)
2711 tcp_reset_reno_sack(tp);
2712 }
2713 *rexmit = REXMIT_LOST;
2714}
2715
2716 /* Undo during fast recovery after partial ACK. */
2717static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
2718{
2719 struct tcp_sock *tp = tcp_sk(sk);
2720
2721 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2722 /* Plain luck! The hole was filled by the original (delayed) packet
2723  * rather than by a retransmit, so check for reordering.
2724  */
2725 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2726
2727 /* We are getting evidence that the reordering degree is higher
2728  * than we realized. Hold off on undoing while retransmissions are
2729  * still outstanding; the undo below only happens once every
2730  * retransmitted segment has been cumulatively acked.
2731  */
2732 if (tp->retrans_out)
2733 return true;
2734
2735 if (!tcp_any_retrans_done(sk))
2736 tp->retrans_stamp = 0;
2737
2738 DBGUNDO(sk, "partial recovery");
2739 tcp_undo_cwnd_reduction(sk, true);
2740 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2741 tcp_try_keep_open(sk);
2742 return true;
2743 }
2744 return false;
2745}
2746
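/* Mark packets lost with the detector in use: NewReno heuristics for
 * non-SACK flows, RACK otherwise. Tell the caller if RACK marked a
 * retransmitted packet as lost again.
 */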
2747static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2748{
2749 struct tcp_sock *tp = tcp_sk(sk);
2750
2751 if (tcp_rtx_queue_empty(sk))
2752 return;
2753
2754 if (unlikely(tcp_is_reno(tp))) {
2755 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2756 } else if (tcp_is_rack(sk)) {
2757 u32 prior_retrans = tp->retrans_out;
2758
2759 tcp_rack_mark_lost(sk);
2760 if (prior_retrans > tp->retrans_out)
2761 *ack_flag |= FLAG_LOST_RETRANS;
2762 }
2763}
2764
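/* The highest SACKed sequence is far enough above SND.UNA (more than the
 * reordering window) that a fast retransmit is warranted.
 */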
2765static bool tcp_force_fast_retransmit(struct sock *sk)
2766{
2767 struct tcp_sock *tp = tcp_sk(sk);
2768
2769 return after(tcp_highest_sack_seq(tp),
2770 tp->snd_una + tp->reordering * tp->mss_cache);
2771}
2772
2773 /* Process an event which can update packets-in-flight non-trivially.
2774  * The main goal of this function is to calculate a new estimate for
2775  * left_out, taking into account both packets sitting in the receiver's
2776  * buffer and packets lost by the network.
2777  *
2778  * Besides that it updates the congestion state when packet loss or ECN
2779  * is detected. But it does not reduce the cwnd; that is done by the
2780  * congestion control later.
2781  *
2782  * It does _not_ decide what to send; that is done in
2783  * tcp_xmit_retransmit_queue().
2784  */
2785static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2786 int num_dupack, int *ack_flag, int *rexmit)
2787{
2788 struct inet_connection_sock *icsk = inet_csk(sk);
2789 struct tcp_sock *tp = tcp_sk(sk);
2790 int fast_rexmit = 0, flag = *ack_flag;
2791 bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
2792 tcp_force_fast_retransmit(sk));
2793
2794 if (!tp->packets_out && tp->sacked_out)
2795 tp->sacked_out = 0;
2796
2797 /* Now the state machine starts.
2798  * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
2799 if (flag & FLAG_ECE)
2800 tp->prior_ssthresh = 0;
2801
2802 /* B. In all the states check for reneging SACKs. */
2803 if (tcp_check_sack_reneging(sk, flag))
2804 return;
2805
2806 /* C. Check consistency of the current state. */
2807 tcp_verify_left_out(tp);
2808
2809 /* D. Check state exit conditions. State can be terminated
2810  *    when high_seq is ACKed. */
2811 if (icsk->icsk_ca_state == TCP_CA_Open) {
2812 WARN_ON(tp->retrans_out != 0);
2813 tp->retrans_stamp = 0;
2814 } else if (!before(tp->snd_una, tp->high_seq)) {
2815 switch (icsk->icsk_ca_state) {
2816 case TCP_CA_CWR:
2817 /* CWR has to be held until something *above* high_seq
2818  * is ACKed, so that the CWR bit reaches the receiver. */
2819 if (tp->snd_una != tp->high_seq) {
2820 tcp_end_cwnd_reduction(sk);
2821 tcp_set_ca_state(sk, TCP_CA_Open);
2822 }
2823 break;
2824
2825 case TCP_CA_Recovery:
2826 if (tcp_is_reno(tp))
2827 tcp_reset_reno_sack(tp);
2828 if (tcp_try_undo_recovery(sk))
2829 return;
2830 tcp_end_cwnd_reduction(sk);
2831 break;
2832 }
2833 }
2834
2835
2836 switch (icsk->icsk_ca_state) {
2837 case TCP_CA_Recovery:
2838 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2839 if (tcp_is_reno(tp))
2840 tcp_add_reno_sack(sk, num_dupack);
2841 } else {
2842 if (tcp_try_undo_partial(sk, prior_snd_una))
2843 return;
2844
2845 do_lost = tcp_is_reno(tp) ||
2846 tcp_force_fast_retransmit(sk);
2847 }
2848 if (tcp_try_undo_dsack(sk)) {
2849 tcp_try_keep_open(sk);
2850 return;
2851 }
2852 tcp_identify_packet_loss(sk, ack_flag);
2853 break;
2854 case TCP_CA_Loss:
2855 tcp_process_loss(sk, flag, num_dupack, rexmit);
2856 tcp_identify_packet_loss(sk, ack_flag);
2857 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
2858 (*ack_flag & FLAG_LOST_RETRANS)))
2859 return;
2860 /* Change state if cwnd is undone or retransmits are lost */
2861 /* fall through */
2862 default:
2863 if (tcp_is_reno(tp)) {
2864 if (flag & FLAG_SND_UNA_ADVANCED)
2865 tcp_reset_reno_sack(tp);
2866 tcp_add_reno_sack(sk, num_dupack);
2867 }
2868
2869 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2870 tcp_try_undo_dsack(sk);
2871
2872 tcp_identify_packet_loss(sk, ack_flag);
2873 if (!tcp_time_to_recover(sk, flag)) {
2874 tcp_try_to_open(sk, flag);
2875 return;
2876 }
2877
2878
2879 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2880 icsk->icsk_mtup.probe_size &&
2881 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2882 tcp_mtup_probe_failed(sk);
2883 /* Restores the reduction we did in tcp_mtup_probe() */
2884 tp->snd_cwnd++;
2885 tcp_simple_retransmit(sk);
2886 return;
2887 }
2888
2889 /* Otherwise enter Recovery state */
2890 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2891 fast_rexmit = 1;
2892 }
2893
2894 if (!tcp_is_rack(sk) && do_lost)
2895 tcp_update_scoreboard(sk, fast_rexmit);
2896 *rexmit = REXMIT_LOST;
2897}
2898
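/* Feed an RTT sample into the windowed min-RTT filter, skipping samples that
 * may have been inflated by a delayed ACK.
 */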
2899static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2900{
2901 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2902 struct tcp_sock *tp = tcp_sk(sk);
2903
2904 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
2905 /* The ACK was probably delayed by the receiver, so this sample is
2906  * likely inflated. Skip it unless it would still lower the current
2907  * windowed minimum.
2908  */
2909 return;
2910 }
2911 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
2912 rtt_us ? : jiffies_to_usecs(1));
2913}
2914
2915static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2916 long seq_rtt_us, long sack_rtt_us,
2917 long ca_rtt_us, struct rate_sample *rs)
2918{
2919 const struct tcp_sock *tp = tcp_sk(sk);
2920
2921 /* Prefer RTT measured from the ACK's timing to TS-ECR. This is because
2922  * broken middle-boxes or peers may corrupt TS-ECR fields. But
2923  * Karn's algorithm forbids taking RTT if some retransmitted data
2924  * is acked (RFC 6298).
2925  */
2926 if (seq_rtt_us < 0)
2927 seq_rtt_us = sack_rtt_us;
2928
2929 /* RTTM Rule: A TSecr value received in a segment is used to
2930  * update the averaged RTT measurement only if the segment
2931  * acknowledges some new data, i.e., only if it advances the
2932  * left edge of the send window.
2933  * See draft-ietf-tcplw-high-performance-00, section 3.3.
2934  */
2935 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2936 flag & FLAG_ACKED) {
2937 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
2938
2939 if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
2940 seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
2941 ca_rtt_us = seq_rtt_us;
2942 }
2943 }
2944 rs->rtt_us = ca_rtt_us;
2945 if (seq_rtt_us < 0)
2946 return false;
2947
2948 /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
2949  * always taken together with ACK, SACK, or TS-opts. Any negative
2950  * values will be skipped by the seq_rtt_us < 0 check above.
2951  */
2952 tcp_update_rtt_min(sk, ca_rtt_us, flag);
2953 tcp_rtt_estimator(sk, seq_rtt_us);
2954 tcp_set_rto(sk);
2955
2956 /* RFC 6298: only reset backoff on a valid RTT measurement. */
2957 inet_csk(sk)->icsk_backoff = 0;
2958 return true;
2959}
2960
2961 /* Compute time elapsed between the (last) SYNACK skb and this ack */
2962void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
2963{
2964 struct rate_sample rs;
2965 long rtt_us = -1L;
2966
2967 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
2968 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
2969
2970 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
2971}
2972
2973
2974static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
2975{
2976 const struct inet_connection_sock *icsk = inet_csk(sk);
2977
2978 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
2979 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
2980}
2981
2982 /* Restart the timer after forward progress on the connection.
2983  * RFC 2988 recommends restarting the timer to now+rto.
2984  */
2985void tcp_rearm_rto(struct sock *sk)
2986{
2987 const struct inet_connection_sock *icsk = inet_csk(sk);
2988 struct tcp_sock *tp = tcp_sk(sk);
2989
2990 /* If the retrans timer is currently being used by Fast Open
2991  * for SYN-ACK retransmission purposes, stay put.
2992  */
2993 if (tp->fastopen_rsk)
2994 return;
2995
2996 if (!tp->packets_out) {
2997 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2998 } else {
2999 u32 rto = inet_csk(sk)->icsk_rto;
3000
3001 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
3002 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3003 s64 delta_us = tcp_rto_delta_us(sk);
3004
3005 /* delta_us may not be positive if the socket is locked
3006  * when the retrans timer fires and is rescheduled. */
3007 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3008 }
3009 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3010 TCP_RTO_MAX, tcp_rtx_queue_head(sk));
3011 }
3012}
3013
3014 /* Try to schedule a loss probe; if that is not possible, arm the RTO timer. */
3015static void tcp_set_xmit_timer(struct sock *sk)
3016{
3017 if (!tcp_schedule_loss_probe(sk, true))
3018 tcp_rearm_rto(sk);
3019}
3020
3021 /* If we get here, the whole TSO packet has not been acked. */
3022static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3023{
3024 struct tcp_sock *tp = tcp_sk(sk);
3025 u32 packets_acked;
3026
3027 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3028
3029 packets_acked = tcp_skb_pcount(skb);
3030 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3031 return 0;
3032 packets_acked -= tcp_skb_pcount(skb);
3033
3034 if (packets_acked) {
3035 BUG_ON(tcp_skb_pcount(skb) == 0);
3036 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3037 }
3038
3039 return packets_acked;
3040}
3041
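/* Issue an SCM_TSTAMP_ACK timestamp if this ACK covers the skb carrying the
 * user's tskey.
 */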
3042static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3043 u32 prior_snd_una)
3044{
3045 const struct skb_shared_info *shinfo;
3046
3047
3048 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3049 return;
3050
3051 shinfo = skb_shinfo(skb);
3052 if (!before(shinfo->tskey, prior_snd_una) &&
3053 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3054 tcp_skb_tsorted_save(skb) {
3055 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3056 } tcp_skb_tsorted_restore(skb);
3057 }
3058}
3059
3060 /* Remove acknowledged frames from the retransmission queue. If our packet
3061  * is before the ack sequence we can discard it as it's confirmed to have
3062  * arrived at the other end.
3063  */
3064static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
3065 u32 prior_snd_una,
3066 struct tcp_sacktag_state *sack)
3067{
3068 const struct inet_connection_sock *icsk = inet_csk(sk);
3069 u64 first_ackt, last_ackt;
3070 struct tcp_sock *tp = tcp_sk(sk);
3071 u32 prior_sacked = tp->sacked_out;
3072 u32 reord = tp->snd_nxt;
3073 struct sk_buff *skb, *next;
3074 bool fully_acked = true;
3075 long sack_rtt_us = -1L;
3076 long seq_rtt_us = -1L;
3077 long ca_rtt_us = -1L;
3078 u32 pkts_acked = 0;
3079 u32 last_in_flight = 0;
3080 bool rtt_update;
3081 int flag = 0;
3082
3083 first_ackt = 0;
3084
3085 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3086 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3087 const u32 start_seq = scb->seq;
3088 u8 sacked = scb->sacked;
3089 u32 acked_pcount;
3090
3091 tcp_ack_tstamp(sk, skb, prior_snd_una);
3092
3093
3094 if (after(scb->end_seq, tp->snd_una)) {
3095 if (tcp_skb_pcount(skb) == 1 ||
3096 !after(tp->snd_una, scb->seq))
3097 break;
3098
3099 acked_pcount = tcp_tso_acked(sk, skb);
3100 if (!acked_pcount)
3101 break;
3102 fully_acked = false;
3103 } else {
3104 acked_pcount = tcp_skb_pcount(skb);
3105 }
3106
3107 if (unlikely(sacked & TCPCB_RETRANS)) {
3108 if (sacked & TCPCB_SACKED_RETRANS)
3109 tp->retrans_out -= acked_pcount;
3110 flag |= FLAG_RETRANS_DATA_ACKED;
3111 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3112 last_ackt = tcp_skb_timestamp_us(skb);
3113 WARN_ON_ONCE(last_ackt == 0);
3114 if (!first_ackt)
3115 first_ackt = last_ackt;
3116
3117 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3118 if (before(start_seq, reord))
3119 reord = start_seq;
3120 if (!after(scb->end_seq, tp->high_seq))
3121 flag |= FLAG_ORIG_SACK_ACKED;
3122 }
3123
3124 if (sacked & TCPCB_SACKED_ACKED) {
3125 tp->sacked_out -= acked_pcount;
3126 } else if (tcp_is_sack(tp)) {
3127 tp->delivered += acked_pcount;
3128 if (!tcp_skb_spurious_retrans(tp, skb))
3129 tcp_rack_advance(tp, sacked, scb->end_seq,
3130 tcp_skb_timestamp_us(skb));
3131 }
3132 if (sacked & TCPCB_LOST)
3133 tp->lost_out -= acked_pcount;
3134
3135 tp->packets_out -= acked_pcount;
3136 pkts_acked += acked_pcount;
3137 tcp_rate_skb_delivered(sk, skb, sack->rate);
3138
3139 /* Initial outgoing SYN's get put onto the write_queue
3140  * just like anything else we transmit. It is not
3141  * true data, and if we misinform our callers that
3142  * this ACK acks real data, we will erroneously exit
3143  * connection startup slow start one packet too
3144  * quickly. This is severely frowned upon behavior.
3145  */
3146 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3147 flag |= FLAG_DATA_ACKED;
3148 } else {
3149 flag |= FLAG_SYN_ACKED;
3150 tp->retrans_stamp = 0;
3151 }
3152
3153 if (!fully_acked)
3154 break;
3155
3156 next = skb_rb_next(skb);
3157 if (unlikely(skb == tp->retransmit_skb_hint))
3158 tp->retransmit_skb_hint = NULL;
3159 if (unlikely(skb == tp->lost_skb_hint))
3160 tp->lost_skb_hint = NULL;
3161 tcp_rtx_queue_unlink_and_free(skb, sk);
3162 }
3163
3164 if (!skb)
3165 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3166
3167 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3168 tp->snd_up = tp->snd_una;
3169
3170 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3171 flag |= FLAG_SACK_RENEGING;
3172
3173 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3174 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3175 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3176
3177 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3178 last_in_flight && !prior_sacked && fully_acked &&
3179 sack->rate->prior_delivered + 1 == tp->delivered &&
3180 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
3181
3182
3183
3184
3185 flag |= FLAG_ACK_MAYBE_DELAYED;
3186 }
3187 }
3188 if (sack->first_sackt) {
3189 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3190 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3191 }
3192 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3193 ca_rtt_us, sack->rate);
3194
3195 if (flag & FLAG_ACKED) {
3196 flag |= FLAG_SET_XMIT_TIMER;
3197 if (unlikely(icsk->icsk_mtup.probe_size &&
3198 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3199 tcp_mtup_probe_success(sk);
3200 }
3201
3202 if (tcp_is_reno(tp)) {
3203 tcp_remove_reno_sacks(sk, pkts_acked);
3204
3205
3206
3207
3208
3209
3210
3211 if (flag & FLAG_RETRANS_DATA_ACKED)
3212 flag &= ~FLAG_ORIG_SACK_ACKED;
3213 } else {
3214 int delta;
3215
3216
3217 if (before(reord, prior_fack))
3218 tcp_check_sack_reordering(sk, reord, 0);
3219
3220 delta = prior_sacked - tp->sacked_out;
3221 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3222 }
3223 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3224 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
3225 tcp_skb_timestamp_us(skb))) {
3226
3227
3228
3229
3230 flag |= FLAG_SET_XMIT_TIMER;
3231 }
3232
3233 if (icsk->icsk_ca_ops->pkts_acked) {
3234 struct ack_sample sample = { .pkts_acked = pkts_acked,
3235 .rtt_us = sack->rate->rtt_us,
3236 .in_flight = last_in_flight };
3237
3238 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3239 }
3240
3241#if FASTRETRANS_DEBUG > 0
3242 WARN_ON((int)tp->sacked_out < 0);
3243 WARN_ON((int)tp->lost_out < 0);
3244 WARN_ON((int)tp->retrans_out < 0);
3245 if (!tp->packets_out && tcp_is_sack(tp)) {
3246 icsk = inet_csk(sk);
3247 if (tp->lost_out) {
3248 pr_debug("Leak l=%u %d\n",
3249 tp->lost_out, icsk->icsk_ca_state);
3250 tp->lost_out = 0;
3251 }
3252 if (tp->sacked_out) {
3253 pr_debug("Leak s=%u %d\n",
3254 tp->sacked_out, icsk->icsk_ca_state);
3255 tp->sacked_out = 0;
3256 }
3257 if (tp->retrans_out) {
3258 pr_debug("Leak r=%u %d\n",
3259 tp->retrans_out, icsk->icsk_ca_state);
3260 tp->retrans_out = 0;
3261 }
3262 }
3263#endif
3264 return flag;
3265}
3266
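/* Zero-window probing: once the window opens enough to send the head of the
 * write queue, clear the backoff and stop the probe timer; otherwise re-arm it.
 */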
3267static void tcp_ack_probe(struct sock *sk)
3268{
3269 struct inet_connection_sock *icsk = inet_csk(sk);
3270 struct sk_buff *head = tcp_send_head(sk);
3271 const struct tcp_sock *tp = tcp_sk(sk);
3272
3273
3274 if (!head)
3275 return;
3276 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3277 icsk->icsk_backoff = 0;
3278 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3279
3280
3281
3282 } else {
3283 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3284
3285 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3286 when, TCP_RTO_MAX, NULL);
3287 }
3288}
3289
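/* An ACK is "dubious" if it is a duplicate, carries a congestion signal, or
 * arrives while we are not in the Open state.
 */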
3290static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3291{
3292 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3293 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3294}
3295
3296
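/* Decide whether the congestion-control increase function may run for this ACK:
 * with high reordering, grow cwnd whenever data is delivered (acked or sacked);
 * otherwise only when the ACK cumulatively acknowledges new data.
 */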
3297static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3298{
3299
3300
3301
3302
3303
3304
3305 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3306 return flag & FLAG_FORWARD_PROGRESS;
3307
3308 return flag & FLAG_DATA_ACKED;
3309}
3310
3311
3312
3313
3314
3315
3316static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3317 int flag, const struct rate_sample *rs)
3318{
3319 const struct inet_connection_sock *icsk = inet_csk(sk);
3320
3321 if (icsk->icsk_ca_ops->cong_control) {
3322 icsk->icsk_ca_ops->cong_control(sk, rs);
3323 return;
3324 }
3325
3326 if (tcp_in_cwnd_reduction(sk)) {
3327
3328 tcp_cwnd_reduction(sk, acked_sacked, flag);
3329 } else if (tcp_may_raise_cwnd(sk, flag)) {
3330
3331 tcp_cong_avoid(sk, ack, acked_sacked);
3332 }
3333 tcp_update_pacing_rate(sk);
3334}
3335
3336
3337
3338
3339static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3340 const u32 ack, const u32 ack_seq,
3341 const u32 nwin)
3342{
3343 return after(ack, tp->snd_una) ||
3344 after(ack_seq, tp->snd_wl1) ||
3345 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3346}
3347
3348
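/* If we update tp->snd_una, also update tp->bytes_acked */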
3349static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3350{
3351 u32 delta = ack - tp->snd_una;
3352
3353 sock_owned_by_me((struct sock *)tp);
3354 tp->bytes_acked += delta;
3355 tp->snd_una = ack;
3356}
3357
3358
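/* If we update tp->rcv_nxt, also update tp->bytes_received */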
3359static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3360{
3361 u32 delta = seq - tp->rcv_nxt;
3362
3363 sock_owned_by_me((struct sock *)tp);
3364 tp->bytes_received += delta;
3365 tp->rcv_nxt = seq;
3366}
3367
3368
3369
3370
3371
3372
3373static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3374 u32 ack_seq)
3375{
3376 struct tcp_sock *tp = tcp_sk(sk);
3377 int flag = 0;
3378 u32 nwin = ntohs(tcp_hdr(skb)->window);
3379
3380 if (likely(!tcp_hdr(skb)->syn))
3381 nwin <<= tp->rx_opt.snd_wscale;
3382
3383 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3384 flag |= FLAG_WIN_UPDATE;
3385 tcp_update_wl(tp, ack_seq);
3386
3387 if (tp->snd_wnd != nwin) {
3388 tp->snd_wnd = nwin;
3389
3390
3391
3392
3393 tp->pred_flags = 0;
3394 tcp_fast_path_check(sk);
3395
3396 if (!tcp_write_queue_empty(sk))
3397 tcp_slow_start_after_idle_check(sk);
3398
3399 if (nwin > tp->max_window) {
3400 tp->max_window = nwin;
3401 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3402 }
3403 }
3404 }
3405
3406 tcp_snd_una_update(tp, ack);
3407
3408 return flag;
3409}
3410
3411static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3412 u32 *last_oow_ack_time)
3413{
3414 if (*last_oow_ack_time) {
3415 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3416
3417 if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
3418 NET_INC_STATS(net, mib_idx);
3419 return true;
3420 }
3421 }
3422
3423 *last_oow_ack_time = tcp_jiffies32;
3424
3425 return false;
3426}
3427
3428
3429
3430
3431
3432
3433
3434
3435bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3436 int mib_idx, u32 *last_oow_ack_time)
3437{
3438
3439 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3440 !tcp_hdr(skb)->syn)
3441 return false;
3442
3443 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3444}
3445
3446
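/* RFC 5961 7 [ACK Throttling]: challenge ACKs are rate-limited both per
 * socket and globally per second.
 */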
3447static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3448{
3449
3450 static u32 challenge_timestamp;
3451 static unsigned int challenge_count;
3452 struct tcp_sock *tp = tcp_sk(sk);
3453 struct net *net = sock_net(sk);
3454 u32 count, now;
3455
3456
3457 if (__tcp_oow_rate_limited(net,
3458 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3459 &tp->last_oow_ack_time))
3460 return;
3461
3462
3463 now = jiffies / HZ;
3464 if (now != challenge_timestamp) {
3465 u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
3466 u32 half = (ack_limit + 1) >> 1;
3467
3468 challenge_timestamp = now;
3469 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
3470 }
3471 count = READ_ONCE(challenge_count);
3472 if (count > 0) {
3473 WRITE_ONCE(challenge_count, count - 1);
3474 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3475 tcp_send_ack(sk);
3476 }
3477}
3478
3479static void tcp_store_ts_recent(struct tcp_sock *tp)
3480{
3481 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3482 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3483}
3484
3485static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3486{
3487 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3488
3489
3490
3491
3492
3493
3494
3495 if (tcp_paws_check(&tp->rx_opt, 0))
3496 tcp_store_ts_recent(tp);
3497 }
3498}
3499
3500 /* This routine deals with acks during a TLP episode.
3501  * We mark the end of a TLP episode on receiving a TLP dupack or when
3502  * the ack is after tlp_high_seq.
3503  * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
3504  */
3505static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3506{
3507 struct tcp_sock *tp = tcp_sk(sk);
3508
3509 if (before(ack, tp->tlp_high_seq))
3510 return;
3511
3512 if (flag & FLAG_DSACKING_ACK) {
3513
3514 tp->tlp_high_seq = 0;
3515 } else if (after(ack, tp->tlp_high_seq)) {
3516
3517
3518
3519 tcp_init_cwnd_reduction(sk);
3520 tcp_set_ca_state(sk, TCP_CA_CWR);
3521 tcp_end_cwnd_reduction(sk);
3522 tcp_try_keep_open(sk);
3523 NET_INC_STATS(sock_net(sk),
3524 LINUX_MIB_TCPLOSSPROBERECOVERY);
3525 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3526 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3527
3528 tp->tlp_high_seq = 0;
3529 }
3530}
3531
3532static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3533{
3534 const struct inet_connection_sock *icsk = inet_csk(sk);
3535
3536 if (icsk->icsk_ca_ops->in_ack_event)
3537 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3538}
3539
3540 /* Congestion control has updated the cwnd already. So if we're in
3541  * loss recovery then now we do any new sends (for FRTO) or
3542  * retransmits (for CA_Loss or CA_Recovery) that make sense.
3543  */
3544static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3545{
3546 struct tcp_sock *tp = tcp_sk(sk);
3547
3548 if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
3549 return;
3550
3551 if (unlikely(rexmit == REXMIT_NEW)) {
3552 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3553 TCP_NAGLE_OFF);
3554 if (after(tp->snd_nxt, tp->high_seq))
3555 return;
3556 tp->frto = 0;
3557 }
3558 tcp_xmit_retransmit_queue(sk);
3559}
3560
3561
3562static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3563{
3564 const struct net *net = sock_net(sk);
3565 struct tcp_sock *tp = tcp_sk(sk);
3566 u32 delivered;
3567
3568 delivered = tp->delivered - prior_delivered;
3569 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3570 if (flag & FLAG_ECE) {
3571 tp->delivered_ce += delivered;
3572 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3573 }
3574 return delivered;
3575}
3576
3577 /* This routine deals with incoming acks, but not outgoing ones. */
3578static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3579{
3580 struct inet_connection_sock *icsk = inet_csk(sk);
3581 struct tcp_sock *tp = tcp_sk(sk);
3582 struct tcp_sacktag_state sack_state;
3583 struct rate_sample rs = { .prior_delivered = 0 };
3584 u32 prior_snd_una = tp->snd_una;
3585 bool is_sack_reneg = tp->is_sack_reneg;
3586 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3587 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3588 int num_dupack = 0;
3589 int prior_packets = tp->packets_out;
3590 u32 delivered = tp->delivered;
3591 u32 lost = tp->lost;
3592 int rexmit = REXMIT_NONE;
3593 u32 prior_fack;
3594
3595 sack_state.first_sackt = 0;
3596 sack_state.rate = &rs;
3597
3598
3599 prefetch(sk->tcp_rtx_queue.rb_node);
3600
3601 /* If the ack is older than previous acks
3602  * then we can probably ignore it.
3603  */
3604 if (before(ack, prior_snd_una)) {
3605
3606 if (before(ack, prior_snd_una - tp->max_window)) {
3607 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3608 tcp_send_challenge_ack(sk, skb);
3609 return -1;
3610 }
3611 goto old_ack;
3612 }
3613
3614 /* If the ack includes data we haven't sent yet, discard
3615  * this segment (RFC 793 Section 3.9).
3616  */
3617 if (after(ack, tp->snd_nxt))
3618 return -1;
3619
3620 if (after(ack, prior_snd_una)) {
3621 flag |= FLAG_SND_UNA_ADVANCED;
3622 icsk->icsk_retransmits = 0;
3623
3624#if IS_ENABLED(CONFIG_TLS_DEVICE)
3625 if (static_branch_unlikely(&clean_acked_data_enabled.key))
3626 if (icsk->icsk_clean_acked)
3627 icsk->icsk_clean_acked(sk, ack);
3628#endif
3629 }
3630
3631 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3632 rs.prior_in_flight = tcp_packets_in_flight(tp);
3633
3634 /* The ts_recent update must be made after we are sure that the packet
3635  * is in window.
3636  */
3637 if (flag & FLAG_UPDATE_TS_RECENT)
3638 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3639
3640 if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
3641 FLAG_SND_UNA_ADVANCED) {
3642 /* Window is constant, pure forward advance.
3643  * No more checks are required.
3644  * Note, we use the fact that SND.UNA >= SND.WL2.
3645  */
3646 tcp_update_wl(tp, ack_seq);
3647 tcp_snd_una_update(tp, ack);
3648 flag |= FLAG_WIN_UPDATE;
3649
3650 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3651
3652 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3653 } else {
3654 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3655
3656 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3657 flag |= FLAG_DATA;
3658 else
3659 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3660
3661 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3662
3663 if (TCP_SKB_CB(skb)->sacked)
3664 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3665 &sack_state);
3666
3667 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3668 flag |= FLAG_ECE;
3669 ack_ev_flags |= CA_ACK_ECE;
3670 }
3671
3672 if (flag & FLAG_WIN_UPDATE)
3673 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3674
3675 tcp_in_ack_event(sk, ack_ev_flags);
3676 }
3677
3678 /* We passed data and got it acked, remove any soft error
3679  * log. Something worked...
3680  */
3681 sk->sk_err_soft = 0;
3682 icsk->icsk_probes_out = 0;
3683 tp->rcv_tstamp = tcp_jiffies32;
3684 if (!prior_packets)
3685 goto no_queue;
3686
3687
3688 flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
3689
3690 tcp_rack_update_reo_wnd(sk, &rs);
3691
3692 if (tp->tlp_high_seq)
3693 tcp_process_tlp_ack(sk, ack, flag);
3694
3695 if (flag & FLAG_SET_XMIT_TIMER)
3696 tcp_set_xmit_timer(sk);
3697
3698 if (tcp_ack_is_dubious(sk, flag)) {
3699 if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
3700 num_dupack = 1;
3701
3702 if (!(flag & FLAG_DATA))
3703 num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
3704 }
3705 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3706 &rexmit);
3707 }
3708
3709 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3710 sk_dst_confirm(sk);
3711
3712 delivered = tcp_newly_delivered(sk, delivered, flag);
3713 lost = tp->lost - lost;
3714 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3715 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3716 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3717 tcp_xmit_recovery(sk, rexmit);
3718 return 1;
3719
3720no_queue:
3721
3722 if (flag & FLAG_DSACKING_ACK) {
3723 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3724 &rexmit);
3725 tcp_newly_delivered(sk, delivered, flag);
3726 }
3727
3728 /* If this ack opens up a zero window, clear the backoff. It was
3729  * being used to time the probes, and is probably far higher than
3730  * it needs to be for normal retransmission. */
3731 tcp_ack_probe(sk);
3732
3733 if (tp->tlp_high_seq)
3734 tcp_process_tlp_ack(sk, ack, flag);
3735 return 1;
3736
3737old_ack:
3738 /* If data was SACKed, tag it and see if we should send more data.
3739  * If data was DSACKed, see if we can undo a cwnd reduction.
3740  */
3741 if (TCP_SKB_CB(skb)->sacked) {
3742 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3743 &sack_state);
3744 tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
3745 &rexmit);
3746 tcp_newly_delivered(sk, delivered, flag);
3747 tcp_xmit_recovery(sk, rexmit);
3748 }
3749
3750 return 0;
3751}
3752
3753static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3754 bool syn, struct tcp_fastopen_cookie *foc,
3755 bool exp_opt)
3756{
3757
3758 if (!foc || !syn || len < 0 || (len & 1))
3759 return;
3760
3761 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3762 len <= TCP_FASTOPEN_COOKIE_MAX)
3763 memcpy(foc->val, cookie, len);
3764 else if (len != 0)
3765 len = -1;
3766 foc->len = len;
3767 foc->exp = exp_opt;
3768}
3769
3770static void smc_parse_options(const struct tcphdr *th,
3771 struct tcp_options_received *opt_rx,
3772 const unsigned char *ptr,
3773 int opsize)
3774{
3775#if IS_ENABLED(CONFIG_SMC)
3776 if (static_branch_unlikely(&tcp_have_smc)) {
3777 if (th->syn && !(opsize & 1) &&
3778 opsize >= TCPOLEN_EXP_SMC_BASE &&
3779 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
3780 opt_rx->smc_ok = 1;
3781 }
3782#endif
3783}
3784
3785 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
3786  * But, this can also be called on packets in the established flow when
3787  * the fast version below fails.
3788  */
3789void tcp_parse_options(const struct net *net,
3790 const struct sk_buff *skb,
3791 struct tcp_options_received *opt_rx, int estab,
3792 struct tcp_fastopen_cookie *foc)
3793{
3794 const unsigned char *ptr;
3795 const struct tcphdr *th = tcp_hdr(skb);
3796 int length = (th->doff * 4) - sizeof(struct tcphdr);
3797
3798 ptr = (const unsigned char *)(th + 1);
3799 opt_rx->saw_tstamp = 0;
3800
3801 while (length > 0) {
3802 int opcode = *ptr++;
3803 int opsize;
3804
3805 switch (opcode) {
3806 case TCPOPT_EOL:
3807 return;
3808 case TCPOPT_NOP:
3809 length--;
3810 continue;
3811 default:
3812 if (length < 2)
3813 return;
3814 opsize = *ptr++;
3815 if (opsize < 2)
3816 return;
3817 if (opsize > length)
3818 return;
3819 switch (opcode) {
3820 case TCPOPT_MSS:
3821 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3822 u16 in_mss = get_unaligned_be16(ptr);
3823 if (in_mss) {
3824 if (opt_rx->user_mss &&
3825 opt_rx->user_mss < in_mss)
3826 in_mss = opt_rx->user_mss;
3827 opt_rx->mss_clamp = in_mss;
3828 }
3829 }
3830 break;
3831 case TCPOPT_WINDOW:
3832 if (opsize == TCPOLEN_WINDOW && th->syn &&
3833 !estab && net->ipv4.sysctl_tcp_window_scaling) {
3834 __u8 snd_wscale = *(__u8 *)ptr;
3835 opt_rx->wscale_ok = 1;
3836 if (snd_wscale > TCP_MAX_WSCALE) {
3837 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
3838 __func__,
3839 snd_wscale,
3840 TCP_MAX_WSCALE);
3841 snd_wscale = TCP_MAX_WSCALE;
3842 }
3843 opt_rx->snd_wscale = snd_wscale;
3844 }
3845 break;
3846 case TCPOPT_TIMESTAMP:
3847 if ((opsize == TCPOLEN_TIMESTAMP) &&
3848 ((estab && opt_rx->tstamp_ok) ||
3849 (!estab && net->ipv4.sysctl_tcp_timestamps))) {
3850 opt_rx->saw_tstamp = 1;
3851 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3852 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3853 }
3854 break;
3855 case TCPOPT_SACK_PERM:
3856 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3857 !estab && net->ipv4.sysctl_tcp_sack) {
3858 opt_rx->sack_ok = TCP_SACK_SEEN;
3859 tcp_sack_reset(opt_rx);
3860 }
3861 break;
3862
3863 case TCPOPT_SACK:
3864 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3865 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3866 opt_rx->sack_ok) {
3867 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3868 }
3869 break;
3870#ifdef CONFIG_TCP_MD5SIG
3871 case TCPOPT_MD5SIG:
3872
3873
3874
3875
3876 break;
3877#endif
3878 case TCPOPT_FASTOPEN:
3879 tcp_parse_fastopen_option(
3880 opsize - TCPOLEN_FASTOPEN_BASE,
3881 ptr, th->syn, foc, false);
3882 break;
3883
3884 case TCPOPT_EXP:
3885
3886
3887
3888 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
3889 get_unaligned_be16(ptr) ==
3890 TCPOPT_FASTOPEN_MAGIC)
3891 tcp_parse_fastopen_option(opsize -
3892 TCPOLEN_EXP_FASTOPEN_BASE,
3893 ptr + 2, th->syn, foc, true);
3894 else
3895 smc_parse_options(th, opt_rx, ptr,
3896 opsize);
3897 break;
3898
3899 }
3900 ptr += opsize-2;
3901 length -= opsize;
3902 }
3903 }
3904}
3905EXPORT_SYMBOL(tcp_parse_options);
3906
3907static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3908{
3909 const __be32 *ptr = (const __be32 *)(th + 1);
3910
3911 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3912 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3913 tp->rx_opt.saw_tstamp = 1;
3914 ++ptr;
3915 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3916 ++ptr;
3917 if (*ptr)
3918 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3919 else
3920 tp->rx_opt.rcv_tsecr = 0;
3921 return true;
3922 }
3923 return false;
3924}
3925
3926 /* Fast parse options. This hopes to only see timestamps.
3927  * If it is wrong it falls back on tcp_parse_options().
3928  */
3929static bool tcp_fast_parse_options(const struct net *net,
3930 const struct sk_buff *skb,
3931 const struct tcphdr *th, struct tcp_sock *tp)
3932{
3933
3934
3935
3936 if (th->doff == (sizeof(*th) / 4)) {
3937 tp->rx_opt.saw_tstamp = 0;
3938 return false;
3939 } else if (tp->rx_opt.tstamp_ok &&
3940 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3941 if (tcp_parse_aligned_timestamp(tp, th))
3942 return true;
3943 }
3944
3945 tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
3946 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3947 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3948
3949 return true;
3950}
3951
3952#ifdef CONFIG_TCP_MD5SIG
3953
3954
3955
3956const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3957{
3958 int length = (th->doff << 2) - sizeof(*th);
3959 const u8 *ptr = (const u8 *)(th + 1);
3960
3961
3962 while (length >= TCPOLEN_MD5SIG) {
3963 int opcode = *ptr++;
3964 int opsize;
3965
3966 switch (opcode) {
3967 case TCPOPT_EOL:
3968 return NULL;
3969 case TCPOPT_NOP:
3970 length--;
3971 continue;
3972 default:
3973 opsize = *ptr++;
3974 if (opsize < 2 || opsize > length)
3975 return NULL;
3976 if (opcode == TCPOPT_MD5SIG)
3977 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3978 }
3979 ptr += opsize - 2;
3980 length -= opsize;
3981 }
3982 return NULL;
3983}
3984EXPORT_SYMBOL(tcp_parse_md5sig_option);
3985#endif
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4011{
4012 const struct tcp_sock *tp = tcp_sk(sk);
4013 const struct tcphdr *th = tcp_hdr(skb);
4014 u32 seq = TCP_SKB_CB(skb)->seq;
4015 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4016
4017 return (
4018 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4019
4020
4021 ack == tp->snd_una &&
4022
4023
4024 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4025
4026
4027 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4028}
4029
4030static inline bool tcp_paws_discard(const struct sock *sk,
4031 const struct sk_buff *skb)
4032{
4033 const struct tcp_sock *tp = tcp_sk(sk);
4034
4035 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4036 !tcp_disordered_ack(sk, skb);
4037}
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4053{
4054 return !before(end_seq, tp->rcv_wup) &&
4055 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4056}
4057
4058
4059void tcp_reset(struct sock *sk)
4060{
4061 trace_tcp_receive_reset(sk);
4062
4063
4064 switch (sk->sk_state) {
4065 case TCP_SYN_SENT:
4066 sk->sk_err = ECONNREFUSED;
4067 break;
4068 case TCP_CLOSE_WAIT:
4069 sk->sk_err = EPIPE;
4070 break;
4071 case TCP_CLOSE:
4072 return;
4073 default:
4074 sk->sk_err = ECONNRESET;
4075 }
4076
4077 smp_wmb();
4078
4079 tcp_write_queue_purge(sk);
4080 tcp_done(sk);
4081
4082 if (!sock_flag(sk, SOCK_DEAD))
4083 sk->sk_error_report(sk);
4084}
4085
4086 /*
4087  * Process the FIN bit. This now behaves as it is supposed to work
4088  * and the FIN takes effect when it is validly part of sequence
4089  * space, not before we get to the hole.
4090  *
4091  * If we are ESTABLISHED, a received FIN moves us to CLOSE-WAIT
4092  * (and thence onto LAST-ACK and finally CLOSE; we never enter
4093  * TIME-WAIT from there).
4094  *
4095  * If we are in FIN-WAIT-1, a received FIN indicates a simultaneous
4096  * close and we go into CLOSING (and later onto TIME-WAIT).
4097  *
4098  * If we are in FIN-WAIT-2, a received FIN moves us to TIME-WAIT.
4099  */
4100void tcp_fin(struct sock *sk)
4101{
4102 struct tcp_sock *tp = tcp_sk(sk);
4103
4104 inet_csk_schedule_ack(sk);
4105
4106 sk->sk_shutdown |= RCV_SHUTDOWN;
4107 sock_set_flag(sk, SOCK_DONE);
4108
4109 switch (sk->sk_state) {
4110 case TCP_SYN_RECV:
4111 case TCP_ESTABLISHED:
4112
4113 tcp_set_state(sk, TCP_CLOSE_WAIT);
4114 inet_csk_enter_pingpong_mode(sk);
4115 break;
4116
4117 case TCP_CLOSE_WAIT:
4118 case TCP_CLOSING:
4119
4120
4121
4122 break;
4123 case TCP_LAST_ACK:
4124
4125 break;
4126
4127 case TCP_FIN_WAIT1:
4128
4129
4130
4131
4132 tcp_send_ack(sk);
4133 tcp_set_state(sk, TCP_CLOSING);
4134 break;
4135 case TCP_FIN_WAIT2:
4136
4137 tcp_send_ack(sk);
4138 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4139 break;
4140 default:
4141
4142
4143
4144 pr_err("%s: Impossible, sk->sk_state=%d\n",
4145 __func__, sk->sk_state);
4146 break;
4147 }
4148
4149
4150
4151
4152 skb_rbtree_purge(&tp->out_of_order_queue);
4153 if (tcp_is_sack(tp))
4154 tcp_sack_reset(&tp->rx_opt);
4155 sk_mem_reclaim(sk);
4156
4157 if (!sock_flag(sk, SOCK_DEAD)) {
4158 sk->sk_state_change(sk);
4159
4160
4161 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4162 sk->sk_state == TCP_CLOSE)
4163 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4164 else
4165 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4166 }
4167}
4168
4169static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4170 u32 end_seq)
4171{
4172 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4173 if (before(seq, sp->start_seq))
4174 sp->start_seq = seq;
4175 if (after(end_seq, sp->end_seq))
4176 sp->end_seq = end_seq;
4177 return true;
4178 }
4179 return false;
4180}
4181
4182static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4183{
4184 struct tcp_sock *tp = tcp_sk(sk);
4185
4186 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4187 int mib_idx;
4188
4189 if (before(seq, tp->rcv_nxt))
4190 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4191 else
4192 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4193
4194 NET_INC_STATS(sock_net(sk), mib_idx);
4195
4196 tp->rx_opt.dsack = 1;
4197 tp->duplicate_sack[0].start_seq = seq;
4198 tp->duplicate_sack[0].end_seq = end_seq;
4199 }
4200}
4201
4202static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4203{
4204 struct tcp_sock *tp = tcp_sk(sk);
4205
4206 if (!tp->rx_opt.dsack)
4207 tcp_dsack_set(sk, seq, end_seq);
4208 else
4209 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4210}
4211
4212static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
4213{
4214
4215
4216
4217
4218
4219 if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
4220 sk_rethink_txhash(sk);
4221}
4222
4223static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4224{
4225 struct tcp_sock *tp = tcp_sk(sk);
4226
4227 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4228 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4229 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4230 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4231
4232 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4233 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4234
4235 tcp_rcv_spurious_retrans(sk, skb);
4236 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4237 end_seq = tp->rcv_nxt;
4238 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4239 }
4240 }
4241
4242 tcp_send_ack(sk);
4243}
4244
4245 /* These routines update the SACK block as out-of-order packets arrive or
4246  * in-order packets close up the sequence space.
4247  */
4248static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4249{
4250 int this_sack;
4251 struct tcp_sack_block *sp = &tp->selective_acks[0];
4252 struct tcp_sack_block *swalk = sp + 1;
4253
4254
4255
4256
4257 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4258 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4259 int i;
4260
4261
4262
4263
4264 tp->rx_opt.num_sacks--;
4265 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4266 sp[i] = sp[i + 1];
4267 continue;
4268 }
4269 this_sack++, swalk++;
4270 }
4271}
4272
4273static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4274{
4275 struct tcp_sock *tp = tcp_sk(sk);
4276 struct tcp_sack_block *sp = &tp->selective_acks[0];
4277 int cur_sacks = tp->rx_opt.num_sacks;
4278 int this_sack;
4279
4280 if (!cur_sacks)
4281 goto new_sack;
4282
4283 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4284 if (tcp_sack_extend(sp, seq, end_seq)) {
4285
4286 for (; this_sack > 0; this_sack--, sp--)
4287 swap(*sp, *(sp - 1));
4288 if (cur_sacks > 1)
4289 tcp_sack_maybe_coalesce(tp);
4290 return;
4291 }
4292 }
4293
4294
4295
4296
4297
4298
4299
4300 if (this_sack >= TCP_NUM_SACKS) {
4301 if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
4302 tcp_send_ack(sk);
4303 this_sack--;
4304 tp->rx_opt.num_sacks--;
4305 sp--;
4306 }
4307 for (; this_sack > 0; this_sack--, sp--)
4308 *sp = *(sp - 1);
4309
4310new_sack:
4311
4312 sp->start_seq = seq;
4313 sp->end_seq = end_seq;
4314 tp->rx_opt.num_sacks++;
4315}
4316
4317
4318
4319static void tcp_sack_remove(struct tcp_sock *tp)
4320{
4321 struct tcp_sack_block *sp = &tp->selective_acks[0];
4322 int num_sacks = tp->rx_opt.num_sacks;
4323 int this_sack;
4324
4325
4326 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4327 tp->rx_opt.num_sacks = 0;
4328 return;
4329 }
4330
4331 for (this_sack = 0; this_sack < num_sacks;) {
4332
4333 if (!before(tp->rcv_nxt, sp->start_seq)) {
4334 int i;
4335
4336
4337 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4338
4339
4340 for (i = this_sack+1; i < num_sacks; i++)
4341 tp->selective_acks[i-1] = tp->selective_acks[i];
4342 num_sacks--;
4343 continue;
4344 }
4345 this_sack++;
4346 sp++;
4347 }
4348 tp->rx_opt.num_sacks = num_sacks;
4349}
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365static bool tcp_try_coalesce(struct sock *sk,
4366 struct sk_buff *to,
4367 struct sk_buff *from,
4368 bool *fragstolen)
4369{
4370 int delta;
4371
4372 *fragstolen = false;
4373
4374
4375 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4376 return false;
4377
4378#ifdef CONFIG_TLS_DEVICE
4379 if (from->decrypted != to->decrypted)
4380 return false;
4381#endif
4382
4383 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4384 return false;
4385
4386 atomic_add(delta, &sk->sk_rmem_alloc);
4387 sk_mem_charge(sk, delta);
4388 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4389 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4390 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4391 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4392
4393 if (TCP_SKB_CB(from)->has_rxtstamp) {
4394 TCP_SKB_CB(to)->has_rxtstamp = true;
4395 to->tstamp = from->tstamp;
4396 skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
4397 }
4398
4399 return true;
4400}
4401
4402static bool tcp_ooo_try_coalesce(struct sock *sk,
4403 struct sk_buff *to,
4404 struct sk_buff *from,
4405 bool *fragstolen)
4406{
4407 bool res = tcp_try_coalesce(sk, to, from, fragstolen);
4408
4409
4410 if (res) {
4411 u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
4412 max_t(u16, 1, skb_shinfo(from)->gso_segs);
4413
4414 skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
4415 }
4416 return res;
4417}
4418
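/* Account the drop in sk_drops before freeing the skb. */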
4419static void tcp_drop(struct sock *sk, struct sk_buff *skb)
4420{
4421 sk_drops_add(sk, skb);
4422 __kfree_skb(skb);
4423}
4424
4425 /* This one checks to see if we can put data from the
4426  * out_of_order queue into the receive_queue.
4427  */
4428static void tcp_ofo_queue(struct sock *sk)
4429{
4430 struct tcp_sock *tp = tcp_sk(sk);
4431 __u32 dsack_high = tp->rcv_nxt;
4432 bool fin, fragstolen, eaten;
4433 struct sk_buff *skb, *tail;
4434 struct rb_node *p;
4435
4436 p = rb_first(&tp->out_of_order_queue);
4437 while (p) {
4438 skb = rb_to_skb(p);
4439 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4440 break;
4441
4442 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4443 __u32 dsack = dsack_high;
4444 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4445 dsack_high = TCP_SKB_CB(skb)->end_seq;
4446 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4447 }
4448 p = rb_next(p);
4449 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4450
4451 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4452 tcp_drop(sk, skb);
4453 continue;
4454 }
4455
4456 tail = skb_peek_tail(&sk->sk_receive_queue);
4457 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4458 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4459 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4460 if (!eaten)
4461 __skb_queue_tail(&sk->sk_receive_queue, skb);
4462 else
4463 kfree_skb_partial(skb, fragstolen);
4464
4465 if (unlikely(fin)) {
4466 tcp_fin(sk);
4467
4468
4469
4470 break;
4471 }
4472 }
4473}
4474
4475static bool tcp_prune_ofo_queue(struct sock *sk);
4476static int tcp_prune_queue(struct sock *sk);
4477
4478static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4479 unsigned int size)
4480{
4481 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4482 !sk_rmem_schedule(sk, skb, size)) {
4483
4484 if (tcp_prune_queue(sk) < 0)
4485 return -1;
4486
4487 while (!sk_rmem_schedule(sk, skb, size)) {
4488 if (!tcp_prune_ofo_queue(sk))
4489 return -1;
4490 }
4491 }
4492 return 0;
4493}
4494
4495static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4496{
4497 struct tcp_sock *tp = tcp_sk(sk);
4498 struct rb_node **p, *parent;
4499 struct sk_buff *skb1;
4500 u32 seq, end_seq;
4501 bool fragstolen;
4502
4503 tcp_ecn_check_ce(sk, skb);
4504
4505 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4506 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4507 tcp_drop(sk, skb);
4508 return;
4509 }
4510
4511
4512 tp->pred_flags = 0;
4513 inet_csk_schedule_ack(sk);
4514
4515 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4516 seq = TCP_SKB_CB(skb)->seq;
4517 end_seq = TCP_SKB_CB(skb)->end_seq;
4518
4519 p = &tp->out_of_order_queue.rb_node;
4520 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4521
4522 if (tcp_is_sack(tp)) {
4523 tp->rx_opt.num_sacks = 1;
4524 tp->selective_acks[0].start_seq = seq;
4525 tp->selective_acks[0].end_seq = end_seq;
4526 }
4527 rb_link_node(&skb->rbnode, NULL, p);
4528 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4529 tp->ooo_last_skb = skb;
4530 goto end;
4531 }
4532
4533
4534
4535
4536 if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
4537 skb, &fragstolen)) {
4538coalesce_done:
4539 tcp_grow_window(sk, skb);
4540 kfree_skb_partial(skb, fragstolen);
4541 skb = NULL;
4542 goto add_sack;
4543 }
4544
4545 if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
4546 parent = &tp->ooo_last_skb->rbnode;
4547 p = &parent->rb_right;
4548 goto insert;
4549 }
4550
4551
4552 parent = NULL;
4553 while (*p) {
4554 parent = *p;
4555 skb1 = rb_to_skb(parent);
4556 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4557 p = &parent->rb_left;
4558 continue;
4559 }
4560 if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4561 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4562
4563 NET_INC_STATS(sock_net(sk),
4564 LINUX_MIB_TCPOFOMERGE);
4565 tcp_drop(sk, skb);
4566 skb = NULL;
4567 tcp_dsack_set(sk, seq, end_seq);
4568 goto add_sack;
4569 }
4570 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4571
4572 tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
4573 } else {
4574
4575
4576
4577 rb_replace_node(&skb1->rbnode, &skb->rbnode,
4578 &tp->out_of_order_queue);
4579 tcp_dsack_extend(sk,
4580 TCP_SKB_CB(skb1)->seq,
4581 TCP_SKB_CB(skb1)->end_seq);
4582 NET_INC_STATS(sock_net(sk),
4583 LINUX_MIB_TCPOFOMERGE);
4584 tcp_drop(sk, skb1);
4585 goto merge_right;
4586 }
4587 } else if (tcp_ooo_try_coalesce(sk, skb1,
4588 skb, &fragstolen)) {
4589 goto coalesce_done;
4590 }
4591 p = &parent->rb_right;
4592 }
4593insert:
4594
4595 rb_link_node(&skb->rbnode, parent, p);
4596 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4597
4598merge_right:
4599
4600 while ((skb1 = skb_rb_next(skb)) != NULL) {
4601 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4602 break;
4603 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4604 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4605 end_seq);
4606 break;
4607 }
4608 rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
4609 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4610 TCP_SKB_CB(skb1)->end_seq);
4611 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4612 tcp_drop(sk, skb1);
4613 }
4614
4615 if (!skb1)
4616 tp->ooo_last_skb = skb;
4617
4618add_sack:
4619 if (tcp_is_sack(tp))
4620 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4621end:
4622 if (skb) {
4623 tcp_grow_window(sk, skb);
4624 skb_condense(skb);
4625 skb_set_owner_r(skb, sk);
4626 }
4627}
4628
4629static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
4630 bool *fragstolen)
4631{
4632 int eaten;
4633 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4634
4635 eaten = (tail &&
4636 tcp_try_coalesce(sk, tail,
4637 skb, fragstolen)) ? 1 : 0;
4638 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4639 if (!eaten) {
4640 __skb_queue_tail(&sk->sk_receive_queue, skb);
4641 skb_set_owner_r(skb, sk);
4642 }
4643 return eaten;
4644}
4645
4646int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4647{
4648 struct sk_buff *skb;
4649 int err = -ENOMEM;
4650 int data_len = 0;
4651 bool fragstolen;
4652
4653 if (size == 0)
4654 return 0;
4655
4656 if (size > PAGE_SIZE) {
4657 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4658
4659 data_len = npages << PAGE_SHIFT;
4660 size = data_len + (size & ~PAGE_MASK);
4661 }
4662 skb = alloc_skb_with_frags(size - data_len, data_len,
4663 PAGE_ALLOC_COSTLY_ORDER,
4664 &err, sk->sk_allocation);
4665 if (!skb)
4666 goto err;
4667
4668 skb_put(skb, size - data_len);
4669 skb->data_len = data_len;
4670 skb->len = size;
4671
4672 if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4673 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4674 goto err_free;
4675 }
4676
4677 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4678 if (err)
4679 goto err_free;
4680
4681 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4682 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4683 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4684
4685 if (tcp_queue_rcv(sk, skb, &fragstolen)) {
4686 WARN_ON_ONCE(fragstolen);
4687 __kfree_skb(skb);
4688 }
4689 return size;
4690
4691err_free:
4692 kfree_skb(skb);
4693err:
4694 return err;
4695
4696}
4697
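/* Wake the receiving process only once at least sk_rcvlowat bytes are queued,
 * unless no more data will arrive (SOCK_DONE is set).
 */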
4698void tcp_data_ready(struct sock *sk)
4699{
4700 const struct tcp_sock *tp = tcp_sk(sk);
4701 int avail = tp->rcv_nxt - tp->copied_seq;
4702
4703 if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
4704 return;
4705
4706 sk->sk_data_ready(sk);
4707}
4708
4709static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4710{
4711 struct tcp_sock *tp = tcp_sk(sk);
4712 bool fragstolen;
4713 int eaten;
4714
4715 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
4716 __kfree_skb(skb);
4717 return;
4718 }
4719 skb_dst_drop(skb);
4720 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4721
4722 tcp_ecn_accept_cwr(sk, skb);
4723
4724 tp->rx_opt.dsack = 0;
4725
4726
4727
4728
4729
4730 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4731 if (tcp_receive_window(tp) == 0) {
4732 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4733 goto out_of_window;
4734 }
4735
4736
4737queue_and_out:
4738 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4739 sk_forced_mem_schedule(sk, skb->truesize);
4740 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4741 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4742 goto drop;
4743 }
4744
4745 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
4746 if (skb->len)
4747 tcp_event_data_recv(sk, skb);
4748 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4749 tcp_fin(sk);
4750
4751 if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4752 tcp_ofo_queue(sk);
4753
4754
4755
4756
4757 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4758 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
4759 }
4760
4761 if (tp->rx_opt.num_sacks)
4762 tcp_sack_remove(tp);
4763
4764 tcp_fast_path_check(sk);
4765
4766 if (eaten > 0)
4767 kfree_skb_partial(skb, fragstolen);
4768 if (!sock_flag(sk, SOCK_DEAD))
4769 tcp_data_ready(sk);
4770 return;
4771 }
4772
4773 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4774 tcp_rcv_spurious_retrans(sk, skb);
4775
4776 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4777 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4778
4779out_of_window:
4780 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4781 inet_csk_schedule_ack(sk);
4782drop:
4783 tcp_drop(sk, skb);
4784 return;
4785 }
4786
4787
4788 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4789 goto out_of_window;
4790
4791 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4792
4793 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4794
4795
4796
4797
4798 if (!tcp_receive_window(tp)) {
4799 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4800 goto out_of_window;
4801 }
4802 goto queue_and_out;
4803 }
4804
4805 tcp_data_queue_ofo(sk, skb);
4806}
4807
4808static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
4809{
4810 if (list)
4811 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
4812
4813 return skb_rb_next(skb);
4814}
4815
4816static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4817 struct sk_buff_head *list,
4818 struct rb_root *root)
4819{
4820 struct sk_buff *next = tcp_skb_next(skb, list);
4821
4822 if (list)
4823 __skb_unlink(skb, list);
4824 else
4825 rb_erase(&skb->rbnode, root);
4826
4827 __kfree_skb(skb);
4828 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4829
4830 return next;
4831}
4832
4833
4834void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
4835{
4836 struct rb_node **p = &root->rb_node;
4837 struct rb_node *parent = NULL;
4838 struct sk_buff *skb1;
4839
4840 while (*p) {
4841 parent = *p;
4842 skb1 = rb_to_skb(parent);
4843 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
4844 p = &parent->rb_left;
4845 else
4846 p = &parent->rb_right;
4847 }
4848 rb_link_node(&skb->rbnode, parent, p);
4849 rb_insert_color(&skb->rbnode, root);
4850}
4851
4852 /* Collapse a contiguous sequence of skbs head..tail with
4853  * sequence numbers start..end.
4854  *
4855  * If tail is NULL, this means collapse until the end of the queue.
4856  *
4857  * Segments with FIN/SYN are not collapsed (only because this
4858  * simplifies the code).
4859  */
4860static void
4861tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
4862 struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
4863{
4864 struct sk_buff *skb = head, *n;
4865 struct sk_buff_head tmp;
4866 bool end_of_skbs;
4867
4868
4869
4870
4871restart:
4872 for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
4873 n = tcp_skb_next(skb, list);
4874
4875
4876 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4877 skb = tcp_collapse_one(sk, skb, list, root);
4878 if (!skb)
4879 break;
4880 goto restart;
4881 }
4882
4883
4884
4885
4886
4887
4888 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4889 (tcp_win_from_space(sk, skb->truesize) > skb->len ||
4890 before(TCP_SKB_CB(skb)->seq, start))) {
4891 end_of_skbs = false;
4892 break;
4893 }
4894
4895 if (n && n != tail &&
4896 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
4897 end_of_skbs = false;
4898 break;
4899 }
4900
4901
4902 start = TCP_SKB_CB(skb)->end_seq;
4903 }
4904 if (end_of_skbs ||
4905 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4906 return;
4907
4908 __skb_queue_head_init(&tmp);
4909
4910 while (before(start, end)) {
4911 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4912 struct sk_buff *nskb;
4913
4914 nskb = alloc_skb(copy, GFP_ATOMIC);
4915 if (!nskb)
4916 break;
4917
4918 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4919#ifdef CONFIG_TLS_DEVICE
4920 nskb->decrypted = skb->decrypted;
4921#endif
4922 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4923 if (list)
4924 __skb_queue_before(list, skb, nskb);
4925 else
4926 __skb_queue_tail(&tmp, nskb);
4927 skb_set_owner_r(nskb, sk);
4928
4929
4930 while (copy > 0) {
4931 int offset = start - TCP_SKB_CB(skb)->seq;
4932 int size = TCP_SKB_CB(skb)->end_seq - start;
4933
4934 BUG_ON(offset < 0);
4935 if (size > 0) {
4936 size = min(copy, size);
4937 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4938 BUG();
4939 TCP_SKB_CB(nskb)->end_seq += size;
4940 copy -= size;
4941 start += size;
4942 }
4943 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4944 skb = tcp_collapse_one(sk, skb, list, root);
4945 if (!skb ||
4946 skb == tail ||
4947 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4948 goto end;
4949#ifdef CONFIG_TLS_DEVICE
4950 if (skb->decrypted != nskb->decrypted)
4951 goto end;
4952#endif
4953 }
4954 }
4955 }
4956end:
4957 skb_queue_walk_safe(&tmp, skb, n)
4958 tcp_rbtree_insert(root, skb);
4959}
4960
4961
4962
4963
4964static void tcp_collapse_ofo_queue(struct sock *sk)
4965{
4966 struct tcp_sock *tp = tcp_sk(sk);
4967 u32 range_truesize, sum_tiny = 0;
4968 struct sk_buff *skb, *head;
4969 u32 start, end;
4970
4971 skb = skb_rb_first(&tp->out_of_order_queue);
4972new_range:
4973 if (!skb) {
4974 tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
4975 return;
4976 }
4977 start = TCP_SKB_CB(skb)->seq;
4978 end = TCP_SKB_CB(skb)->end_seq;
4979 range_truesize = skb->truesize;
4980
4981 for (head = skb;;) {
4982 skb = skb_rb_next(skb);
4983
4984
4985
4986
4987 if (!skb ||
4988 after(TCP_SKB_CB(skb)->seq, end) ||
4989 before(TCP_SKB_CB(skb)->end_seq, start)) {
4990
4991 if (range_truesize != head->truesize ||
4992 end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
4993 tcp_collapse(sk, NULL, &tp->out_of_order_queue,
4994 head, skb, start, end);
4995 } else {
4996 sum_tiny += range_truesize;
4997 if (sum_tiny > sk->sk_rcvbuf >> 3)
4998 return;
4999 }
5000 goto new_range;
5001 }
5002
5003 range_truesize += skb->truesize;
5004 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
5005 start = TCP_SKB_CB(skb)->seq;
5006 if (after(TCP_SKB_CB(skb)->end_seq, end))
5007 end = TCP_SKB_CB(skb)->end_seq;
5008 }
5009}
5010
5011 /*
5012  * Clean the out-of-order queue to make room.
5013  * We drop high-sequence packets in order to:
5014  * 1) Leave a chance for holes to be filled; we do not drop packets
5015  *    from the ooo queue if their sequence is before the incoming
5016  *    packet sequence.
5017  * 2) Not add too much latency if thousands of packets sit there.
5018  *    (But if the application shrinks SO_RCVBUF, we could still end up
5019  *     freeing the whole queue here.)
5020  * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
5021  * Returns true if the queue has shrunk. */
5022static bool tcp_prune_ofo_queue(struct sock *sk)
5023{
5024 struct tcp_sock *tp = tcp_sk(sk);
5025 struct rb_node *node, *prev;
5026 int goal;
5027
5028 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
5029 return false;
5030
5031 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
5032 goal = sk->sk_rcvbuf >> 3;
5033 node = &tp->ooo_last_skb->rbnode;
5034 do {
5035 prev = rb_prev(node);
5036 rb_erase(node, &tp->out_of_order_queue);
5037 goal -= rb_to_skb(node)->truesize;
5038 tcp_drop(sk, rb_to_skb(node));
5039 if (!prev || goal <= 0) {
5040 sk_mem_reclaim(sk);
5041 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
5042 !tcp_under_memory_pressure(sk))
5043 break;
5044 goal = sk->sk_rcvbuf >> 3;
5045 }
5046 node = prev;
5047 } while (node);
5048 tp->ooo_last_skb = rb_to_skb(prev);
5049
5050
5051
5052
5053
5054
5055 if (tp->rx_opt.sack_ok)
5056 tcp_sack_reset(&tp->rx_opt);
5057 return true;
5058}
5059
5060 /* Reduce allocated memory if we can, trying to get
5061  * the socket within its memory limits again.
5062  *
5063  * Return less than zero if we should start dropping frames
5064  * until the socket owning process reads some of the data
5065  * to stabilize the situation.
5066  */
5067static int tcp_prune_queue(struct sock *sk)
5068{
5069 struct tcp_sock *tp = tcp_sk(sk);
5070
5071 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
5072
5073 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
5074 tcp_clamp_window(sk);
5075 else if (tcp_under_memory_pressure(sk))
5076 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
5077
5078 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5079 return 0;
5080
5081 tcp_collapse_ofo_queue(sk);
5082 if (!skb_queue_empty(&sk->sk_receive_queue))
5083 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
5084 skb_peek(&sk->sk_receive_queue),
5085 NULL,
5086 tp->copied_seq, tp->rcv_nxt);
5087 sk_mem_reclaim(sk);
5088
5089 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5090 return 0;
5091
5092
5093
5094
5095 tcp_prune_ofo_queue(sk);
5096
5097 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5098 return 0;
5099
5100
5101
5102
5103
5104 NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
5105
5106
5107 tp->pred_flags = 0;
5108 return -1;
5109}
5110
5111static bool tcp_should_expand_sndbuf(const struct sock *sk)
5112{
5113 const struct tcp_sock *tp = tcp_sk(sk);
5114
5115
5116
5117
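/* The user has fixed the send buffer size with SO_SNDBUF; do not touch it. */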
5118 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
5119 return false;
5120
5121
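/* Do not expand the send buffer while under global TCP memory pressure. */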
5122 if (tcp_under_memory_pressure(sk))
5123 return false;
5124
5125
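/* Do not expand once global TCP memory allocation is above the lower (soft) limit. */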
5126 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
5127 return false;
5128
5129
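/* The congestion window is already full; a larger send buffer would not help. */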
5130 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
5131 return false;
5132
5133 return true;
5134}
5135
5136
5137
5138
5139
5140
5141
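/* An ACK has freed some space in the transmit queue: expand the send buffer
 * if tcp_should_expand_sndbuf() allows it, then wake up any writer that is
 * blocked waiting for buffer space.
 */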
5142static void tcp_new_space(struct sock *sk)
5143{
5144 struct tcp_sock *tp = tcp_sk(sk);
5145
5146 if (tcp_should_expand_sndbuf(sk)) {
5147 tcp_sndbuf_expand(sk);
5148 tp->snd_cwnd_stamp = tcp_jiffies32;
5149 }
5150
5151 sk->sk_write_space(sk);
5152}
5153
5154static void tcp_check_space(struct sock *sk)
5155{
5156 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
5157 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
5158
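/* pairs with the barrier in tcp_poll() */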
5159 smp_mb();
5160 if (sk->sk_socket &&
5161 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
5162 tcp_new_space(sk);
5163 if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5164 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
5165 }
5166 }
5167}
5168
5169static inline void tcp_data_snd_check(struct sock *sk)
5170{
5171 tcp_push_pending_frames(sk);
5172 tcp_check_space(sk);
5173}
5174
5175
5176
5177
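/* Decide how to acknowledge freshly received data: send an ACK right away,
 * fall back to a classic delayed ACK, or, for SACK connections receiving
 * out-of-order data, arm the compressed-ACK timer so that a burst of SACKs
 * is merged into fewer ACK packets.
 */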
5178static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5179{
5180 struct tcp_sock *tp = tcp_sk(sk);
5181 unsigned long rtt, delay;
5182
5183
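/* More than one full frame of new data has been received ... */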
5184 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5185
5186
5187
5188
5189
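/* ... and either the advertised window can advance significantly, or
 * SO_RCVLOWAT is not yet satisfied, so tcp_recvmsg() will not be sending
 * the ACK on our behalf.
 */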
5190 (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
5191 __tcp_select_window(sk) >= tp->rcv_wnd)) ||
5192
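/* We are in quickack mode: ACK immediately. */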
5193 tcp_in_quickack_mode(sk) ||
5194
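/* Protocol state requires an immediate ACK (ICSK_ACK_NOW). */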
5195 inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
5196send_now:
5197 tcp_send_ack(sk);
5198 return;
5199 }
5200
5201 if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
5202 tcp_send_delayed_ack(sk);
5203 return;
5204 }
5205
5206 if (!tcp_is_sack(tp) ||
5207 tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
5208 goto send_now;
5209
5210 if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
5211 tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
5212 if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
5213 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
5214 tp->compressed_ack - TCP_FASTRETRANS_THRESH);
5215 tp->compressed_ack = 0;
5216 }
5217
5218 if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
5219 goto send_now;
5220
5221 if (hrtimer_is_queued(&tp->compressed_ack_timer))
5222 return;
5223
5224
5225
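/* Delay the compressed ACK by about 5% of the RTT (rtt is in usec << 3,
 * hence the >> 3 and the / 20), but never longer than
 * sysctl_tcp_comp_sack_delay_ns.
 */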
5226 rtt = tp->rcv_rtt_est.rtt_us;
5227 if (tp->srtt_us && tp->srtt_us < rtt)
5228 rtt = tp->srtt_us;
5229
5230 delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
5231 rtt * (NSEC_PER_USEC >> 3)/20);
5232 sock_hold(sk);
5233 hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
5234 HRTIMER_MODE_REL_PINNED_SOFT);
5235}
5236
5237static inline void tcp_ack_snd_check(struct sock *sk)
5238{
5239 if (!inet_csk_ack_scheduled(sk)) {
5240
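/* We sent a data segment already. */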
5241 return;
5242 }
5243 __tcp_ack_snd_check(sk, 1);
5244}
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
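/* Handle the urgent pointer of an incoming segment.  By default we follow
 * the BSD interpretation (the urgent pointer points one byte past the urgent
 * byte) unless sysctl_tcp_stdurg selects the literal RFC 793 behaviour.
 * Already-read, already-received or duplicate urgent pointers are ignored;
 * otherwise SIGURG is raised and the pointer is recorded as TCP_URG_NOTYET
 * until the urgent byte itself arrives.
 */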
5256static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5257{
5258 struct tcp_sock *tp = tcp_sk(sk);
5259 u32 ptr = ntohs(th->urg_ptr);
5260
5261 if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
5262 ptr--;
5263 ptr += ntohl(th->seq);
5264
5265
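/* Ignore urgent data that we've already seen and read. */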
5266 if (after(tp->copied_seq, ptr))
5267 return;
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
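/* Do not replay an urgent pointer that falls inside data we have already received. */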
5279 if (before(ptr, tp->rcv_nxt))
5280 return;
5281
5282
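/* Duplicate (or older) urgent pointer; nothing new to record. */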
5283 if (tp->urg_data && !after(ptr, tp->urg_seq))
5284 return;
5285
5286
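/* Tell the world about our new urgent pointer. */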
5287 sk_send_sigurg(sk);
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
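/* A previously signalled urgent byte may still sit unread at copied_seq.
 * If urgent data is delivered out of band (no SOCK_URGINLINE) and there is
 * other unread data, step over that stale byte so it is never returned as
 * ordinary data, and free its skb if that consumed it entirely.
 */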
5304 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5305 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5306 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5307 tp->copied_seq++;
5308 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5309 __skb_unlink(skb, &sk->sk_receive_queue);
5310 __kfree_skb(skb);
5311 }
5312 }
5313
5314 tp->urg_data = TCP_URG_NOTYET;
5315 tp->urg_seq = ptr;
5316
5317
5318 tp->pred_flags = 0;
5319}
5320
5321
5322static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5323{
5324 struct tcp_sock *tp = tcp_sk(sk);
5325
5326
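/* Check for a new urgent pointer (normally there is none). */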
5327 if (th->urg)
5328 tcp_check_urg(sk, th);
5329
5330
5331 if (tp->urg_data == TCP_URG_NOTYET) {
5332 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5333 th->syn;
5334
5335
5336 if (ptr < skb->len) {
5337 u8 tmp;
5338 if (skb_copy_bits(skb, ptr, &tmp, 1))
5339 BUG();
5340 tp->urg_data = TCP_URG_VALID | tmp;
5341 if (!sock_flag(sk, SOCK_DEAD))
5342 sk->sk_data_ready(sk);
5343 }
5344 }
5345}
5346
5347
5348
5349
5350
5351
5352
5353
5354
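/* Some stacks send a FIN immediately followed by a RST that carries the same
 * sequence number as the FIN, i.e. rcv_nxt - 1 from our point of view.
 * Accept such a RST, but only in states where the peer's FIN has already
 * been received (CLOSE_WAIT, LAST_ACK, CLOSING).
 */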
5355static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
5356{
5357 struct tcp_sock *tp = tcp_sk(sk);
5358
5359 return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
5360 (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
5361 TCPF_CLOSING));
5362}
5363
5364
5365
5366
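/* Perform PAWS and sequence-number validation of an incoming segment, and
 * handle the RST and SYN corner cases (challenge ACKs per RFC 5961).
 * Returns true if the segment should be processed further, false if it has
 * been consumed (dropped).
 */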
5367static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5368 const struct tcphdr *th, int syn_inerr)
5369{
5370 struct tcp_sock *tp = tcp_sk(sk);
5371 bool rst_seq_match = false;
5372
5373
5374 if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
5375 tp->rx_opt.saw_tstamp &&
5376 tcp_paws_discard(sk, skb)) {
5377 if (!th->rst) {
5378 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5379 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5380 LINUX_MIB_TCPACKSKIPPEDPAWS,
5381 &tp->last_oow_ack_time))
5382 tcp_send_dupack(sk, skb);
5383 goto discard;
5384 }
5385
5386 }
5387
5388
5389 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5390
5391
5392
5393
5394
5395
5396 if (!th->rst) {
5397 if (th->syn)
5398 goto syn_challenge;
5399 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5400 LINUX_MIB_TCPACKSKIPPEDSEQ,
5401 &tp->last_oow_ack_time))
5402 tcp_send_dupack(sk, skb);
5403 } else if (tcp_reset_check(sk, skb)) {
5404 tcp_reset(sk);
5405 }
5406 goto discard;
5407 }
5408
5409
5410 if (th->rst) {
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
5421 tcp_reset_check(sk, skb)) {
5422 rst_seq_match = true;
5423 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
5424 struct tcp_sack_block *sp = &tp->selective_acks[0];
5425 int max_sack = sp[0].end_seq;
5426 int this_sack;
5427
5428 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
5429 ++this_sack) {
5430 max_sack = after(sp[this_sack].end_seq,
5431 max_sack) ?
5432 sp[this_sack].end_seq : max_sack;
5433 }
5434
5435 if (TCP_SKB_CB(skb)->seq == max_sack)
5436 rst_seq_match = true;
5437 }
5438
5439 if (rst_seq_match)
5440 tcp_reset(sk);
5441 else {
5442
5443
5444
5445
5446 if (tp->syn_fastopen && !tp->data_segs_in &&
5447 sk->sk_state == TCP_ESTABLISHED)
5448 tcp_fastopen_active_disable(sk);
5449 tcp_send_challenge_ack(sk, skb);
5450 }
5451 goto discard;
5452 }
5453
5454
5455
5456
5457
5458
5459 if (th->syn) {
5460syn_challenge:
5461 if (syn_inerr)
5462 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5463 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5464 tcp_send_challenge_ack(sk, skb);
5465 goto discard;
5466 }
5467
5468 return true;
5469
5470discard:
5471 tcp_drop(sk, skb);
5472 return false;
5473}
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485
5486
5487
5488
5489
5490
5491
5492
5493
5494
5495
5496
5497
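/* TCP receive function for the ESTABLISHED state.
 *
 * It is split into a fast path and a slow path.  The fast path is disabled
 * when:
 * - a zero window was announced from us (zero window probing is handled
 *   properly only by the slow path)
 * - out-of-order segments arrived
 * - urgent data is expected
 * - there is no buffer space left
 * - unexpected TCP flags, window values or header lengths are received
 *   (detected by checking the TCP header against pred_flags)
 * - data is sent in both directions; the fast path only supports pure
 *   senders or pure receivers (this means either the sequence number or
 *   the ack value must stay constant)
 * - unexpected TCP options are present
 *
 * When these conditions are not satisfied the segment drops into the
 * standard receive procedure patterned after RFC 793.
 */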
5498void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
5499{
5500 const struct tcphdr *th = (const struct tcphdr *)skb->data;
5501 struct tcp_sock *tp = tcp_sk(sk);
5502 unsigned int len = skb->len;
5503
5504
5505 trace_tcp_probe(sk, skb);
5506
5507 tcp_mstamp_refresh(tp);
5508 if (unlikely(!sk->sk_rx_dst))
5509 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524
5525 tp->rx_opt.saw_tstamp = 0;
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
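/* pred_flags is 0xS?10 << 16 + snd_wnd if header prediction is to be made:
 * 'S' is tp->tcp_header_len >> 2, '?' is 0 for the fast path; pred_flags is
 * set to 0 to turn the fast path off entirely (e.g. when there are holes in
 * the receive space).  The PSH flag is ignored.
 */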
5536 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5537 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5538 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5539 int tcp_header_len = tp->tcp_header_len;
5540
5541
5542
5543
5544
5545
5546
5547 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5548
5549 if (!tcp_parse_aligned_timestamp(tp, th))
5550 goto slow_path;
5551
5552
5553 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5554 goto slow_path;
5555
5556
5557
5558
5559
5560
5561 }
5562
5563 if (len <= tcp_header_len) {
5564
5565 if (len == tcp_header_len) {
5566
5567
5568
5569
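/* The predicted packet is in window by definition: seq == rcv_nxt and
 * rcv_wup <= rcv_nxt, so the "seq <= rcv_wup" part of the ts_recent update
 * rule reduces to the check below.
 */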
5570 if (tcp_header_len ==
5571 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5572 tp->rcv_nxt == tp->rcv_wup)
5573 tcp_store_ts_recent(tp);
5574
5575
5576
5577
5578 tcp_ack(sk, skb, 0);
5579 __kfree_skb(skb);
5580 tcp_data_snd_check(sk);
5581
5582
5583
5584
5585 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
5586 return;
5587 } else {
5588 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5589 goto discard;
5590 }
5591 } else {
5592 int eaten = 0;
5593 bool fragstolen = false;
5594
5595 if (tcp_checksum_complete(skb))
5596 goto csum_error;
5597
5598 if ((int)skb->truesize > sk->sk_forward_alloc)
5599 goto step5;
5600
5601
5602
5603
5604
5605 if (tcp_header_len ==
5606 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5607 tp->rcv_nxt == tp->rcv_wup)
5608 tcp_store_ts_recent(tp);
5609
5610 tcp_rcv_rtt_measure_ts(sk, skb);
5611
5612 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
5613
5614
5615 __skb_pull(skb, tcp_header_len);
5616 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
5617
5618 tcp_event_data_recv(sk, skb);
5619
5620 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5621
5622 tcp_ack(sk, skb, FLAG_DATA);
5623 tcp_data_snd_check(sk);
5624 if (!inet_csk_ack_scheduled(sk))
5625 goto no_ack;
5626 }
5627
5628 __tcp_ack_snd_check(sk, 0);
5629no_ack:
5630 if (eaten)
5631 kfree_skb_partial(skb, fragstolen);
5632 tcp_data_ready(sk);
5633 return;
5634 }
5635 }
5636
5637slow_path:
5638 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
5639 goto csum_error;
5640
5641 if (!th->ack && !th->rst && !th->syn)
5642 goto discard;
5643
5644
5645
5646
5647
5648 if (!tcp_validate_incoming(sk, skb, th, 1))
5649 return;
5650
5651step5:
5652 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5653 goto discard;
5654
5655 tcp_rcv_rtt_measure_ts(sk, skb);
5656
5657
5658 tcp_urg(sk, skb, th);
5659
5660
5661 tcp_data_queue(sk, skb);
5662
5663 tcp_data_snd_check(sk);
5664 tcp_ack_snd_check(sk);
5665 return;
5666
5667csum_error:
5668 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
5669 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5670
5671discard:
5672 tcp_drop(sk, skb);
5673}
5674EXPORT_SYMBOL(tcp_rcv_established);
5675
5676void tcp_init_transfer(struct sock *sk, int bpf_op)
5677{
5678 struct inet_connection_sock *icsk = inet_csk(sk);
5679 struct tcp_sock *tp = tcp_sk(sk);
5680
5681 tcp_mtup_init(sk);
5682 icsk->icsk_af_ops->rebuild_header(sk);
5683 tcp_init_metrics(sk);
5684
5685
5686
5687
5688
5689
5690
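/* Cut cwnd down to 1 per RFC 5681 only when the SYN or SYN-ACK was
 * retransmitted more than once; with RFC 6298's 1 second initial RTO a
 * single retransmission is weak evidence of real congestion.
 */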
5691 if (tp->total_retrans > 1 && tp->undo_marker)
5692 tp->snd_cwnd = 1;
5693 else
5694 tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
5695 tp->snd_cwnd_stamp = tcp_jiffies32;
5696
5697 tcp_call_bpf(sk, bpf_op, 0, NULL);
5698 tcp_init_congestion_control(sk);
5699 tcp_init_buffer_space(sk);
5700}
5701
5702void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5703{
5704 struct tcp_sock *tp = tcp_sk(sk);
5705 struct inet_connection_sock *icsk = inet_csk(sk);
5706
5707 tcp_set_state(sk, TCP_ESTABLISHED);
5708 icsk->icsk_ack.lrcvtime = tcp_jiffies32;
5709
5710 if (skb) {
5711 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5712 security_inet_conn_established(sk, skb);
5713 sk_mark_napi_id(sk, skb);
5714 }
5715
5716 tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
5717
5718
5719
5720
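/* Prevent spurious tcp_cwnd_restart() on the first data packet. */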
5721 tp->lsndtime = tcp_jiffies32;
5722
5723 if (sock_flag(sk, SOCK_KEEPOPEN))
5724 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5725
5726 if (!tp->rx_opt.snd_wscale)
5727 __tcp_fast_path_on(tp, tp->snd_wnd);
5728 else
5729 tp->pred_flags = 0;
5730}
5731
5732static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5733 struct tcp_fastopen_cookie *cookie)
5734{
5735 struct tcp_sock *tp = tcp_sk(sk);
5736 struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
5737 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5738 bool syn_drop = false;
5739
5740 if (mss == tp->rx_opt.user_mss) {
5741 struct tcp_options_received opt;
5742
5743
5744 tcp_clear_options(&opt);
5745 opt.user_mss = opt.mss_clamp = 0;
5746 tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
5747 mss = opt.mss_clamp;
5748 }
5749
5750 if (!tp->syn_fastopen) {
5751
5752 cookie->len = -1;
5753 } else if (tp->total_retrans) {
5754
5755
5756
5757
5758
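/* The SYN timed out and this SYN-ACK neither carries a cookie nor
 * acknowledges data: most likely only a retransmitted, data-less SYN got
 * through, i.e. our SYN-data (or its SYN-ACK) was dropped.
 */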
5759 syn_drop = (cookie->len < 0 && data);
5760 } else if (cookie->len < 0 && !tp->syn_data) {
5761
5762
5763
5764
5765 try_exp = tp->syn_fastopen_exp ? 2 : 1;
5766 }
5767
5768 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
5769
5770 if (data) {
5771 skb_rbtree_walk_from(data) {
5772 if (__tcp_retransmit_skb(sk, data, 1))
5773 break;
5774 }
5775 tcp_rearm_rto(sk);
5776 NET_INC_STATS(sock_net(sk),
5777 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
5778 return true;
5779 }
5780 tp->syn_data_acked = tp->syn_data;
5781 if (tp->syn_data_acked) {
5782 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
5783
5784 if (tp->delivered > 1)
5785 --tp->delivered;
5786 }
5787
5788 tcp_fastopen_add_skb(sk, synack);
5789
5790 return false;
5791}
5792
5793static void smc_check_reset_syn(struct tcp_sock *tp)
5794{
5795#if IS_ENABLED(CONFIG_SMC)
5796 if (static_branch_unlikely(&tcp_have_smc)) {
5797 if (tp->syn_smc && !tp->rx_opt.smc_ok)
5798 tp->syn_smc = 0;
5799 }
5800#endif
5801}
5802
5803static void tcp_try_undo_spurious_syn(struct sock *sk)
5804{
5805 struct tcp_sock *tp = tcp_sk(sk);
5806 u32 syn_stamp;
5807
5808
5809
5810
5811
5812 syn_stamp = tp->retrans_stamp;
5813 if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
5814 syn_stamp == tp->rx_opt.rcv_tsecr)
5815 tp->undo_marker = 0;
5816}
5817
5818static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5819 const struct tcphdr *th)
5820{
5821 struct inet_connection_sock *icsk = inet_csk(sk);
5822 struct tcp_sock *tp = tcp_sk(sk);
5823 struct tcp_fastopen_cookie foc = { .len = -1 };
5824 int saved_clamp = tp->rx_opt.mss_clamp;
5825 bool fastopen_fail;
5826
5827 tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
5828 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
5829 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5830
5831 if (th->ack) {
5832
5833
5834
5835
5836
5837
5838
5839
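/* RFC 793: "If the state is SYN-SENT then first check the ACK bit: if the
 * ACK bit is set, if SEG.ACK =< ISS or SEG.ACK > SND.NXT, send a reset
 * (unless the RST bit is set, if so drop the segment and return)."
 */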
5840 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
5841 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
5842 goto reset_and_undo;
5843
5844 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5845 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5846 tcp_time_stamp(tp))) {
5847 NET_INC_STATS(sock_net(sk),
5848 LINUX_MIB_PAWSACTIVEREJECTED);
5849 goto reset_and_undo;
5850 }
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860 if (th->rst) {
5861 tcp_reset(sk);
5862 goto discard;
5863 }
5864
5865
5866
5867
5868
5869
5870
5871
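/* RFC 793: "fifth, if neither of the SYN or RST bits is set then drop the
 * segment and return."
 */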
5872 if (!th->syn)
5873 goto discard_and_undo;
5874
5875
5876
5877
5878
5879
5880
5881
5882 tcp_ecn_rcv_synack(tp, th);
5883
5884 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5885 tcp_try_undo_spurious_syn(sk);
5886 tcp_ack(sk, skb, FLAG_SLOWPATH);
5887
5888
5889
5890
5891 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5892 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5893
5894
5895
5896
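/* RFC 1323: the window in SYN and SYN-ACK segments is never scaled. */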
5897 tp->snd_wnd = ntohs(th->window);
5898
5899 if (!tp->rx_opt.wscale_ok) {
5900 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5901 tp->window_clamp = min(tp->window_clamp, 65535U);
5902 }
5903
5904 if (tp->rx_opt.saw_tstamp) {
5905 tp->rx_opt.tstamp_ok = 1;
5906 tp->tcp_header_len =
5907 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5908 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5909 tcp_store_ts_recent(tp);
5910 } else {
5911 tp->tcp_header_len = sizeof(struct tcphdr);
5912 }
5913
5914 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5915 tcp_initialize_rcv_mss(sk);
5916
5917
5918
5919
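/* Remember, tcp_poll() does not lock the socket!  Only switch out of
 * SYN-SENT after copied_seq has been initialized.
 */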
5920 tp->copied_seq = tp->rcv_nxt;
5921
5922 smc_check_reset_syn(tp);
5923
5924 smp_mb();
5925
5926 tcp_finish_connect(sk, skb);
5927
5928 fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
5929 tcp_rcv_fastopen_synack(sk, skb, &foc);
5930
5931 if (!sock_flag(sk, SOCK_DEAD)) {
5932 sk->sk_state_change(sk);
5933 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5934 }
5935 if (fastopen_fail)
5936 return -1;
5937 if (sk->sk_write_pending ||
5938 icsk->icsk_accept_queue.rskq_defer_accept ||
5939 inet_csk_in_pingpong_mode(sk)) {
5940
5941
5942
5943
5944
5945
5946
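/* There is a pending write (or the connection is in pingpong or
 * defer-accept mode), so the ACK we owe can ride on data that is about to
 * be sent; just schedule a delayed ACK as a fallback.
 */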
5947 inet_csk_schedule_ack(sk);
5948 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
5949 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5950 TCP_DELACK_MAX, TCP_RTO_MAX);
5951
5952discard:
5953 tcp_drop(sk, skb);
5954 return 0;
5955 } else {
5956 tcp_send_ack(sk);
5957 }
5958 return -1;
5959 }
5960
5961
5962
5963 if (th->rst) {
5964
5965
5966
5967
5968
5969
5970 goto discard_and_undo;
5971 }
5972
5973
5974 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5975 tcp_paws_reject(&tp->rx_opt, 0))
5976 goto discard_and_undo;
5977
5978 if (th->syn) {
5979
5980
5981
5982
5983 tcp_set_state(sk, TCP_SYN_RECV);
5984
5985 if (tp->rx_opt.saw_tstamp) {
5986 tp->rx_opt.tstamp_ok = 1;
5987 tcp_store_ts_recent(tp);
5988 tp->tcp_header_len =
5989 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5990 } else {
5991 tp->tcp_header_len = sizeof(struct tcphdr);
5992 }
5993
5994 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5995 tp->copied_seq = tp->rcv_nxt;
5996 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5997
5998
5999
6000
6001 tp->snd_wnd = ntohs(th->window);
6002 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
6003 tp->max_window = tp->snd_wnd;
6004
6005 tcp_ecn_rcv_syn(tp, th);
6006
6007 tcp_mtup_init(sk);
6008 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
6009 tcp_initialize_rcv_mss(sk);
6010
6011 tcp_send_synack(sk);
6012#if 0
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024 return -1;
6025#else
6026 goto discard;
6027#endif
6028 }
6029
6030
6031
6032
6033discard_and_undo:
6034 tcp_clear_options(&tp->rx_opt);
6035 tp->rx_opt.mss_clamp = saved_clamp;
6036 goto discard;
6037
6038reset_and_undo:
6039 tcp_clear_options(&tp->rx_opt);
6040 tp->rx_opt.mss_clamp = saved_clamp;
6041 return 1;
6042}
6043
6044static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
6045{
6046 tcp_try_undo_loss(sk, false);
6047
6048
6049 tcp_sk(sk)->retrans_stamp = 0;
6050 inet_csk(sk)->icsk_retransmits = 0;
6051
6052
6053
6054
6055 reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false);
6056
6057
6058
6059
6060
6061
6062
6063
6064
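/* Re-arm the timer because data may have been sent out; this is similar to
 * the regular data-transmission case when new data has just been acked.
 */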
6065 tcp_rearm_rto(sk);
6066}
6067
6068
6069
6070
6071
6072
6073
6074
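/* This function implements the receiving procedure of RFC 793 for all
 * states except ESTABLISHED and TIME_WAIT.  It is called from both
 * tcp_v4_rcv and tcp_v6_rcv and should be address-family independent.
 */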
6075int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
6076{
6077 struct tcp_sock *tp = tcp_sk(sk);
6078 struct inet_connection_sock *icsk = inet_csk(sk);
6079 const struct tcphdr *th = tcp_hdr(skb);
6080 struct request_sock *req;
6081 int queued = 0;
6082 bool acceptable;
6083
6084 switch (sk->sk_state) {
6085 case TCP_CLOSE:
6086 goto discard;
6087
6088 case TCP_LISTEN:
6089 if (th->ack)
6090 return 1;
6091
6092 if (th->rst)
6093 goto discard;
6094
6095 if (th->syn) {
6096 if (th->fin)
6097 goto discard;
6098
6099
6100
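/* We may be processing SYN packets from the backlog, so make sure BH and
 * RCU are disabled around the conn_request() call.
 */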
6101 rcu_read_lock();
6102 local_bh_disable();
6103 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
6104 local_bh_enable();
6105 rcu_read_unlock();
6106
6107 if (!acceptable)
6108 return 1;
6109 consume_skb(skb);
6110 return 0;
6111 }
6112 goto discard;
6113
6114 case TCP_SYN_SENT:
6115 tp->rx_opt.saw_tstamp = 0;
6116 tcp_mstamp_refresh(tp);
6117 queued = tcp_rcv_synsent_state_process(sk, skb, th);
6118 if (queued >= 0)
6119 return queued;
6120
6121
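/* Do step 6 onward by hand. */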
6122 tcp_urg(sk, skb, th);
6123 __kfree_skb(skb);
6124 tcp_data_snd_check(sk);
6125 return 0;
6126 }
6127
6128 tcp_mstamp_refresh(tp);
6129 tp->rx_opt.saw_tstamp = 0;
6130 req = tp->fastopen_rsk;
6131 if (req) {
6132 bool req_stolen;
6133
6134 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
6135 sk->sk_state != TCP_FIN_WAIT1);
6136
6137 if (!tcp_check_req(sk, skb, req, true, &req_stolen))
6138 goto discard;
6139 }
6140
6141 if (!th->ack && !th->rst && !th->syn)
6142 goto discard;
6143
6144 if (!tcp_validate_incoming(sk, skb, th, 0))
6145 return 0;
6146
6147
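/* step 5: check the ACK field */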
6148 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
6149 FLAG_UPDATE_TS_RECENT |
6150 FLAG_NO_CHALLENGE_ACK) > 0;
6151
6152 if (!acceptable) {
6153 if (sk->sk_state == TCP_SYN_RECV)
6154 return 1;
6155 tcp_send_challenge_ack(sk, skb);
6156 goto discard;
6157 }
6158 switch (sk->sk_state) {
6159 case TCP_SYN_RECV:
6160 tp->delivered++;
6161 if (!tp->srtt_us)
6162 tcp_synack_rtt_meas(sk, req);
6163
6164 if (req) {
6165 tcp_rcv_synrecv_state_fastopen(sk);
6166 } else {
6167 tcp_try_undo_spurious_syn(sk);
6168 tp->retrans_stamp = 0;
6169 tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
6170 tp->copied_seq = tp->rcv_nxt;
6171 }
6172 smp_mb();
6173 tcp_set_state(sk, TCP_ESTABLISHED);
6174 sk->sk_state_change(sk);
6175
6176
6177
6178
6179
6180 if (sk->sk_socket)
6181 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
6182
6183 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
6184 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
6185 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
6186
6187 if (tp->rx_opt.tstamp_ok)
6188 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6189
6190 if (!inet_csk(sk)->icsk_ca_ops->cong_control)
6191 tcp_update_pacing_rate(sk);
6192
6193
6194 tp->lsndtime = tcp_jiffies32;
6195
6196 tcp_initialize_rcv_mss(sk);
6197 tcp_fast_path_on(tp);
6198 break;
6199
6200 case TCP_FIN_WAIT1: {
6201 int tmo;
6202
6203 if (req)
6204 tcp_rcv_synrecv_state_fastopen(sk);
6205
6206 if (tp->snd_una != tp->write_seq)
6207 break;
6208
6209 tcp_set_state(sk, TCP_FIN_WAIT2);
6210 sk->sk_shutdown |= SEND_SHUTDOWN;
6211
6212 sk_dst_confirm(sk);
6213
6214 if (!sock_flag(sk, SOCK_DEAD)) {
6215
6216 sk->sk_state_change(sk);
6217 break;
6218 }
6219
6220 if (tp->linger2 < 0) {
6221 tcp_done(sk);
6222 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6223 return 1;
6224 }
6225 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6226 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6227
6228 if (tp->syn_fastopen && th->fin)
6229 tcp_fastopen_active_disable(sk);
6230 tcp_done(sk);
6231 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6232 return 1;
6233 }
6234
6235 tmo = tcp_fin_time(sk);
6236 if (tmo > TCP_TIMEWAIT_LEN) {
6237 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
6238 } else if (th->fin || sock_owned_by_user(sk)) {
6239
6240
6241
6242
6243
6244
6245 inet_csk_reset_keepalive_timer(sk, tmo);
6246 } else {
6247 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
6248 goto discard;
6249 }
6250 break;
6251 }
6252
6253 case TCP_CLOSING:
6254 if (tp->snd_una == tp->write_seq) {
6255 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
6256 goto discard;
6257 }
6258 break;
6259
6260 case TCP_LAST_ACK:
6261 if (tp->snd_una == tp->write_seq) {
6262 tcp_update_metrics(sk);
6263 tcp_done(sk);
6264 goto discard;
6265 }
6266 break;
6267 }
6268
6269
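/* step 6: check the URG bit */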
6270 tcp_urg(sk, skb, th);
6271
6272
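/* step 7: process the segment text */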
6273 switch (sk->sk_state) {
6274 case TCP_CLOSE_WAIT:
6275 case TCP_CLOSING:
6276 case TCP_LAST_ACK:
6277 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
6278 break;
6279
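/* fall through */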
6280 case TCP_FIN_WAIT1:
6281 case TCP_FIN_WAIT2:
6282
6283
6284
6285
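/* RFC 793 says to queue data in these states; RFC 1122 says we MUST send
 * a reset instead, and 4.4BSD also resets.
 */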
6286 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6287 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6288 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6289 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6290 tcp_reset(sk);
6291 return 1;
6292 }
6293 }
6294
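/* fall through */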
6295 case TCP_ESTABLISHED:
6296 tcp_data_queue(sk, skb);
6297 queued = 1;
6298 break;
6299 }
6300
6301
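/* tcp_data_queue() could have moved the socket to TIME-WAIT. */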
6302 if (sk->sk_state != TCP_CLOSE) {
6303 tcp_data_snd_check(sk);
6304 tcp_ack_snd_check(sk);
6305 }
6306
6307 if (!queued) {
6308discard:
6309 tcp_drop(sk, skb);
6310 }
6311 return 0;
6312}
6313EXPORT_SYMBOL(tcp_rcv_state_process);
6314
6315static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
6316{
6317 struct inet_request_sock *ireq = inet_rsk(req);
6318
6319 if (family == AF_INET)
6320 net_dbg_ratelimited("drop open request from %pI4/%u\n",
6321 &ireq->ir_rmt_addr, port);
6322#if IS_ENABLED(CONFIG_IPV6)
6323 else if (family == AF_INET6)
6324 net_dbg_ratelimited("drop open request from %pI6/%u\n",
6325 &ireq->ir_v6_rmt_addr, port);
6326#endif
6327}
6328
6329
6330
6331
6332
6333
6334
6335
6336
6337
6338
6339
6340
6341
6342
6343
6344
6345
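/* RFC 3168, 6.1.1: SYN packets must not have the ECT bits set.
 *
 * If we receive a SYN marked ECT, some middlebox is playing games with the
 * TOS bits, so we do not negotiate ECN for the connection in that case.
 * ECN is enabled for the request when the SYN looks sane and ECN is allowed
 * by sysctl or route, or when the congestion control (e.g. DCTCP), the
 * route, or a BPF program explicitly requires ECN.
 */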
6346static void tcp_ecn_create_request(struct request_sock *req,
6347 const struct sk_buff *skb,
6348 const struct sock *listen_sk,
6349 const struct dst_entry *dst)
6350{
6351 const struct tcphdr *th = tcp_hdr(skb);
6352 const struct net *net = sock_net(listen_sk);
6353 bool th_ecn = th->ece && th->cwr;
6354 bool ect, ecn_ok;
6355 u32 ecn_ok_dst;
6356
6357 if (!th_ecn)
6358 return;
6359
6360 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6361 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6362 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
6363
6364 if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6365 (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
6366 tcp_bpf_ca_needs_ecn((struct sock *)req))
6367 inet_rsk(req)->ecn_ok = 1;
6368}
6369
6370static void tcp_openreq_init(struct request_sock *req,
6371 const struct tcp_options_received *rx_opt,
6372 struct sk_buff *skb, const struct sock *sk)
6373{
6374 struct inet_request_sock *ireq = inet_rsk(req);
6375
6376 req->rsk_rcv_wnd = 0;
6377 req->cookie_ts = 0;
6378 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
6379 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
6380 tcp_rsk(req)->snt_synack = 0;
6381 tcp_rsk(req)->last_oow_ack_time = 0;
6382 req->mss = rx_opt->mss_clamp;
6383 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
6384 ireq->tstamp_ok = rx_opt->tstamp_ok;
6385 ireq->sack_ok = rx_opt->sack_ok;
6386 ireq->snd_wscale = rx_opt->snd_wscale;
6387 ireq->wscale_ok = rx_opt->wscale_ok;
6388 ireq->acked = 0;
6389 ireq->ecn_ok = 0;
6390 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6391 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6392 ireq->ir_mark = inet_request_mark(sk, skb);
6393#if IS_ENABLED(CONFIG_SMC)
6394 ireq->smc_ok = rx_opt->smc_ok;
6395#endif
6396}
6397
6398struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6399 struct sock *sk_listener,
6400 bool attach_listener)
6401{
6402 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6403 attach_listener);
6404
6405 if (req) {
6406 struct inet_request_sock *ireq = inet_rsk(req);
6407
6408 ireq->ireq_opt = NULL;
6409#if IS_ENABLED(CONFIG_IPV6)
6410 ireq->pktopts = NULL;
6411#endif
6412 atomic64_set(&ireq->ir_cookie, 0);
6413 ireq->ireq_state = TCP_NEW_SYN_RECV;
6414 write_pnet(&ireq->ireq_net, sock_net(sk_listener));
6415 ireq->ireq_family = sk_listener->sk_family;
6416 }
6417
6418 return req;
6419}
6420EXPORT_SYMBOL(inet_reqsk_alloc);
6421
6422
6423
6424
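/* Return true if a SYN cookie should be sent instead of queueing this request. */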
6425static bool tcp_syn_flood_action(const struct sock *sk,
6426 const struct sk_buff *skb,
6427 const char *proto)
6428{
6429 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6430 const char *msg = "Dropping request";
6431 bool want_cookie = false;
6432 struct net *net = sock_net(sk);
6433
6434#ifdef CONFIG_SYN_COOKIES
6435 if (net->ipv4.sysctl_tcp_syncookies) {
6436 msg = "Sending cookies";
6437 want_cookie = true;
6438 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6439 } else
6440#endif
6441 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6442
6443 if (!queue->synflood_warned &&
6444 net->ipv4.sysctl_tcp_syncookies != 2 &&
6445 xchg(&queue->synflood_warned, 1) == 0)
6446 net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6447 proto, ntohs(tcp_hdr(skb)->dest), msg);
6448
6449 return want_cookie;
6450}
6451
6452static void tcp_reqsk_record_syn(const struct sock *sk,
6453 struct request_sock *req,
6454 const struct sk_buff *skb)
6455{
6456 if (tcp_sk(sk)->save_syn) {
6457 u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
6458 u32 *copy;
6459
6460 copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
6461 if (copy) {
6462 copy[0] = len;
6463 memcpy(&copy[1], skb_network_header(skb), len);
6464 req->saved_syn = copy;
6465 }
6466 }
6467}
6468
6469int tcp_conn_request(struct request_sock_ops *rsk_ops,
6470 const struct tcp_request_sock_ops *af_ops,
6471 struct sock *sk, struct sk_buff *skb)
6472{
6473 struct tcp_fastopen_cookie foc = { .len = -1 };
6474 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6475 struct tcp_options_received tmp_opt;
6476 struct tcp_sock *tp = tcp_sk(sk);
6477 struct net *net = sock_net(sk);
6478 struct sock *fastopen_sk = NULL;
6479 struct request_sock *req;
6480 bool want_cookie = false;
6481 struct dst_entry *dst;
6482 struct flowi fl;
6483
6484
6485
6486
6487
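/* The request queue is full (or SYN cookies are unconditionally enabled via
 * sysctl_tcp_syncookies == 2): answer with a SYN cookie if we can, otherwise
 * drop.  Requests recovered from TIME-WAIT (isn != 0) are exempt from this
 * check.
 */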
6488 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
6489 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6490 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
6491 if (!want_cookie)
6492 goto drop;
6493 }
6494
6495 if (sk_acceptq_is_full(sk)) {
6496 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6497 goto drop;
6498 }
6499
6500 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6501 if (!req)
6502 goto drop;
6503
6504 tcp_rsk(req)->af_specific = af_ops;
6505 tcp_rsk(req)->ts_off = 0;
6506
6507 tcp_clear_options(&tmp_opt);
6508 tmp_opt.mss_clamp = af_ops->mss_clamp;
6509 tmp_opt.user_mss = tp->rx_opt.user_mss;
6510 tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
6511 want_cookie ? NULL : &foc);
6512
6513 if (want_cookie && !tmp_opt.saw_tstamp)
6514 tcp_clear_options(&tmp_opt);
6515
6516 if (IS_ENABLED(CONFIG_SMC) && want_cookie)
6517 tmp_opt.smc_ok = 0;
6518
6519 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6520 tcp_openreq_init(req, &tmp_opt, skb, sk);
6521 inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
6522
6523
6524 inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
6525
6526 af_ops->init_req(req, sk, skb);
6527
6528 if (security_inet_conn_request(sk, skb, req))
6529 goto drop_and_free;
6530
6531 if (tmp_opt.tstamp_ok)
6532 tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
6533
6534 dst = af_ops->route_req(sk, &fl, req);
6535 if (!dst)
6536 goto drop_and_free;
6537
6538 if (!want_cookie && !isn) {
6539
6540 if (!net->ipv4.sysctl_tcp_syncookies &&
6541 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6542 (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
6543 !tcp_peer_is_proven(req, dst)) {
6544
6545
6546
6547
6548
6549
6550
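/* Without SYN cookies, the last quarter of the SYN backlog is reserved for
 * destinations we have already proven to be alive, so that a SYN flood
 * cannot completely crowd out known-good peers.
 */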
6551 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6552 rsk_ops->family);
6553 goto drop_and_release;
6554 }
6555
6556 isn = af_ops->init_seq(skb);
6557 }
6558
6559 tcp_ecn_create_request(req, skb, sk, dst);
6560
6561 if (want_cookie) {
6562 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6563 req->cookie_ts = tmp_opt.tstamp_ok;
6564 if (!tmp_opt.tstamp_ok)
6565 inet_rsk(req)->ecn_ok = 0;
6566 }
6567
6568 tcp_rsk(req)->snt_isn = isn;
6569 tcp_rsk(req)->txhash = net_tx_rndhash();
6570 tcp_openreq_init_rwin(req, sk, dst);
6571 sk_rx_queue_set(req_to_sk(req), skb);
6572 if (!want_cookie) {
6573 tcp_reqsk_record_syn(sk, req, skb);
6574 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6575 }
6576 if (fastopen_sk) {
6577 af_ops->send_synack(fastopen_sk, dst, &fl, req,
6578 &foc, TCP_SYNACK_FASTOPEN);
6579
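/* Add the child socket directly into the accept queue. */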
6580 if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
6581 reqsk_fastopen_remove(fastopen_sk, req, false);
6582 bh_unlock_sock(fastopen_sk);
6583 sock_put(fastopen_sk);
6584 goto drop_and_free;
6585 }
6586 sk->sk_data_ready(sk);
6587 bh_unlock_sock(fastopen_sk);
6588 sock_put(fastopen_sk);
6589 } else {
6590 tcp_rsk(req)->tfo_listener = false;
6591 if (!want_cookie)
6592 inet_csk_reqsk_queue_hash_add(sk, req,
6593 tcp_timeout_init((struct sock *)req));
6594 af_ops->send_synack(sk, dst, &fl, req, &foc,
6595 !want_cookie ? TCP_SYNACK_NORMAL :
6596 TCP_SYNACK_COOKIE);
6597 if (want_cookie) {
6598 reqsk_free(req);
6599 return 0;
6600 }
6601 }
6602 reqsk_put(req);
6603 return 0;
6604
6605drop_and_release:
6606 dst_release(dst);
6607drop_and_free:
6608 __reqsk_free(req);
6609drop:
6610 tcp_listendrop(sk);
6611 return 0;
6612}
6613EXPORT_SYMBOL(tcp_conn_request);
6614