/*
 * Implementation of the Transmission Control Protocol (TCP):
 * input path - incoming segment and ACK processing, receive-buffer
 * autotuning, RTT estimation, SACK scoreboard handling and the
 * congestion-control state machine.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/static_key.h>
#include <net/busy_poll.h>

int sysctl_tcp_max_orphans __read_mostly = NR_FILE;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK.			*/
#define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
#define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0 /* no loss recovery to do */
#define REXMIT_LOST	1 /* retransmit packets marked lost */
#define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */
114
115#if IS_ENABLED(CONFIG_TLS_DEVICE)
116static DEFINE_STATIC_KEY_FALSE(clean_acked_data_enabled);
117
118void clean_acked_data_enable(struct inet_connection_sock *icsk,
119 void (*cad)(struct sock *sk, u32 ack_seq))
120{
121 icsk->icsk_clean_acked = cad;
122 static_branch_inc(&clean_acked_data_enabled);
123}
124EXPORT_SYMBOL_GPL(clean_acked_data_enable);
125
126void clean_acked_data_disable(struct inet_connection_sock *icsk)
127{
128 static_branch_dec(&clean_acked_data_enabled);
129 icsk->icsk_clean_acked = NULL;
130}
131EXPORT_SYMBOL_GPL(clean_acked_data_disable);
132#endif
133
134static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
135 unsigned int len)
136{
137 static bool __once __read_mostly;
138
139 if (!__once) {
140 struct net_device *dev;
141
142 __once = true;
143
144 rcu_read_lock();
145 dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
146 if (!dev || len >= dev->mtu)
147 pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
148 dev ? dev->name : "Unknown driver");
149 rcu_read_unlock();
150 }
151}
152
153
154
155
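/* Adapt icsk_ack.rcv_mss, our estimate of the peer's effective sending
 * MSS, from the sizes of incoming segments.  The estimate is used by the
 * delayed-ACK and receive-window code.
 */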
156static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
157{
158 struct inet_connection_sock *icsk = inet_csk(sk);
159 const unsigned int lss = icsk->icsk_ack.last_seg_size;
160 unsigned int len;
161
162 icsk->icsk_ack.last_seg_size = 0;
163
164
165
166
167 len = skb_shinfo(skb)->gso_size ? : skb->len;
168 if (len >= icsk->icsk_ack.rcv_mss) {
169 icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
170 tcp_sk(sk)->advmss);
171
172 if (unlikely(len > icsk->icsk_ack.rcv_mss +
173 MAX_TCP_OPTION_SPACE))
174 tcp_gro_dev_warn(sk, skb, len);
175 } else {
176
177
178
179
180
181 len += skb->data - skb_transport_header(skb);
182 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
183
184
185
186
187
188 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
189 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
190
191
192
193
194 len -= tcp_sk(sk)->tcp_header_len;
195 icsk->icsk_ack.last_seg_size = len;
196 if (len == lss) {
197 icsk->icsk_ack.rcv_mss = len;
198 return;
199 }
200 }
201 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
202 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
203 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
204 }
205}
206
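/* Refresh the quick-ACK budget: allow roughly half a receive window
 * worth of segments (bounded by @max_quickacks, and at least 2) to be
 * ACKed without delay.
 */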
207static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
208{
209 struct inet_connection_sock *icsk = inet_csk(sk);
210 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
211
212 if (quickacks == 0)
213 quickacks = 2;
214 quickacks = min(quickacks, max_quickacks);
215 if (quickacks > icsk->icsk_ack.quick)
216 icsk->icsk_ack.quick = quickacks;
217}
218
219void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
220{
221 struct inet_connection_sock *icsk = inet_csk(sk);
222
223 tcp_incr_quickack(sk, max_quickacks);
224 icsk->icsk_ack.pingpong = 0;
225 icsk->icsk_ack.ato = TCP_ATO_MIN;
226}
227EXPORT_SYMBOL(tcp_enter_quickack_mode);
228
229
230
231
232
233static bool tcp_in_quickack_mode(struct sock *sk)
234{
235 const struct inet_connection_sock *icsk = inet_csk(sk);
236 const struct dst_entry *dst = __sk_dst_get(sk);
237
238 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
239 (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
240}
241
242static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
243{
244 if (tp->ecn_flags & TCP_ECN_OK)
245 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
246}
247
248static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
249{
250 if (tcp_hdr(skb)->cwr) {
251 tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
252
253
254
255
256
257 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
258 }
259}
260
261static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
262{
263 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
264}
265
266static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
267{
268 struct tcp_sock *tp = tcp_sk(sk);
269
270 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
271 case INET_ECN_NOT_ECT:
272
273
274
275
276 if (tp->ecn_flags & TCP_ECN_SEEN)
277 tcp_enter_quickack_mode(sk, 2);
278 break;
279 case INET_ECN_CE:
280 if (tcp_ca_needs_ecn(sk))
281 tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
282
283 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
284
285 tcp_enter_quickack_mode(sk, 2);
286 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
287 }
288 tp->ecn_flags |= TCP_ECN_SEEN;
289 break;
290 default:
291 if (tcp_ca_needs_ecn(sk))
292 tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
293 tp->ecn_flags |= TCP_ECN_SEEN;
294 break;
295 }
296}
297
298static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
299{
300 if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
301 __tcp_ecn_check_ce(sk, skb);
302}
303
304static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
305{
306 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
307 tp->ecn_flags &= ~TCP_ECN_OK;
308}
309
310static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
311{
312 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
313 tp->ecn_flags &= ~TCP_ECN_OK;
314}
315
316static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
317{
318 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
319 return true;
320 return false;
321}
322
323
324
325
326
327
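/* Size sk_sndbuf so that it can hold at least a full congestion window
 * (never less than the initial window, plus reordering slack) of
 * max-sized skbs, scaled by the congestion module's preferred factor.
 */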
328static void tcp_sndbuf_expand(struct sock *sk)
329{
330 const struct tcp_sock *tp = tcp_sk(sk);
331 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
332 int sndmem, per_mss;
333 u32 nr_segs;
334
335
336
337
338 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
339 MAX_TCP_HEADER +
340 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
341
342 per_mss = roundup_pow_of_two(per_mss) +
343 SKB_DATA_ALIGN(sizeof(struct sk_buff));
344
345 nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
346 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
347
348
349
350
351
352 sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
353 sndmem *= nr_segs * per_mss;
354
355 if (sk->sk_sndbuf < sndmem)
356 sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
357}
358
/* Tuning of the advertised receive window (window_clamp, rcv_ssthresh).
 *
 * rcv_ssthresh is only raised for skbs whose true memory cost is in
 * reasonable proportion to the payload they carry, so that a sender
 * cannot inflate our committed receive memory with small or badly
 * packed segments.
 */
385static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
386{
387 struct tcp_sock *tp = tcp_sk(sk);
388
389 int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
390 int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
391
392 while (tp->rcv_ssthresh <= window) {
393 if (truesize <= skb->len)
394 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
395
396 truesize >>= 1;
397 window >>= 1;
398 }
399 return 0;
400}
401
402static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
403{
404 struct tcp_sock *tp = tcp_sk(sk);
405
406
407 if (tp->rcv_ssthresh < tp->window_clamp &&
408 (int)tp->rcv_ssthresh < tcp_space(sk) &&
409 !tcp_under_memory_pressure(sk)) {
410 int incr;
411
412
413
414
415 if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
416 incr = 2 * tp->advmss;
417 else
418 incr = __tcp_grow_window(sk, skb);
419
420 if (incr) {
421 incr = max_t(int, incr, 2 * skb->len);
422 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
423 tp->window_clamp);
424 inet_csk(sk)->icsk_ack.quick |= 1;
425 }
426 }
427}
428
429
430static void tcp_fixup_rcvbuf(struct sock *sk)
431{
432 u32 mss = tcp_sk(sk)->advmss;
433 int rcvmem;
434
435 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
436 tcp_default_init_rwnd(mss);
437
438
439
440
441 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
442 rcvmem <<= 2;
443
444 if (sk->sk_rcvbuf < rcvmem)
445 sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
446}
447
448
449
450
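/* Initialise receive and send buffer sizes and the receive window clamp
 * when the connection enters the established state.
 */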
451void tcp_init_buffer_space(struct sock *sk)
452{
453 int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
454 struct tcp_sock *tp = tcp_sk(sk);
455 int maxwin;
456
457 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
458 tcp_fixup_rcvbuf(sk);
459 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
460 tcp_sndbuf_expand(sk);
461
462 tp->rcvq_space.space = tp->rcv_wnd;
463 tcp_mstamp_refresh(tp);
464 tp->rcvq_space.time = tp->tcp_mstamp;
465 tp->rcvq_space.seq = tp->copied_seq;
466
467 maxwin = tcp_full_space(sk);
468
469 if (tp->window_clamp >= maxwin) {
470 tp->window_clamp = maxwin;
471
472 if (tcp_app_win && maxwin > 4 * tp->advmss)
473 tp->window_clamp = max(maxwin -
474 (maxwin >> tcp_app_win),
475 4 * tp->advmss);
476 }
477
478
479 if (tcp_app_win &&
480 tp->window_clamp > 2 * tp->advmss &&
481 tp->window_clamp + tp->advmss > maxwin)
482 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
483
484 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
485 tp->snd_cwnd_stamp = tcp_jiffies32;
486}
487
488
489static void tcp_clamp_window(struct sock *sk)
490{
491 struct tcp_sock *tp = tcp_sk(sk);
492 struct inet_connection_sock *icsk = inet_csk(sk);
493 struct net *net = sock_net(sk);
494
495 icsk->icsk_ack.quick = 0;
496
497 if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
498 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
499 !tcp_under_memory_pressure(sk) &&
500 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
501 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
502 net->ipv4.sysctl_tcp_rmem[2]);
503 }
504 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
505 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
506}
507
508
509
510
511
512
513
514
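/* Initialise icsk_ack.rcv_mss with a conservative guess of the MSS the
 * peer will use: bounded by our own MSS values, half the receive window
 * and TCP_MSS_DEFAULT, and never below TCP_MIN_MSS.
 */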
515void tcp_initialize_rcv_mss(struct sock *sk)
516{
517 const struct tcp_sock *tp = tcp_sk(sk);
518 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
519
520 hint = min(hint, tp->rcv_wnd / 2);
521 hint = min(hint, TCP_MSS_DEFAULT);
522 hint = max(hint, TCP_MIN_MSS);
523
524 inet_csk(sk)->icsk_ack.rcv_mss = hint;
525}
526EXPORT_SYMBOL(tcp_initialize_rcv_mss);
527
/* Receiver RTT estimation without timestamps, used by the receive-buffer
 * autotuning (Dynamic Right-Sizing): sample how long it takes to receive
 * one full window of data and keep a filtered estimate in rcv_rtt_est.
 */
539static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
540{
541 u32 new_sample = tp->rcv_rtt_est.rtt_us;
542 long m = sample;
543
544 if (new_sample != 0) {
545
546
547
548
549
550
551
552
553
554
555 if (!win_dep) {
556 m -= (new_sample >> 3);
557 new_sample += m;
558 } else {
559 m <<= 3;
560 if (m < new_sample)
561 new_sample = m;
562 }
563 } else {
564
565 new_sample = m << 3;
566 }
567
568 tp->rcv_rtt_est.rtt_us = new_sample;
569}
570
571static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
572{
573 u32 delta_us;
574
575 if (tp->rcv_rtt_est.time == 0)
576 goto new_measure;
577 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
578 return;
579 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
580 if (!delta_us)
581 delta_us = 1;
582 tcp_rcv_rtt_update(tp, delta_us, 1);
583
584new_measure:
585 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
586 tp->rcv_rtt_est.time = tp->tcp_mstamp;
587}
588
589static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
590 const struct sk_buff *skb)
591{
592 struct tcp_sock *tp = tcp_sk(sk);
593
594 if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
595 return;
596 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
597
598 if (TCP_SKB_CB(skb)->end_seq -
599 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
600 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
601 u32 delta_us;
602
603 if (!delta)
604 delta = 1;
605 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
606 tcp_rcv_rtt_update(tp, delta_us, 0);
607 }
608}
609
610
611
612
613
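/* Receive-buffer autotuning: once per receiver RTT, compare how much the
 * application actually copied with the previous estimate and, if
 * allowed, grow sk_rcvbuf and the window clamp accordingly.
 */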
614void tcp_rcv_space_adjust(struct sock *sk)
615{
616 struct tcp_sock *tp = tcp_sk(sk);
617 u32 copied;
618 int time;
619
620 trace_tcp_rcv_space_adjust(sk);
621
622 tcp_mstamp_refresh(tp);
623 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
624 if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
625 return;
626
627
628 copied = tp->copied_seq - tp->rcvq_space.seq;
629 if (copied <= tp->rcvq_space.space)
630 goto new_measure;
631
632
633
634
635
636
637
638
639
640
641 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
642 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
643 int rcvmem, rcvbuf;
644 u64 rcvwin, grow;
645
646
647
648
649 rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
650
651
652 grow = rcvwin * (copied - tp->rcvq_space.space);
653 do_div(grow, tp->rcvq_space.space);
654 rcvwin += (grow << 1);
655
656 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
657 while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
658 rcvmem += 128;
659
660 do_div(rcvwin, tp->advmss);
661 rcvbuf = min_t(u64, rcvwin * rcvmem,
662 sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
663 if (rcvbuf > sk->sk_rcvbuf) {
664 sk->sk_rcvbuf = rcvbuf;
665
666
667 tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
668 }
669 }
670 tp->rcvq_space.space = copied;
671
672new_measure:
673 tp->rcvq_space.seq = tp->copied_seq;
674 tp->rcvq_space.time = tp->tcp_mstamp;
675}
676
677
678
679
680
681
682
683
684
685
686
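/* Per-segment bookkeeping for incoming data: schedule an ACK, update the
 * rcv_mss estimate, the receiver RTT sample, the delayed-ACK timeout
 * (ato), ECN state, and possibly grow the advertised window.
 */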
687static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
688{
689 struct tcp_sock *tp = tcp_sk(sk);
690 struct inet_connection_sock *icsk = inet_csk(sk);
691 u32 now;
692
693 inet_csk_schedule_ack(sk);
694
695 tcp_measure_rcv_mss(sk, skb);
696
697 tcp_rcv_rtt_measure(tp);
698
699 now = tcp_jiffies32;
700
701 if (!icsk->icsk_ack.ato) {
702
703
704
705 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
706 icsk->icsk_ack.ato = TCP_ATO_MIN;
707 } else {
708 int m = now - icsk->icsk_ack.lrcvtime;
709
710 if (m <= TCP_ATO_MIN / 2) {
711
712 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
713 } else if (m < icsk->icsk_ack.ato) {
714 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
715 if (icsk->icsk_ack.ato > icsk->icsk_rto)
716 icsk->icsk_ack.ato = icsk->icsk_rto;
717 } else if (m > icsk->icsk_rto) {
718
719
720
721 tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
722 sk_mem_reclaim(sk);
723 }
724 }
725 icsk->icsk_ack.lrcvtime = now;
726
727 tcp_ecn_check_ce(sk, skb);
728
729 if (skb->len >= 128)
730 tcp_grow_window(sk, skb);
731}
732
733
734
735
736
737
738
739
740
741
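/* Smoothed RTT estimator.  @mrtt_us is a fresh RTT measurement in
 * microseconds, taken either from timestamps or from segments known not
 * to have been retransmitted (Karn's rule).
 */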
742static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
743{
744 struct tcp_sock *tp = tcp_sk(sk);
745 long m = mrtt_us;
746 u32 srtt = tp->srtt_us;
747
	/* The smoothed RTT and mean deviation follow Jacobson (SIGCOMM '88).
	 * Both are kept pre-scaled (srtt << 3, mdev << 2) so the exponential
	 * averaging below reduces to shifts and adds; the RTO is later taken
	 * as srtt + 4 * mdev.
	 */
764 if (srtt != 0) {
765 m -= (srtt >> 3);
766 srtt += m;
767 if (m < 0) {
768 m = -m;
769 m -= (tp->mdev_us >> 2);
770
771
772
773
774
775
776
777
778 if (m > 0)
779 m >>= 3;
780 } else {
781 m -= (tp->mdev_us >> 2);
782 }
783 tp->mdev_us += m;
784 if (tp->mdev_us > tp->mdev_max_us) {
785 tp->mdev_max_us = tp->mdev_us;
786 if (tp->mdev_max_us > tp->rttvar_us)
787 tp->rttvar_us = tp->mdev_max_us;
788 }
789 if (after(tp->snd_una, tp->rtt_seq)) {
790 if (tp->mdev_max_us < tp->rttvar_us)
791 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
792 tp->rtt_seq = tp->snd_nxt;
793 tp->mdev_max_us = tcp_rto_min_us(sk);
794 }
795 } else {
796
797 srtt = m << 3;
798 tp->mdev_us = m << 1;
799 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
800 tp->mdev_max_us = tp->rttvar_us;
801 tp->rtt_seq = tp->snd_nxt;
802 }
803 tp->srtt_us = max(1U, srtt);
804}
805
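/* Recompute sk_pacing_rate as roughly ratio * cwnd * mss / srtt, using
 * separate sysctl ratios for slow start and congestion avoidance.
 */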
806static void tcp_update_pacing_rate(struct sock *sk)
807{
808 const struct tcp_sock *tp = tcp_sk(sk);
809 u64 rate;
810
811
812 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
813
814
815
816
817
818
819
820
821
822 if (tp->snd_cwnd < tp->snd_ssthresh / 2)
823 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
824 else
825 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
826
827 rate *= max(tp->snd_cwnd, tp->packets_out);
828
829 if (likely(tp->srtt_us))
830 do_div(rate, tp->srtt_us);
831
832
833
834
835
836 WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
837 sk->sk_max_pacing_rate));
838}
839
840
841
842
843static void tcp_set_rto(struct sock *sk)
844{
845 const struct tcp_sock *tp = tcp_sk(sk);
846
847
848
849
850
851
852
853
854
855
856 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
857
858
859
860
861
862
863
864
865
866
867 tcp_bound_rto(sk);
868}
869
870__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
871{
872 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
873
874 if (!cwnd)
875 cwnd = TCP_INIT_CWND;
876 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
877}
878
879
880static void tcp_dsack_seen(struct tcp_sock *tp)
881{
882 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
883 tp->rack.dsack_seen = 1;
884 tp->dsack_dups++;
885}
886
887
888
889
890
891static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
892 const int ts)
893{
894 struct tcp_sock *tp = tcp_sk(sk);
895 const u32 mss = tp->mss_cache;
896 u32 fack, metric;
897
898 fack = tcp_highest_sack_seq(tp);
899 if (!before(low_seq, fack))
900 return;
901
902 metric = fack - low_seq;
903 if ((metric > tp->reordering * mss) && mss) {
904#if FASTRETRANS_DEBUG > 1
905 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
906 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
907 tp->reordering,
908 0,
909 tp->sacked_out,
910 tp->undo_marker ? tp->undo_retrans : 0);
911#endif
912 tp->reordering = min_t(u32, (metric + mss - 1) / mss,
913 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
914 }
915
916
917 tp->reord_seen++;
918 NET_INC_STATS(sock_net(sk),
919 ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
920}
921
922
923static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
924{
925 if (!tp->retransmit_skb_hint ||
926 before(TCP_SKB_CB(skb)->seq,
927 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
928 tp->retransmit_skb_hint = skb;
929}
930
931
932
933
934
935
936
937
938static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
939{
940 __u8 sacked = TCP_SKB_CB(skb)->sacked;
941
942 if (!(sacked & TCPCB_LOST) ||
943 ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
944 tp->lost += tcp_skb_pcount(skb);
945}
946
947static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
948{
949 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
950 tcp_verify_retransmit_hint(tp, skb);
951
952 tp->lost_out += tcp_skb_pcount(skb);
953 tcp_sum_lost(tp, skb);
954 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
955 }
956}
957
958void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
959{
960 tcp_verify_retransmit_hint(tp, skb);
961
962 tcp_sum_lost(tp, skb);
963 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
964 tp->lost_out += tcp_skb_pcount(skb);
965 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
966 }
967}
968
/* Validation of an incoming SACK block.
 *
 * A SACK block must describe data between snd_una and snd_nxt.  A D-SACK
 * block may additionally cover data that is already cumulatively ACKed,
 * but only within the current undo window (bounded by undo_marker and
 * max_window); anything else is treated as bogus and discarded by the
 * caller.
 */
1062static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1063 u32 start_seq, u32 end_seq)
1064{
1065
1066 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1067 return false;
1068
1069
1070 if (!before(start_seq, tp->snd_nxt))
1071 return false;
1072
1073
1074
1075
1076 if (after(start_seq, tp->snd_una))
1077 return true;
1078
1079 if (!is_dsack || !tp->undo_marker)
1080 return false;
1081
1082
1083 if (after(end_seq, tp->snd_una))
1084 return false;
1085
1086 if (!before(start_seq, tp->undo_marker))
1087 return true;
1088
1089
1090 if (!after(end_seq, tp->undo_marker))
1091 return false;
1092
1093
1094
1095
1096 return !before(start_seq, end_seq - tp->max_window);
1097}
1098
1099static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1100 struct tcp_sack_block_wire *sp, int num_sacks,
1101 u32 prior_snd_una)
1102{
1103 struct tcp_sock *tp = tcp_sk(sk);
1104 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1105 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1106 bool dup_sack = false;
1107
1108 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1109 dup_sack = true;
1110 tcp_dsack_seen(tp);
1111 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1112 } else if (num_sacks > 1) {
1113 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1114 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1115
1116 if (!after(end_seq_0, end_seq_1) &&
1117 !before(start_seq_0, start_seq_1)) {
1118 dup_sack = true;
1119 tcp_dsack_seen(tp);
1120 NET_INC_STATS(sock_net(sk),
1121 LINUX_MIB_TCPDSACKOFORECV);
1122 }
1123 }
1124
1125
1126 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1127 !after(end_seq_0, prior_snd_una) &&
1128 after(end_seq_0, tp->undo_marker))
1129 tp->undo_retrans--;
1130
1131 return dup_sack;
1132}
1133
1134struct tcp_sacktag_state {
1135 u32 reord;
1136
1137
1138
1139
1140 u64 first_sackt;
1141 u64 last_sackt;
1142 struct rate_sample *rate;
1143 int flag;
1144 unsigned int mss_now;
1145};
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1156 u32 start_seq, u32 end_seq)
1157{
1158 int err;
1159 bool in_sack;
1160 unsigned int pkt_len;
1161 unsigned int mss;
1162
1163 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1164 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1165
1166 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1167 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1168 mss = tcp_skb_mss(skb);
1169 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1170
1171 if (!in_sack) {
1172 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1173 if (pkt_len < mss)
1174 pkt_len = mss;
1175 } else {
1176 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1177 if (pkt_len < mss)
1178 return -EINVAL;
1179 }
1180
1181
1182
1183
1184 if (pkt_len > mss) {
1185 unsigned int new_len = (pkt_len / mss) * mss;
1186 if (!in_sack && new_len < pkt_len)
1187 new_len += mss;
1188 pkt_len = new_len;
1189 }
1190
1191 if (pkt_len >= skb->len && !in_sack)
1192 return 0;
1193
1194 err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
1195 pkt_len, mss, GFP_ATOMIC);
1196 if (err < 0)
1197 return err;
1198 }
1199
1200 return in_sack;
1201}
1202
1203
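/* Tag a newly SACKed range and update the scoreboard (sacked_out,
 * lost_out, retrans_out, delivered, reordering hints).  It works on a
 * sequence range rather than on the skb itself because the skb may be
 * about to be shifted or merged.
 */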
1204static u8 tcp_sacktag_one(struct sock *sk,
1205 struct tcp_sacktag_state *state, u8 sacked,
1206 u32 start_seq, u32 end_seq,
1207 int dup_sack, int pcount,
1208 u64 xmit_time)
1209{
1210 struct tcp_sock *tp = tcp_sk(sk);
1211
1212
1213 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1214 if (tp->undo_marker && tp->undo_retrans > 0 &&
1215 after(end_seq, tp->undo_marker))
1216 tp->undo_retrans--;
1217 if ((sacked & TCPCB_SACKED_ACKED) &&
1218 before(start_seq, state->reord))
1219 state->reord = start_seq;
1220 }
1221
1222
1223 if (!after(end_seq, tp->snd_una))
1224 return sacked;
1225
1226 if (!(sacked & TCPCB_SACKED_ACKED)) {
1227 tcp_rack_advance(tp, sacked, end_seq, xmit_time);
1228
1229 if (sacked & TCPCB_SACKED_RETRANS) {
1230
1231
1232
1233
1234 if (sacked & TCPCB_LOST) {
1235 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1236 tp->lost_out -= pcount;
1237 tp->retrans_out -= pcount;
1238 }
1239 } else {
1240 if (!(sacked & TCPCB_RETRANS)) {
1241
1242
1243
1244 if (before(start_seq,
1245 tcp_highest_sack_seq(tp)) &&
1246 before(start_seq, state->reord))
1247 state->reord = start_seq;
1248
1249 if (!after(end_seq, tp->high_seq))
1250 state->flag |= FLAG_ORIG_SACK_ACKED;
1251 if (state->first_sackt == 0)
1252 state->first_sackt = xmit_time;
1253 state->last_sackt = xmit_time;
1254 }
1255
1256 if (sacked & TCPCB_LOST) {
1257 sacked &= ~TCPCB_LOST;
1258 tp->lost_out -= pcount;
1259 }
1260 }
1261
1262 sacked |= TCPCB_SACKED_ACKED;
1263 state->flag |= FLAG_DATA_SACKED;
1264 tp->sacked_out += pcount;
1265 tp->delivered += pcount;
1266
1267
1268 if (tp->lost_skb_hint &&
1269 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1270 tp->lost_cnt_hint += pcount;
1271 }
1272
1273
1274
1275
1276
1277 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1278 sacked &= ~TCPCB_SACKED_RETRANS;
1279 tp->retrans_out -= pcount;
1280 }
1281
1282 return sacked;
1283}
1284
1285
1286
1287
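/* Move @shifted newly SACKed bytes (@pcount packets) from @skb to the
 * preceding, already SACKed @prev, fixing up counters and hints; frees
 * @skb and returns true if it became empty.
 */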
1288static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1289 struct sk_buff *skb,
1290 struct tcp_sacktag_state *state,
1291 unsigned int pcount, int shifted, int mss,
1292 bool dup_sack)
1293{
1294 struct tcp_sock *tp = tcp_sk(sk);
1295 u32 start_seq = TCP_SKB_CB(skb)->seq;
1296 u32 end_seq = start_seq + shifted;
1297
1298 BUG_ON(!pcount);
1299
1300
1301
1302
1303
1304
1305
1306 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1307 start_seq, end_seq, dup_sack, pcount,
1308 skb->skb_mstamp);
1309 tcp_rate_skb_delivered(sk, skb, state->rate);
1310
1311 if (skb == tp->lost_skb_hint)
1312 tp->lost_cnt_hint += pcount;
1313
1314 TCP_SKB_CB(prev)->end_seq += shifted;
1315 TCP_SKB_CB(skb)->seq += shifted;
1316
1317 tcp_skb_pcount_add(prev, pcount);
1318 BUG_ON(tcp_skb_pcount(skb) < pcount);
1319 tcp_skb_pcount_add(skb, -pcount);
1320
1321
1322
1323
1324
1325
1326 if (!TCP_SKB_CB(prev)->tcp_gso_size)
1327 TCP_SKB_CB(prev)->tcp_gso_size = mss;
1328
1329
1330 if (tcp_skb_pcount(skb) <= 1)
1331 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1332
1333
1334 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1335
1336 if (skb->len > 0) {
1337 BUG_ON(!tcp_skb_pcount(skb));
1338 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1339 return false;
1340 }
1341
1342
1343
1344 if (skb == tp->retransmit_skb_hint)
1345 tp->retransmit_skb_hint = prev;
1346 if (skb == tp->lost_skb_hint) {
1347 tp->lost_skb_hint = prev;
1348 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1349 }
1350
1351 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1352 TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
1353 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1354 TCP_SKB_CB(prev)->end_seq++;
1355
1356 if (skb == tcp_highest_sack(sk))
1357 tcp_advance_highest_sack(sk, skb);
1358
1359 tcp_skb_collapse_tstamp(prev, skb);
1360 if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
1361 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
1362
1363 tcp_rtx_queue_unlink_and_free(skb, sk);
1364
1365 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1366
1367 return true;
1368}
1369
1370
1371
1372
1373static int tcp_skb_seglen(const struct sk_buff *skb)
1374{
1375 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1376}
1377
1378
1379static int skb_can_shift(const struct sk_buff *skb)
1380{
1381 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1382}
1383
1384
1385
1386
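/* Try to coalesce the newly SACKed part of @skb into the previous skb in
 * the retransmit queue so that scoreboard processing touches fewer skbs.
 * Returns the skb to continue walking from, or NULL to fall back to the
 * regular per-skb tagging.
 */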
1387static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1388 struct tcp_sacktag_state *state,
1389 u32 start_seq, u32 end_seq,
1390 bool dup_sack)
1391{
1392 struct tcp_sock *tp = tcp_sk(sk);
1393 struct sk_buff *prev;
1394 int mss;
1395 int pcount = 0;
1396 int len;
1397 int in_sack;
1398
1399
1400 if (!dup_sack &&
1401 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1402 goto fallback;
1403 if (!skb_can_shift(skb))
1404 goto fallback;
1405
1406 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1407 goto fallback;
1408
1409
1410 prev = skb_rb_prev(skb);
1411 if (!prev)
1412 goto fallback;
1413
1414 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1415 goto fallback;
1416
1417 if (!tcp_skb_can_collapse_to(prev))
1418 goto fallback;
1419
1420 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1421 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1422
1423 if (in_sack) {
1424 len = skb->len;
1425 pcount = tcp_skb_pcount(skb);
1426 mss = tcp_skb_seglen(skb);
1427
1428
1429
1430
1431 if (mss != tcp_skb_seglen(prev))
1432 goto fallback;
1433 } else {
1434 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1435 goto noop;
1436
1437
1438
1439
1440 if (tcp_skb_pcount(skb) <= 1)
1441 goto noop;
1442
1443 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1444 if (!in_sack) {
			/* The SACK block starts in the middle of this skb;
			 * shifting only part of it is not handled here, so
			 * give up and let the caller fragment it via
			 * tcp_match_skb_to_sack().
			 */
1456 goto fallback;
1457 }
1458
1459 len = end_seq - TCP_SKB_CB(skb)->seq;
1460 BUG_ON(len < 0);
1461 BUG_ON(len > skb->len);
1462
1463
1464
1465
1466
1467 mss = tcp_skb_mss(skb);
1468
1469
1470
1471
1472 if (mss != tcp_skb_seglen(prev))
1473 goto fallback;
1474
1475 if (len == mss) {
1476 pcount = 1;
1477 } else if (len < mss) {
1478 goto noop;
1479 } else {
1480 pcount = len / mss;
1481 len = pcount * mss;
1482 }
1483 }
1484
1485
1486 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1487 goto fallback;
1488
1489 if (!skb_shift(prev, skb, len))
1490 goto fallback;
1491 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1492 goto out;
1493
1494
1495
1496
1497 skb = skb_rb_next(prev);
1498 if (!skb)
1499 goto out;
1500
1501 if (!skb_can_shift(skb) ||
1502 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1503 (mss != tcp_skb_seglen(skb)))
1504 goto out;
1505
1506 len = skb->len;
1507 if (skb_shift(prev, skb, len)) {
1508 pcount += tcp_skb_pcount(skb);
1509 tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
1510 len, mss, 0);
1511 }
1512
1513out:
1514 return prev;
1515
1516noop:
1517 return skb;
1518
1519fallback:
1520 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1521 return NULL;
1522}
1523
1524static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1525 struct tcp_sack_block *next_dup,
1526 struct tcp_sacktag_state *state,
1527 u32 start_seq, u32 end_seq,
1528 bool dup_sack_in)
1529{
1530 struct tcp_sock *tp = tcp_sk(sk);
1531 struct sk_buff *tmp;
1532
1533 skb_rbtree_walk_from(skb) {
1534 int in_sack = 0;
1535 bool dup_sack = dup_sack_in;
1536
1537
1538 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1539 break;
1540
1541 if (next_dup &&
1542 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1543 in_sack = tcp_match_skb_to_sack(sk, skb,
1544 next_dup->start_seq,
1545 next_dup->end_seq);
1546 if (in_sack > 0)
1547 dup_sack = true;
1548 }
1549
1550
1551
1552
1553
1554 if (in_sack <= 0) {
1555 tmp = tcp_shift_skb_data(sk, skb, state,
1556 start_seq, end_seq, dup_sack);
1557 if (tmp) {
1558 if (tmp != skb) {
1559 skb = tmp;
1560 continue;
1561 }
1562
1563 in_sack = 0;
1564 } else {
1565 in_sack = tcp_match_skb_to_sack(sk, skb,
1566 start_seq,
1567 end_seq);
1568 }
1569 }
1570
1571 if (unlikely(in_sack < 0))
1572 break;
1573
1574 if (in_sack) {
1575 TCP_SKB_CB(skb)->sacked =
1576 tcp_sacktag_one(sk,
1577 state,
1578 TCP_SKB_CB(skb)->sacked,
1579 TCP_SKB_CB(skb)->seq,
1580 TCP_SKB_CB(skb)->end_seq,
1581 dup_sack,
1582 tcp_skb_pcount(skb),
1583 skb->skb_mstamp);
1584 tcp_rate_skb_delivered(sk, skb, state->rate);
1585 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1586 list_del_init(&skb->tcp_tsorted_anchor);
1587
1588 if (!before(TCP_SKB_CB(skb)->seq,
1589 tcp_highest_sack_seq(tp)))
1590 tcp_advance_highest_sack(sk, skb);
1591 }
1592 }
1593 return skb;
1594}
1595
1596static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
1597 struct tcp_sacktag_state *state,
1598 u32 seq)
1599{
1600 struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
1601 struct sk_buff *skb;
1602
1603 while (*p) {
1604 parent = *p;
1605 skb = rb_to_skb(parent);
1606 if (before(seq, TCP_SKB_CB(skb)->seq)) {
1607 p = &parent->rb_left;
1608 continue;
1609 }
1610 if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
1611 p = &parent->rb_right;
1612 continue;
1613 }
1614 return skb;
1615 }
1616 return NULL;
1617}
1618
1619static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1620 struct tcp_sacktag_state *state,
1621 u32 skip_to_seq)
1622{
1623 if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
1624 return skb;
1625
1626 return tcp_sacktag_bsearch(sk, state, skip_to_seq);
1627}
1628
1629static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1630 struct sock *sk,
1631 struct tcp_sack_block *next_dup,
1632 struct tcp_sacktag_state *state,
1633 u32 skip_to_seq)
1634{
1635 if (!next_dup)
1636 return skb;
1637
1638 if (before(next_dup->start_seq, skip_to_seq)) {
1639 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1640 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1641 next_dup->start_seq, next_dup->end_seq,
1642 1);
1643 }
1644
1645 return skb;
1646}
1647
1648static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1649{
1650 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1651}
1652
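/* Main SACK processing for an incoming ACK: parse and validate the SACK
 * blocks, sort them, then walk the retransmit queue tagging the covered
 * skbs, using recv_sack_cache to avoid rescanning ranges already
 * processed by previous ACKs.
 */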
1653static int
1654tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1655 u32 prior_snd_una, struct tcp_sacktag_state *state)
1656{
1657 struct tcp_sock *tp = tcp_sk(sk);
1658 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1659 TCP_SKB_CB(ack_skb)->sacked);
1660 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1661 struct tcp_sack_block sp[TCP_NUM_SACKS];
1662 struct tcp_sack_block *cache;
1663 struct sk_buff *skb;
1664 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1665 int used_sacks;
1666 bool found_dup_sack = false;
1667 int i, j;
1668 int first_sack_index;
1669
1670 state->flag = 0;
1671 state->reord = tp->snd_nxt;
1672
1673 if (!tp->sacked_out)
1674 tcp_highest_sack_reset(sk);
1675
1676 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1677 num_sacks, prior_snd_una);
1678 if (found_dup_sack) {
1679 state->flag |= FLAG_DSACKING_ACK;
1680 tp->delivered++;
1681 }
1682
1683
1684
1685
1686
1687 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1688 return 0;
1689
1690 if (!tp->packets_out)
1691 goto out;
1692
1693 used_sacks = 0;
1694 first_sack_index = 0;
1695 for (i = 0; i < num_sacks; i++) {
1696 bool dup_sack = !i && found_dup_sack;
1697
1698 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1699 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1700
1701 if (!tcp_is_sackblock_valid(tp, dup_sack,
1702 sp[used_sacks].start_seq,
1703 sp[used_sacks].end_seq)) {
1704 int mib_idx;
1705
1706 if (dup_sack) {
1707 if (!tp->undo_marker)
1708 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1709 else
1710 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1711 } else {
1712
1713 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1714 !after(sp[used_sacks].end_seq, tp->snd_una))
1715 continue;
1716 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1717 }
1718
1719 NET_INC_STATS(sock_net(sk), mib_idx);
1720 if (i == 0)
1721 first_sack_index = -1;
1722 continue;
1723 }
1724
1725
1726 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1727 continue;
1728
1729 used_sacks++;
1730 }
1731
1732
1733 for (i = used_sacks - 1; i > 0; i--) {
1734 for (j = 0; j < i; j++) {
1735 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1736 swap(sp[j], sp[j + 1]);
1737
1738
1739 if (j == first_sack_index)
1740 first_sack_index = j + 1;
1741 }
1742 }
1743 }
1744
1745 state->mss_now = tcp_current_mss(sk);
1746 skb = NULL;
1747 i = 0;
1748
1749 if (!tp->sacked_out) {
1750
1751 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1752 } else {
1753 cache = tp->recv_sack_cache;
1754
1755 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1756 !cache->end_seq)
1757 cache++;
1758 }
1759
1760 while (i < used_sacks) {
1761 u32 start_seq = sp[i].start_seq;
1762 u32 end_seq = sp[i].end_seq;
1763 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1764 struct tcp_sack_block *next_dup = NULL;
1765
1766 if (found_dup_sack && ((i + 1) == first_sack_index))
1767 next_dup = &sp[i + 1];
1768
1769
1770 while (tcp_sack_cache_ok(tp, cache) &&
1771 !before(start_seq, cache->end_seq))
1772 cache++;
1773
1774
1775 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1776 after(end_seq, cache->start_seq)) {
1777
1778
1779 if (before(start_seq, cache->start_seq)) {
1780 skb = tcp_sacktag_skip(skb, sk, state,
1781 start_seq);
1782 skb = tcp_sacktag_walk(skb, sk, next_dup,
1783 state,
1784 start_seq,
1785 cache->start_seq,
1786 dup_sack);
1787 }
1788
1789
1790 if (!after(end_seq, cache->end_seq))
1791 goto advance_sp;
1792
1793 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1794 state,
1795 cache->end_seq);
1796
1797
1798 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1799
1800 skb = tcp_highest_sack(sk);
1801 if (!skb)
1802 break;
1803 cache++;
1804 goto walk;
1805 }
1806
1807 skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
1808
1809 cache++;
1810 continue;
1811 }
1812
1813 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1814 skb = tcp_highest_sack(sk);
1815 if (!skb)
1816 break;
1817 }
1818 skb = tcp_sacktag_skip(skb, sk, state, start_seq);
1819
1820walk:
1821 skb = tcp_sacktag_walk(skb, sk, next_dup, state,
1822 start_seq, end_seq, dup_sack);
1823
1824advance_sp:
1825 i++;
1826 }
1827
1828
1829 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1830 tp->recv_sack_cache[i].start_seq = 0;
1831 tp->recv_sack_cache[i].end_seq = 0;
1832 }
1833 for (j = 0; j < used_sacks; j++)
1834 tp->recv_sack_cache[i++] = sp[j];
1835
1836 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
1837 tcp_check_sack_reordering(sk, state->reord, 0);
1838
1839 tcp_verify_left_out(tp);
1840out:
1841
1842#if FASTRETRANS_DEBUG > 0
1843 WARN_ON((int)tp->sacked_out < 0);
1844 WARN_ON((int)tp->lost_out < 0);
1845 WARN_ON((int)tp->retrans_out < 0);
1846 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1847#endif
1848 return state->flag;
1849}
1850
1851
1852
1853
1854static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1855{
1856 u32 holes;
1857
1858 holes = max(tp->lost_out, 1U);
1859 holes = min(holes, tp->packets_out);
1860
1861 if ((tp->sacked_out + holes) > tp->packets_out) {
1862 tp->sacked_out = tp->packets_out - holes;
1863 return true;
1864 }
1865 return false;
1866}
1867
1868
1869
1870
1871
1872static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1873{
1874 struct tcp_sock *tp = tcp_sk(sk);
1875
1876 if (!tcp_limit_reno_sacked(tp))
1877 return;
1878
1879 tp->reordering = min_t(u32, tp->packets_out + addend,
1880 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
1881 tp->reord_seen++;
1882 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
1883}
1884
1885
1886
1887static void tcp_add_reno_sack(struct sock *sk)
1888{
1889 struct tcp_sock *tp = tcp_sk(sk);
1890 u32 prior_sacked = tp->sacked_out;
1891
1892 tp->sacked_out++;
1893 tcp_check_reno_reordering(sk, 0);
1894 if (tp->sacked_out > prior_sacked)
1895 tp->delivered++;
1896 tcp_verify_left_out(tp);
1897}
1898
1899
1900
1901static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1902{
1903 struct tcp_sock *tp = tcp_sk(sk);
1904
1905 if (acked > 0) {
1906
1907 tp->delivered += max_t(int, acked - tp->sacked_out, 1);
1908 if (acked - 1 >= tp->sacked_out)
1909 tp->sacked_out = 0;
1910 else
1911 tp->sacked_out -= acked - 1;
1912 }
1913 tcp_check_reno_reordering(sk, acked);
1914 tcp_verify_left_out(tp);
1915}
1916
1917static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1918{
1919 tp->sacked_out = 0;
1920}
1921
1922void tcp_clear_retrans(struct tcp_sock *tp)
1923{
1924 tp->retrans_out = 0;
1925 tp->lost_out = 0;
1926 tp->undo_marker = 0;
1927 tp->undo_retrans = -1;
1928 tp->sacked_out = 0;
1929}
1930
1931static inline void tcp_init_undo(struct tcp_sock *tp)
1932{
1933 tp->undo_marker = tp->snd_una;
1934
1935 tp->undo_retrans = tp->retrans_out ? : -1;
1936}
1937
1938static bool tcp_is_rack(const struct sock *sk)
1939{
1940 return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
1941}
1942
1943
1944
1945
1946
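/* Mark segments lost after an RTO: if the receiver appears to have
 * renegued its SACKs, discard the SACK state and mark the whole window
 * lost; otherwise mark everything lost except segments that RACK still
 * gives more time before declaring them lost.
 */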
1947static void tcp_timeout_mark_lost(struct sock *sk)
1948{
1949 struct tcp_sock *tp = tcp_sk(sk);
1950 struct sk_buff *skb, *head;
1951 bool is_reneg;
1952
1953 head = tcp_rtx_queue_head(sk);
1954 is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
1955 if (is_reneg) {
1956 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1957 tp->sacked_out = 0;
1958
1959 tp->is_sack_reneg = 1;
1960 } else if (tcp_is_reno(tp)) {
1961 tcp_reset_reno_sack(tp);
1962 }
1963
1964 skb = head;
1965 skb_rbtree_walk_from(skb) {
1966 if (is_reneg)
1967 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1968 else if (tcp_is_rack(sk) && skb != head &&
1969 tcp_rack_skb_timeout(tp, skb, 0) > 0)
1970 continue;
1971 tcp_mark_skb_lost(sk, skb);
1972 }
1973 tcp_verify_left_out(tp);
1974 tcp_clear_all_retrans_hints(tp);
1975}
1976
1977
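/* Enter the Loss state after an RTO: record undo state, reduce ssthresh
 * unless an earlier episode already did, collapse cwnd to one packet
 * above what is in flight, and decide whether F-RTO spurious-RTO
 * detection applies to this timeout.
 */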
1978void tcp_enter_loss(struct sock *sk)
1979{
1980 const struct inet_connection_sock *icsk = inet_csk(sk);
1981 struct tcp_sock *tp = tcp_sk(sk);
1982 struct net *net = sock_net(sk);
1983 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
1984
1985 tcp_timeout_mark_lost(sk);
1986
1987
1988 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1989 !after(tp->high_seq, tp->snd_una) ||
1990 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1991 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1992 tp->prior_cwnd = tp->snd_cwnd;
1993 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1994 tcp_ca_event(sk, CA_EVENT_LOSS);
1995 tcp_init_undo(tp);
1996 }
1997 tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
1998 tp->snd_cwnd_cnt = 0;
1999 tp->snd_cwnd_stamp = tcp_jiffies32;
2000
2001
2002
2003
2004 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
2005 tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
2006 tp->reordering = min_t(unsigned int, tp->reordering,
2007 net->ipv4.sysctl_tcp_reordering);
2008 tcp_set_ca_state(sk, TCP_CA_Loss);
2009 tp->high_seq = tp->snd_nxt;
2010 tcp_ecn_queue_cwr(tp);
2011
2012
2013
2014
2015
2016 tp->frto = net->ipv4.sysctl_tcp_frto &&
2017 (new_recovery || icsk->icsk_retransmits) &&
2018 !inet_csk(sk)->icsk_mtup.probe_size;
2019}
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2032{
2033 if (flag & FLAG_SACK_RENEGING) {
2034 struct tcp_sock *tp = tcp_sk(sk);
2035 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2036 msecs_to_jiffies(10));
2037
2038 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2039 delay, TCP_RTO_MAX);
2040 return true;
2041 }
2042 return false;
2043}
2044
/* Heuristic for the number of duplicate ACKs.  With SACK there is no
 * explicit dupACK counter, so sacked_out (the number of SACKed segments)
 * is used instead; without SACK, sacked_out itself emulates dupACKs.
 */
2056static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2057{
2058 return tp->sacked_out + 1;
2059}
/* The Linux congestion-control state machine: Open, Disorder, CWR,
 * Recovery and Loss.  tcp_time_to_recover() decides when enough evidence
 * of loss has accumulated (a segment already marked lost, or the
 * dupACK/SACK heuristic exceeding the current reordering estimate) to
 * enter fast retransmit/Recovery.
 */
2158static bool tcp_time_to_recover(struct sock *sk, int flag)
2159{
2160 struct tcp_sock *tp = tcp_sk(sk);
2161
2162
2163 if (tp->lost_out)
2164 return true;
2165
2166
2167 if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
2168 return true;
2169
2170 return false;
2171}
2172
2173
2174
2175
2176
2177
2178
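/* Mark segments at the head of the retransmit queue as lost until
 * @packets packets have been accounted for (every packet for Reno, only
 * already SACKed ones advance the count for SACK flows).  With
 * @mark_head only the first segment is marked.
 */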
2179static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2180{
2181 struct tcp_sock *tp = tcp_sk(sk);
2182 struct sk_buff *skb;
2183 int cnt, oldcnt, lost;
2184 unsigned int mss;
2185
2186 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2187
2188 WARN_ON(packets > tp->packets_out);
2189 skb = tp->lost_skb_hint;
2190 if (skb) {
2191
2192 if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
2193 return;
2194 cnt = tp->lost_cnt_hint;
2195 } else {
2196 skb = tcp_rtx_queue_head(sk);
2197 cnt = 0;
2198 }
2199
2200 skb_rbtree_walk_from(skb) {
2201
2202
2203 tp->lost_skb_hint = skb;
2204 tp->lost_cnt_hint = cnt;
2205
2206 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2207 break;
2208
2209 oldcnt = cnt;
2210 if (tcp_is_reno(tp) ||
2211 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2212 cnt += tcp_skb_pcount(skb);
2213
2214 if (cnt > packets) {
2215 if (tcp_is_sack(tp) ||
2216 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2217 (oldcnt >= packets))
2218 break;
2219
2220 mss = tcp_skb_mss(skb);
2221
2222 lost = (packets - oldcnt) * mss;
2223 if (lost < skb->len &&
2224 tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2225 lost, mss, GFP_ATOMIC) < 0)
2226 break;
2227 cnt = packets;
2228 }
2229
2230 tcp_skb_mark_lost(tp, skb);
2231
2232 if (mark_head)
2233 break;
2234 }
2235 tcp_verify_left_out(tp);
2236}
2237
2238
2239
2240static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2241{
2242 struct tcp_sock *tp = tcp_sk(sk);
2243
2244 if (tcp_is_sack(tp)) {
2245 int sacked_upto = tp->sacked_out - tp->reordering;
2246 if (sacked_upto >= 0)
2247 tcp_mark_head_lost(sk, sacked_upto, 0);
2248 else if (fast_rexmit)
2249 tcp_mark_head_lost(sk, 1, 1);
2250 }
2251}
2252
2253static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
2254{
2255 return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2256 before(tp->rx_opt.rcv_tsecr, when);
2257}
2258
2259
2260
2261
2262static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
2263 const struct sk_buff *skb)
2264{
2265 return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
2266 tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
2267}
2268
2269
2270
2271
2272static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2273{
2274 return !tp->retrans_stamp ||
2275 tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
2276}
2277
/* Undo procedures.
 *
 * retrans_stamp may only be cleared once no retransmission is left in
 * the window; retrans_out alone is not a sufficient test because a
 * retransmission can itself be lost and need to be sent again.
 */
2294static bool tcp_any_retrans_done(const struct sock *sk)
2295{
2296 const struct tcp_sock *tp = tcp_sk(sk);
2297 struct sk_buff *skb;
2298
2299 if (tp->retrans_out)
2300 return true;
2301
2302 skb = tcp_rtx_queue_head(sk);
2303 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2304 return true;
2305
2306 return false;
2307}
2308
2309static void DBGUNDO(struct sock *sk, const char *msg)
2310{
2311#if FASTRETRANS_DEBUG > 1
2312 struct tcp_sock *tp = tcp_sk(sk);
2313 struct inet_sock *inet = inet_sk(sk);
2314
2315 if (sk->sk_family == AF_INET) {
2316 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2317 msg,
2318 &inet->inet_daddr, ntohs(inet->inet_dport),
2319 tp->snd_cwnd, tcp_left_out(tp),
2320 tp->snd_ssthresh, tp->prior_ssthresh,
2321 tp->packets_out);
2322 }
2323#if IS_ENABLED(CONFIG_IPV6)
2324 else if (sk->sk_family == AF_INET6) {
2325 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2326 msg,
2327 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
2328 tp->snd_cwnd, tcp_left_out(tp),
2329 tp->snd_ssthresh, tp->prior_ssthresh,
2330 tp->packets_out);
2331 }
2332#endif
2333#endif
2334}
2335
2336static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2337{
2338 struct tcp_sock *tp = tcp_sk(sk);
2339
2340 if (unmark_loss) {
2341 struct sk_buff *skb;
2342
2343 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2344 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2345 }
2346 tp->lost_out = 0;
2347 tcp_clear_all_retrans_hints(tp);
2348 }
2349
2350 if (tp->prior_ssthresh) {
2351 const struct inet_connection_sock *icsk = inet_csk(sk);
2352
2353 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2354
2355 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2356 tp->snd_ssthresh = tp->prior_ssthresh;
2357 tcp_ecn_withdraw_cwr(tp);
2358 }
2359 }
2360 tp->snd_cwnd_stamp = tcp_jiffies32;
2361 tp->undo_marker = 0;
2362 tp->rack.advanced = 1;
2363}
2364
2365static inline bool tcp_may_undo(const struct tcp_sock *tp)
2366{
2367 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2368}
2369
2370
2371static bool tcp_try_undo_recovery(struct sock *sk)
2372{
2373 struct tcp_sock *tp = tcp_sk(sk);
2374
2375 if (tcp_may_undo(tp)) {
2376 int mib_idx;
2377
2378
2379
2380
2381 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2382 tcp_undo_cwnd_reduction(sk, false);
2383 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2384 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2385 else
2386 mib_idx = LINUX_MIB_TCPFULLUNDO;
2387
2388 NET_INC_STATS(sock_net(sk), mib_idx);
2389 } else if (tp->rack.reo_wnd_persist) {
2390 tp->rack.reo_wnd_persist--;
2391 }
2392 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2393
2394
2395
2396 if (!tcp_any_retrans_done(sk))
2397 tp->retrans_stamp = 0;
2398 return true;
2399 }
2400 tcp_set_ca_state(sk, TCP_CA_Open);
2401 tp->is_sack_reneg = 0;
2402 return false;
2403}
2404
2405
2406static bool tcp_try_undo_dsack(struct sock *sk)
2407{
2408 struct tcp_sock *tp = tcp_sk(sk);
2409
2410 if (tp->undo_marker && !tp->undo_retrans) {
2411 tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
2412 tp->rack.reo_wnd_persist + 1);
2413 DBGUNDO(sk, "D-SACK");
2414 tcp_undo_cwnd_reduction(sk, false);
2415 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2416 return true;
2417 }
2418 return false;
2419}
2420
2421
2422static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2423{
2424 struct tcp_sock *tp = tcp_sk(sk);
2425
2426 if (frto_undo || tcp_may_undo(tp)) {
2427 tcp_undo_cwnd_reduction(sk, true);
2428
2429 DBGUNDO(sk, "partial loss");
2430 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2431 if (frto_undo)
2432 NET_INC_STATS(sock_net(sk),
2433 LINUX_MIB_TCPSPURIOUSRTOS);
2434 inet_csk(sk)->icsk_retransmits = 0;
2435 if (frto_undo || tcp_is_sack(tp)) {
2436 tcp_set_ca_state(sk, TCP_CA_Open);
2437 tp->is_sack_reneg = 0;
2438 }
2439 return true;
2440 }
2441 return false;
2442}
2443
/* Proportional Rate Reduction (PRR, RFC 6937): during CWR and Recovery,
 * reduce cwnd towards ssthresh in proportion to newly delivered packets,
 * so that cwnd lands close to ssthresh at the end of the episode while
 * keeping enough packets in flight.
 */
2453static void tcp_init_cwnd_reduction(struct sock *sk)
2454{
2455 struct tcp_sock *tp = tcp_sk(sk);
2456
2457 tp->high_seq = tp->snd_nxt;
2458 tp->tlp_high_seq = 0;
2459 tp->snd_cwnd_cnt = 0;
2460 tp->prior_cwnd = tp->snd_cwnd;
2461 tp->prr_delivered = 0;
2462 tp->prr_out = 0;
2463 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2464 tcp_ecn_queue_cwr(tp);
2465}
2466
2467void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
2468{
2469 struct tcp_sock *tp = tcp_sk(sk);
2470 int sndcnt = 0;
2471 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2472
2473 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2474 return;
2475
2476 tp->prr_delivered += newly_acked_sacked;
2477 if (delta < 0) {
2478 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2479 tp->prior_cwnd - 1;
2480 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2481 } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
2482 !(flag & FLAG_LOST_RETRANS)) {
2483 sndcnt = min_t(int, delta,
2484 max_t(int, tp->prr_delivered - tp->prr_out,
2485 newly_acked_sacked) + 1);
2486 } else {
2487 sndcnt = min(delta, newly_acked_sacked);
2488 }
2489
2490 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2491 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2492}
2493
2494static inline void tcp_end_cwnd_reduction(struct sock *sk)
2495{
2496 struct tcp_sock *tp = tcp_sk(sk);
2497
2498 if (inet_csk(sk)->icsk_ca_ops->cong_control)
2499 return;
2500
2501
2502 if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
2503 (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
2504 tp->snd_cwnd = tp->snd_ssthresh;
2505 tp->snd_cwnd_stamp = tcp_jiffies32;
2506 }
2507 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2508}
2509
2510
2511void tcp_enter_cwr(struct sock *sk)
2512{
2513 struct tcp_sock *tp = tcp_sk(sk);
2514
2515 tp->prior_ssthresh = 0;
2516 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2517 tp->undo_marker = 0;
2518 tcp_init_cwnd_reduction(sk);
2519 tcp_set_ca_state(sk, TCP_CA_CWR);
2520 }
2521}
2522EXPORT_SYMBOL(tcp_enter_cwr);
2523
2524static void tcp_try_keep_open(struct sock *sk)
2525{
2526 struct tcp_sock *tp = tcp_sk(sk);
2527 int state = TCP_CA_Open;
2528
2529 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2530 state = TCP_CA_Disorder;
2531
2532 if (inet_csk(sk)->icsk_ca_state != state) {
2533 tcp_set_ca_state(sk, state);
2534 tp->high_seq = tp->snd_nxt;
2535 }
2536}
2537
2538static void tcp_try_to_open(struct sock *sk, int flag)
2539{
2540 struct tcp_sock *tp = tcp_sk(sk);
2541
2542 tcp_verify_left_out(tp);
2543
2544 if (!tcp_any_retrans_done(sk))
2545 tp->retrans_stamp = 0;
2546
2547 if (flag & FLAG_ECE)
2548 tcp_enter_cwr(sk);
2549
2550 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2551 tcp_try_keep_open(sk);
2552 }
2553}
2554
2555static void tcp_mtup_probe_failed(struct sock *sk)
2556{
2557 struct inet_connection_sock *icsk = inet_csk(sk);
2558
2559 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2560 icsk->icsk_mtup.probe_size = 0;
2561 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2562}
2563
2564static void tcp_mtup_probe_success(struct sock *sk)
2565{
2566 struct tcp_sock *tp = tcp_sk(sk);
2567 struct inet_connection_sock *icsk = inet_csk(sk);
2568
2569
2570 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2571 tp->snd_cwnd = tp->snd_cwnd *
2572 tcp_mss_to_mtu(sk, tp->mss_cache) /
2573 icsk->icsk_mtup.probe_size;
2574 tp->snd_cwnd_cnt = 0;
2575 tp->snd_cwnd_stamp = tcp_jiffies32;
2576 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2577
2578 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2579 icsk->icsk_mtup.probe_size = 0;
2580 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2581 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2582}
2583
2584
2585
2586
2587
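/* Mark every unSACKed segment larger than the current MSS as lost and
 * retransmit, without the usual RTO backoff or ssthresh reduction; used
 * to speed up recovery when path MTU discovery shrinks the MSS.  The
 * socket is already locked here.
 */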
2588void tcp_simple_retransmit(struct sock *sk)
2589{
2590 const struct inet_connection_sock *icsk = inet_csk(sk);
2591 struct tcp_sock *tp = tcp_sk(sk);
2592 struct sk_buff *skb;
2593 unsigned int mss = tcp_current_mss(sk);
2594
2595 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2596 if (tcp_skb_seglen(skb) > mss &&
2597 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2598 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2599 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2600 tp->retrans_out -= tcp_skb_pcount(skb);
2601 }
2602 tcp_skb_mark_lost_uncond_verify(tp, skb);
2603 }
2604 }
2605
2606 tcp_clear_retrans_hints_partial(tp);
2607
2608 if (!tp->lost_out)
2609 return;
2610
2611 if (tcp_is_reno(tp))
2612 tcp_limit_reno_sacked(tp);
2613
2614 tcp_verify_left_out(tp);
2615
2616
2617
2618
2619
2620
2621 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2622 tp->high_seq = tp->snd_nxt;
2623 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2624 tp->prior_ssthresh = 0;
2625 tp->undo_marker = 0;
2626 tcp_set_ca_state(sk, TCP_CA_Loss);
2627 }
2628 tcp_xmit_retransmit_queue(sk);
2629}
2630EXPORT_SYMBOL(tcp_simple_retransmit);
2631
2632void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2633{
2634 struct tcp_sock *tp = tcp_sk(sk);
2635 int mib_idx;
2636
2637 if (tcp_is_reno(tp))
2638 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2639 else
2640 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2641
2642 NET_INC_STATS(sock_net(sk), mib_idx);
2643
2644 tp->prior_ssthresh = 0;
2645 tcp_init_undo(tp);
2646
2647 if (!tcp_in_cwnd_reduction(sk)) {
2648 if (!ece_ack)
2649 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2650 tcp_init_cwnd_reduction(sk);
2651 }
2652 tcp_set_ca_state(sk, TCP_CA_Recovery);
2653}
2654
2655
2656
2657
2658static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
2659 int *rexmit)
2660{
2661 struct tcp_sock *tp = tcp_sk(sk);
2662 bool recovered = !before(tp->snd_una, tp->high_seq);
2663
2664 if ((flag & FLAG_SND_UNA_ADVANCED) &&
2665 tcp_try_undo_loss(sk, false))
2666 return;
2667
2668 if (tp->frto) {
2669
2670
2671
2672 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2673 tcp_try_undo_loss(sk, true))
2674 return;
2675
2676 if (after(tp->snd_nxt, tp->high_seq)) {
2677 if (flag & FLAG_DATA_SACKED || is_dupack)
2678 tp->frto = 0;
2679 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2680 tp->high_seq = tp->snd_nxt;
2681
2682
2683
2684
2685 if (!tcp_write_queue_empty(sk) &&
2686 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2687 *rexmit = REXMIT_NEW;
2688 return;
2689 }
2690 tp->frto = 0;
2691 }
2692 }
2693
2694 if (recovered) {
2695
2696 tcp_try_undo_recovery(sk);
2697 return;
2698 }
2699 if (tcp_is_reno(tp)) {
2700
2701
2702
2703 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2704 tcp_add_reno_sack(sk);
2705 else if (flag & FLAG_SND_UNA_ADVANCED)
2706 tcp_reset_reno_sack(tp);
2707 }
2708 *rexmit = REXMIT_LOST;
2709}
2710
2711
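/* Undo during fast recovery after a partial ACK: if the (s)ACKed data was
 * in fact delayed rather than lost, revert the cwnd reduction, but only
 * once all retransmitted data has been cumulatively ACKed.
 */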
2712static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
2713{
2714 struct tcp_sock *tp = tcp_sk(sk);
2715
2716 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2717
2718
2719
2720 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2721
2722
2723
2724
2725
2726
2727 if (tp->retrans_out)
2728 return true;
2729
2730 if (!tcp_any_retrans_done(sk))
2731 tp->retrans_stamp = 0;
2732
2733 DBGUNDO(sk, "partial recovery");
2734 tcp_undo_cwnd_reduction(sk, true);
2735 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2736 tcp_try_keep_open(sk);
2737 return true;
2738 }
2739 return false;
2740}
2741
2742static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
2743{
2744 struct tcp_sock *tp = tcp_sk(sk);
2745
2746 if (tcp_rtx_queue_empty(sk))
2747 return;
2748
2749 if (unlikely(tcp_is_reno(tp))) {
2750 tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
2751 } else if (tcp_is_rack(sk)) {
2752 u32 prior_retrans = tp->retrans_out;
2753
2754 tcp_rack_mark_lost(sk);
2755 if (prior_retrans > tp->retrans_out)
2756 *ack_flag |= FLAG_LOST_RETRANS;
2757 }
2758}
2759
2760static bool tcp_force_fast_retransmit(struct sock *sk)
2761{
2762 struct tcp_sock *tp = tcp_sk(sk);
2763
2764 return after(tcp_highest_sack_seq(tp),
2765 tp->snd_una + tp->reordering * tp->mss_cache);
2766}
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
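/* The heart of Linux loss recovery.  Called for every "dubious" ACK
 * (duplicate ACKs, SACK/ECE/DSACK signals, or any ACK while not in
 * CA_Open).  It decides when to enter and leave CA_Disorder, CA_Recovery,
 * CA_CWR and CA_Loss, when a recent cwnd reduction can be undone, and what
 * should be (re)transmitted next, reported through *rexmit.
 */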
2780static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2781 bool is_dupack, int *ack_flag, int *rexmit)
2782{
2783 struct inet_connection_sock *icsk = inet_csk(sk);
2784 struct tcp_sock *tp = tcp_sk(sk);
2785 int fast_rexmit = 0, flag = *ack_flag;
2786 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2787 tcp_force_fast_retransmit(sk));
2788
2789 if (!tp->packets_out && tp->sacked_out)
2790 tp->sacked_out = 0;
2791
2792
2793
2794 if (flag & FLAG_ECE)
2795 tp->prior_ssthresh = 0;
2796
2797
2798 if (tcp_check_sack_reneging(sk, flag))
2799 return;
2800
2801
2802 tcp_verify_left_out(tp);
2803
2804
2805
2806 if (icsk->icsk_ca_state == TCP_CA_Open) {
2807 WARN_ON(tp->retrans_out != 0);
2808 tp->retrans_stamp = 0;
2809 } else if (!before(tp->snd_una, tp->high_seq)) {
2810 switch (icsk->icsk_ca_state) {
2811 case TCP_CA_CWR:
2812
2813
2814 if (tp->snd_una != tp->high_seq) {
2815 tcp_end_cwnd_reduction(sk);
2816 tcp_set_ca_state(sk, TCP_CA_Open);
2817 }
2818 break;
2819
2820 case TCP_CA_Recovery:
2821 if (tcp_is_reno(tp))
2822 tcp_reset_reno_sack(tp);
2823 if (tcp_try_undo_recovery(sk))
2824 return;
2825 tcp_end_cwnd_reduction(sk);
2826 break;
2827 }
2828 }
2829
2830
2831 switch (icsk->icsk_ca_state) {
2832 case TCP_CA_Recovery:
2833 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2834 if (tcp_is_reno(tp) && is_dupack)
2835 tcp_add_reno_sack(sk);
2836 } else {
2837 if (tcp_try_undo_partial(sk, prior_snd_una))
2838 return;
2839
2840 do_lost = tcp_is_reno(tp) ||
2841 tcp_force_fast_retransmit(sk);
2842 }
2843 if (tcp_try_undo_dsack(sk)) {
2844 tcp_try_keep_open(sk);
2845 return;
2846 }
2847 tcp_identify_packet_loss(sk, ack_flag);
2848 break;
2849 case TCP_CA_Loss:
2850 tcp_process_loss(sk, flag, is_dupack, rexmit);
2851 tcp_identify_packet_loss(sk, ack_flag);
2852 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
2853 (*ack_flag & FLAG_LOST_RETRANS)))
2854 return;
2855
2856
2857 default:
2858 if (tcp_is_reno(tp)) {
2859 if (flag & FLAG_SND_UNA_ADVANCED)
2860 tcp_reset_reno_sack(tp);
2861 if (is_dupack)
2862 tcp_add_reno_sack(sk);
2863 }
2864
2865 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2866 tcp_try_undo_dsack(sk);
2867
2868 tcp_identify_packet_loss(sk, ack_flag);
2869 if (!tcp_time_to_recover(sk, flag)) {
2870 tcp_try_to_open(sk, flag);
2871 return;
2872 }
2873
2874
2875 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2876 icsk->icsk_mtup.probe_size &&
2877 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2878 tcp_mtup_probe_failed(sk);
2879
2880 tp->snd_cwnd++;
2881 tcp_simple_retransmit(sk);
2882 return;
2883 }
2884
2885
2886 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2887 fast_rexmit = 1;
2888 }
2889
2890 if (!tcp_is_rack(sk) && do_lost)
2891 tcp_update_scoreboard(sk, fast_rexmit);
2892 *rexmit = REXMIT_LOST;
2893}
2894
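/* Feed a new RTT sample into the windowed min-RTT filter, unless the ACK
 * may have been delayed by the receiver and the sample exceeds the current
 * minimum (a delayed ACK can only inflate the measurement).
 */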
2895static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2896{
2897 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2898 struct tcp_sock *tp = tcp_sk(sk);
2899
2900 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
2901
2902
2903
2904
2905 return;
2906 }
2907 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
2908 rtt_us ? : jiffies_to_usecs(1));
2909}
2910
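/* Update the RTT estimator from this ACK.  Prefer an RTT measured on a
 * never-retransmitted segment (Karn's rule); fall back to a SACK-based
 * sample, and finally to the TCP timestamp echo when available.
 */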
2911static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2912 long seq_rtt_us, long sack_rtt_us,
2913 long ca_rtt_us, struct rate_sample *rs)
2914{
2915 const struct tcp_sock *tp = tcp_sk(sk);
2916
2917
2918
2919
2920
2921
2922 if (seq_rtt_us < 0)
2923 seq_rtt_us = sack_rtt_us;
2924
2925
2926
2927
2928
2929
2930
2931 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2932 flag & FLAG_ACKED) {
2933 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
2934 u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
2935
2936 seq_rtt_us = ca_rtt_us = delta_us;
2937 }
2938 rs->rtt_us = ca_rtt_us;
2939 if (seq_rtt_us < 0)
2940 return false;
2941
2942
2943
2944
2945
2946 tcp_update_rtt_min(sk, ca_rtt_us, flag);
2947 tcp_rtt_estimator(sk, seq_rtt_us);
2948 tcp_set_rto(sk);
2949
2950
2951 inet_csk(sk)->icsk_backoff = 0;
2952 return true;
2953}
2954
2955
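/* Compute the SYN-ACK RTT for a passively opened connection, but only if
 * the SYN-ACK was not retransmitted (otherwise the sample is ambiguous).
 */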
2956void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
2957{
2958 struct rate_sample rs;
2959 long rtt_us = -1L;
2960
2961 if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
2962 rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
2963
2964 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
2965}
2966
2967
2968static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
2969{
2970 const struct inet_connection_sock *icsk = inet_csk(sk);
2971
2972 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
2973 tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
2974}
2975
2976
2977
2978
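/* (Re)arm the retransmission timer.  If a loss-probe or reordering timer
 * is currently pending, base the RTO on the send time of the earliest
 * outstanding packet so the deadline is not pushed further out.
 */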
2979void tcp_rearm_rto(struct sock *sk)
2980{
2981 const struct inet_connection_sock *icsk = inet_csk(sk);
2982 struct tcp_sock *tp = tcp_sk(sk);
2983
2984
2985
2986
2987 if (tp->fastopen_rsk)
2988 return;
2989
2990 if (!tp->packets_out) {
2991 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2992 } else {
2993 u32 rto = inet_csk(sk)->icsk_rto;
2994
2995 if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2996 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2997 s64 delta_us = tcp_rto_delta_us(sk);
2998
2999
3000
3001 rto = usecs_to_jiffies(max_t(int, delta_us, 1));
3002 }
3003 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3004 TCP_RTO_MAX);
3005 }
3006}
3007
3008
3009static void tcp_set_xmit_timer(struct sock *sk)
3010{
3011 if (!tcp_schedule_loss_probe(sk, true))
3012 tcp_rearm_rto(sk);
3013}
3014
3015
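/* A partial ACK landed inside a TSO super-packet: trim the acknowledged
 * head off the skb and return how many MSS-sized pieces were covered
 * (0 if the trim failed or nothing was removed).
 */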
3016static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3017{
3018 struct tcp_sock *tp = tcp_sk(sk);
3019 u32 packets_acked;
3020
3021 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3022
3023 packets_acked = tcp_skb_pcount(skb);
3024 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3025 return 0;
3026 packets_acked -= tcp_skb_pcount(skb);
3027
3028 if (packets_acked) {
3029 BUG_ON(tcp_skb_pcount(skb) == 0);
3030 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3031 }
3032
3033 return packets_acked;
3034}
3035
3036static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3037 u32 prior_snd_una)
3038{
3039 const struct skb_shared_info *shinfo;
3040
3041
3042 if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
3043 return;
3044
3045 shinfo = skb_shinfo(skb);
3046 if (!before(shinfo->tskey, prior_snd_una) &&
3047 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3048 tcp_skb_tsorted_save(skb) {
3049 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3050 } tcp_skb_tsorted_restore(skb);
3051 }
3052}
3053
3054
3055
3056
3057
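/* Remove cumulatively acknowledged skbs from the retransmission queue,
 * updating packets_out/sacked_out/lost_out/retrans_out accounting, RTT
 * samples, rate samples and RACK state along the way.  Returns the set of
 * FLAG_* bits describing what this ACK accomplished.
 */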
3058static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
3059 u32 prior_snd_una,
3060 struct tcp_sacktag_state *sack)
3061{
3062 const struct inet_connection_sock *icsk = inet_csk(sk);
3063 u64 first_ackt, last_ackt;
3064 struct tcp_sock *tp = tcp_sk(sk);
3065 u32 prior_sacked = tp->sacked_out;
3066 u32 reord = tp->snd_nxt;
3067 struct sk_buff *skb, *next;
3068 bool fully_acked = true;
3069 long sack_rtt_us = -1L;
3070 long seq_rtt_us = -1L;
3071 long ca_rtt_us = -1L;
3072 u32 pkts_acked = 0;
3073 u32 last_in_flight = 0;
3074 bool rtt_update;
3075 int flag = 0;
3076
3077 first_ackt = 0;
3078
3079 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3080 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3081 const u32 start_seq = scb->seq;
3082 u8 sacked = scb->sacked;
3083 u32 acked_pcount;
3084
3085 tcp_ack_tstamp(sk, skb, prior_snd_una);
3086
3087
3088 if (after(scb->end_seq, tp->snd_una)) {
3089 if (tcp_skb_pcount(skb) == 1 ||
3090 !after(tp->snd_una, scb->seq))
3091 break;
3092
3093 acked_pcount = tcp_tso_acked(sk, skb);
3094 if (!acked_pcount)
3095 break;
3096 fully_acked = false;
3097 } else {
3098 acked_pcount = tcp_skb_pcount(skb);
3099 }
3100
3101 if (unlikely(sacked & TCPCB_RETRANS)) {
3102 if (sacked & TCPCB_SACKED_RETRANS)
3103 tp->retrans_out -= acked_pcount;
3104 flag |= FLAG_RETRANS_DATA_ACKED;
3105 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3106 last_ackt = skb->skb_mstamp;
3107 WARN_ON_ONCE(last_ackt == 0);
3108 if (!first_ackt)
3109 first_ackt = last_ackt;
3110
3111 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3112 if (before(start_seq, reord))
3113 reord = start_seq;
3114 if (!after(scb->end_seq, tp->high_seq))
3115 flag |= FLAG_ORIG_SACK_ACKED;
3116 }
3117
3118 if (sacked & TCPCB_SACKED_ACKED) {
3119 tp->sacked_out -= acked_pcount;
3120 } else if (tcp_is_sack(tp)) {
3121 tp->delivered += acked_pcount;
3122 if (!tcp_skb_spurious_retrans(tp, skb))
3123 tcp_rack_advance(tp, sacked, scb->end_seq,
3124 skb->skb_mstamp);
3125 }
3126 if (sacked & TCPCB_LOST)
3127 tp->lost_out -= acked_pcount;
3128
3129 tp->packets_out -= acked_pcount;
3130 pkts_acked += acked_pcount;
3131 tcp_rate_skb_delivered(sk, skb, sack->rate);
3132
3133
3134
3135
3136
3137
3138
3139
3140 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3141 flag |= FLAG_DATA_ACKED;
3142 } else {
3143 flag |= FLAG_SYN_ACKED;
3144 tp->retrans_stamp = 0;
3145 }
3146
3147 if (!fully_acked)
3148 break;
3149
3150 next = skb_rb_next(skb);
3151 if (unlikely(skb == tp->retransmit_skb_hint))
3152 tp->retransmit_skb_hint = NULL;
3153 if (unlikely(skb == tp->lost_skb_hint))
3154 tp->lost_skb_hint = NULL;
3155 tcp_rtx_queue_unlink_and_free(skb, sk);
3156 }
3157
3158 if (!skb)
3159 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3160
3161 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3162 tp->snd_up = tp->snd_una;
3163
3164 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3165 flag |= FLAG_SACK_RENEGING;
3166
3167 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3168 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3169 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3170
3171 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3172 last_in_flight && !prior_sacked && fully_acked &&
3173 sack->rate->prior_delivered + 1 == tp->delivered &&
3174 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
3175
3176
3177
3178
3179 flag |= FLAG_ACK_MAYBE_DELAYED;
3180 }
3181 }
3182 if (sack->first_sackt) {
3183 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
3184 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
3185 }
3186 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
3187 ca_rtt_us, sack->rate);
3188
3189 if (flag & FLAG_ACKED) {
3190 flag |= FLAG_SET_XMIT_TIMER;
3191 if (unlikely(icsk->icsk_mtup.probe_size &&
3192 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3193 tcp_mtup_probe_success(sk);
3194 }
3195
3196 if (tcp_is_reno(tp)) {
3197 tcp_remove_reno_sacks(sk, pkts_acked);
3198
3199
3200
3201
3202
3203
3204
3205 if (flag & FLAG_RETRANS_DATA_ACKED)
3206 flag &= ~FLAG_ORIG_SACK_ACKED;
3207 } else {
3208 int delta;
3209
3210
3211 if (before(reord, prior_fack))
3212 tcp_check_sack_reordering(sk, reord, 0);
3213
3214 delta = prior_sacked - tp->sacked_out;
3215 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3216 }
3217 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3218 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
3219
3220
3221
3222
3223 flag |= FLAG_SET_XMIT_TIMER;
3224 }
3225
3226 if (icsk->icsk_ca_ops->pkts_acked) {
3227 struct ack_sample sample = { .pkts_acked = pkts_acked,
3228 .rtt_us = sack->rate->rtt_us,
3229 .in_flight = last_in_flight };
3230
3231 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3232 }
3233
3234#if FASTRETRANS_DEBUG > 0
3235 WARN_ON((int)tp->sacked_out < 0);
3236 WARN_ON((int)tp->lost_out < 0);
3237 WARN_ON((int)tp->retrans_out < 0);
3238 if (!tp->packets_out && tcp_is_sack(tp)) {
3239 icsk = inet_csk(sk);
3240 if (tp->lost_out) {
3241 pr_debug("Leak l=%u %d\n",
3242 tp->lost_out, icsk->icsk_ca_state);
3243 tp->lost_out = 0;
3244 }
3245 if (tp->sacked_out) {
3246 pr_debug("Leak s=%u %d\n",
3247 tp->sacked_out, icsk->icsk_ca_state);
3248 tp->sacked_out = 0;
3249 }
3250 if (tp->retrans_out) {
3251 pr_debug("Leak r=%u %d\n",
3252 tp->retrans_out, icsk->icsk_ca_state);
3253 tp->retrans_out = 0;
3254 }
3255 }
3256#endif
3257 return flag;
3258}
3259
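/* An ACK arrived while the zero-window probe timer is in use: if the
 * advertised window now has room for the head of the write queue, stop
 * probing; otherwise rearm the probe0 timer.
 */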
3260static void tcp_ack_probe(struct sock *sk)
3261{
3262 struct inet_connection_sock *icsk = inet_csk(sk);
3263 struct sk_buff *head = tcp_send_head(sk);
3264 const struct tcp_sock *tp = tcp_sk(sk);
3265
3266
3267 if (!head)
3268 return;
3269 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3270 icsk->icsk_backoff = 0;
3271 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3272
3273
3274
3275 } else {
3276 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3277
3278 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3279 when, TCP_RTO_MAX);
3280 }
3281}
3282
3283static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3284{
3285 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3286 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3287}
3288
3289
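/* Decide whether this ACK may grow the congestion window.  With heavy
 * reordering, cwnd is only raised on ACKs that make forward progress;
 * otherwise any ACK of new data qualifies.
 */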
3290static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3291{
3292
3293
3294
3295
3296
3297
3298 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3299 return flag & FLAG_FORWARD_PROGRESS;
3300
3301 return flag & FLAG_DATA_ACKED;
3302}
3303
3304
3305
3306
3307
3308
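/* Central congestion-control dispatch for each ACK: modules implementing
 * cong_control (e.g. BBR) take over completely; otherwise the PRR-style
 * reduction drives the window while cwnd reduction is in progress, and the
 * module's cong_avoid hook grows it in the normal case.
 */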
3309static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3310 int flag, const struct rate_sample *rs)
3311{
3312 const struct inet_connection_sock *icsk = inet_csk(sk);
3313
3314 if (icsk->icsk_ca_ops->cong_control) {
3315 icsk->icsk_ca_ops->cong_control(sk, rs);
3316 return;
3317 }
3318
3319 if (tcp_in_cwnd_reduction(sk)) {
3320
3321 tcp_cwnd_reduction(sk, acked_sacked, flag);
3322 } else if (tcp_may_raise_cwnd(sk, flag)) {
3323
3324 tcp_cong_avoid(sk, ack, acked_sacked);
3325 }
3326 tcp_update_pacing_rate(sk);
3327}
3328
3329
3330
3331
3332static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3333 const u32 ack, const u32 ack_seq,
3334 const u32 nwin)
3335{
3336 return after(ack, tp->snd_una) ||
3337 after(ack_seq, tp->snd_wl1) ||
3338 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3339}
3340
3341
3342static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3343{
3344 u32 delta = ack - tp->snd_una;
3345
3346 sock_owned_by_me((struct sock *)tp);
3347 tp->bytes_acked += delta;
3348 tp->snd_una = ack;
3349}
3350
3351
3352static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3353{
3354 u32 delta = seq - tp->rcv_nxt;
3355
3356 sock_owned_by_me((struct sock *)tp);
3357 tp->bytes_received += delta;
3358 tp->rcv_nxt = seq;
3359}
3360
3361
3362
3363
3364
3365
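/* Update the send window from an incoming segment, using the standard
 * checks against stale window updates (newer ack, newer seq, or a larger
 * window advertised for the same seq), then advance snd_una.
 */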
3366static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3367 u32 ack_seq)
3368{
3369 struct tcp_sock *tp = tcp_sk(sk);
3370 int flag = 0;
3371 u32 nwin = ntohs(tcp_hdr(skb)->window);
3372
3373 if (likely(!tcp_hdr(skb)->syn))
3374 nwin <<= tp->rx_opt.snd_wscale;
3375
3376 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3377 flag |= FLAG_WIN_UPDATE;
3378 tcp_update_wl(tp, ack_seq);
3379
3380 if (tp->snd_wnd != nwin) {
3381 tp->snd_wnd = nwin;
3382
3383
3384
3385
3386 tp->pred_flags = 0;
3387 tcp_fast_path_check(sk);
3388
3389 if (!tcp_write_queue_empty(sk))
3390 tcp_slow_start_after_idle_check(sk);
3391
3392 if (nwin > tp->max_window) {
3393 tp->max_window = nwin;
3394 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3395 }
3396 }
3397 }
3398
3399 tcp_snd_una_update(tp, ack);
3400
3401 return flag;
3402}
3403
3404static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3405 u32 *last_oow_ack_time)
3406{
3407 if (*last_oow_ack_time) {
3408 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3409
3410 if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
3411 NET_INC_STATS(net, mib_idx);
3412 return true;
3413 }
3414 }
3415
3416 *last_oow_ack_time = tcp_jiffies32;
3417
3418 return false;
3419}
3420
3421
3422
3423
3424
3425
3426
3427
3428bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3429 int mib_idx, u32 *last_oow_ack_time)
3430{
3431
3432 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3433 !tcp_hdr(skb)->syn)
3434 return false;
3435
3436 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3437}
3438
3439
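/* RFC 5961 7.2: send a challenge ACK in response to suspicious segments,
 * rate-limited both per socket and globally (a randomized per-second
 * budget) so the mechanism cannot be abused as an ACK amplifier.
 */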
3440static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3441{
3442
3443 static u32 challenge_timestamp;
3444 static unsigned int challenge_count;
3445 struct tcp_sock *tp = tcp_sk(sk);
3446 struct net *net = sock_net(sk);
3447 u32 count, now;
3448
3449
3450 if (__tcp_oow_rate_limited(net,
3451 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3452 &tp->last_oow_ack_time))
3453 return;
3454
3455
3456 now = jiffies / HZ;
3457 if (now != challenge_timestamp) {
3458 u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
3459 u32 half = (ack_limit + 1) >> 1;
3460
3461 challenge_timestamp = now;
3462 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
3463 }
3464 count = READ_ONCE(challenge_count);
3465 if (count > 0) {
3466 WRITE_ONCE(challenge_count, count - 1);
3467 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3468 tcp_send_ack(sk);
3469 }
3470}
3471
3472static void tcp_store_ts_recent(struct tcp_sock *tp)
3473{
3474 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3475 tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
3476}
3477
3478static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3479{
3480 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3481
3482
3483
3484
3485
3486
3487
3488 if (tcp_paws_check(&tp->rx_opt, 0))
3489 tcp_store_ts_recent(tp);
3490 }
3491}
3492
3493
3494
3495
3496
3497
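/* Handle ACKs that cover the tail-loss-probe sequence (tlp_high_seq): a
 * DSACK means the probed data was merely duplicated, an ACK beyond the
 * probe means the probe repaired a real loss (count it and reduce cwnd
 * once), while a pure duplicate ACK indicates both the original and the
 * probe arrived, so the episode is simply cleared.
 */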
3498static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3499{
3500 struct tcp_sock *tp = tcp_sk(sk);
3501
3502 if (before(ack, tp->tlp_high_seq))
3503 return;
3504
3505 if (flag & FLAG_DSACKING_ACK) {
3506
3507 tp->tlp_high_seq = 0;
3508 } else if (after(ack, tp->tlp_high_seq)) {
3509
3510
3511
3512 tcp_init_cwnd_reduction(sk);
3513 tcp_set_ca_state(sk, TCP_CA_CWR);
3514 tcp_end_cwnd_reduction(sk);
3515 tcp_try_keep_open(sk);
3516 NET_INC_STATS(sock_net(sk),
3517 LINUX_MIB_TCPLOSSPROBERECOVERY);
3518 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3519 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3520
3521 tp->tlp_high_seq = 0;
3522 }
3523}
3524
3525static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3526{
3527 const struct inet_connection_sock *icsk = inet_csk(sk);
3528
3529 if (icsk->icsk_ca_ops->in_ack_event)
3530 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3531}
3532
3533
3534
3535
3536
3537static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3538{
3539 struct tcp_sock *tp = tcp_sk(sk);
3540
3541 if (rexmit == REXMIT_NONE)
3542 return;
3543
3544	if (unlikely(rexmit == REXMIT_NEW)) {
3545 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3546 TCP_NAGLE_OFF);
3547 if (after(tp->snd_nxt, tp->high_seq))
3548 return;
3549 tp->frto = 0;
3550 }
3551 tcp_xmit_retransmit_queue(sk);
3552}
3553
3554
3555static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
3556{
3557 const struct net *net = sock_net(sk);
3558 struct tcp_sock *tp = tcp_sk(sk);
3559 u32 delivered;
3560
3561 delivered = tp->delivered - prior_delivered;
3562 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
3563 if (flag & FLAG_ECE) {
3564 tp->delivered_ce += delivered;
3565 NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
3566 }
3567 return delivered;
3568}
3569
3570
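/* This routine deals with incoming ACKs: it validates the ACK field,
 * updates the send window and RTT/rate estimates, cleans the
 * retransmission queue, and hands dubious ACKs to the fast-retransmit
 * state machine before letting congestion control react.
 */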
3571static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3572{
3573 struct inet_connection_sock *icsk = inet_csk(sk);
3574 struct tcp_sock *tp = tcp_sk(sk);
3575 struct tcp_sacktag_state sack_state;
3576 struct rate_sample rs = { .prior_delivered = 0 };
3577 u32 prior_snd_una = tp->snd_una;
3578 bool is_sack_reneg = tp->is_sack_reneg;
3579 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3580 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3581 bool is_dupack = false;
3582 int prior_packets = tp->packets_out;
3583 u32 delivered = tp->delivered;
3584 u32 lost = tp->lost;
3585 int rexmit = REXMIT_NONE;
3586 u32 prior_fack;
3587
3588 sack_state.first_sackt = 0;
3589 sack_state.rate = &rs;
3590
3591
3592 prefetch(sk->tcp_rtx_queue.rb_node);
3593
3594
3595
3596
3597 if (before(ack, prior_snd_una)) {
3598
3599 if (before(ack, prior_snd_una - tp->max_window)) {
3600 if (!(flag & FLAG_NO_CHALLENGE_ACK))
3601 tcp_send_challenge_ack(sk, skb);
3602 return -1;
3603 }
3604 goto old_ack;
3605 }
3606
3607
3608
3609
3610 if (after(ack, tp->snd_nxt))
3611 goto invalid_ack;
3612
3613 if (after(ack, prior_snd_una)) {
3614 flag |= FLAG_SND_UNA_ADVANCED;
3615 icsk->icsk_retransmits = 0;
3616
3617#if IS_ENABLED(CONFIG_TLS_DEVICE)
3618 if (static_branch_unlikely(&clean_acked_data_enabled))
3619 if (icsk->icsk_clean_acked)
3620 icsk->icsk_clean_acked(sk, ack);
3621#endif
3622 }
3623
3624 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3625 rs.prior_in_flight = tcp_packets_in_flight(tp);
3626
3627
3628
3629
3630 if (flag & FLAG_UPDATE_TS_RECENT)
3631 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3632
3633 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3634
3635
3636
3637
3638 tcp_update_wl(tp, ack_seq);
3639 tcp_snd_una_update(tp, ack);
3640 flag |= FLAG_WIN_UPDATE;
3641
3642 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3643
3644 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3645 } else {
3646 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3647
3648 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3649 flag |= FLAG_DATA;
3650 else
3651 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3652
3653 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3654
3655 if (TCP_SKB_CB(skb)->sacked)
3656 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3657 &sack_state);
3658
3659 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3660 flag |= FLAG_ECE;
3661 ack_ev_flags |= CA_ACK_ECE;
3662 }
3663
3664 if (flag & FLAG_WIN_UPDATE)
3665 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3666
3667 tcp_in_ack_event(sk, ack_ev_flags);
3668 }
3669
3670
3671
3672
3673 sk->sk_err_soft = 0;
3674 icsk->icsk_probes_out = 0;
3675 tp->rcv_tstamp = tcp_jiffies32;
3676 if (!prior_packets)
3677 goto no_queue;
3678
3679
3680 flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
3681
3682 tcp_rack_update_reo_wnd(sk, &rs);
3683
3684 if (tp->tlp_high_seq)
3685 tcp_process_tlp_ack(sk, ack, flag);
3686
3687 if (flag & FLAG_SET_XMIT_TIMER)
3688 tcp_set_xmit_timer(sk);
3689
3690 if (tcp_ack_is_dubious(sk, flag)) {
3691 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3692 tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
3693 &rexmit);
3694 }
3695
3696 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3697 sk_dst_confirm(sk);
3698
3699 delivered = tcp_newly_delivered(sk, delivered, flag);
3700 lost = tp->lost - lost;
3701 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3702 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3703 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3704 tcp_xmit_recovery(sk, rexmit);
3705 return 1;
3706
3707no_queue:
3708
3709 if (flag & FLAG_DSACKING_ACK) {
3710 tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
3711 &rexmit);
3712 tcp_newly_delivered(sk, delivered, flag);
3713 }
3714
3715
3716
3717
3718 tcp_ack_probe(sk);
3719
3720 if (tp->tlp_high_seq)
3721 tcp_process_tlp_ack(sk, ack, flag);
3722 return 1;
3723
3724invalid_ack:
3725 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3726 return -1;
3727
3728old_ack:
3729
3730
3731
3732 if (TCP_SKB_CB(skb)->sacked) {
3733 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3734 &sack_state);
3735 tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
3736 &rexmit);
3737 tcp_newly_delivered(sk, delivered, flag);
3738 tcp_xmit_recovery(sk, rexmit);
3739 }
3740
3741 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3742 return 0;
3743}
3744
3745static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3746 bool syn, struct tcp_fastopen_cookie *foc,
3747 bool exp_opt)
3748{
3749
3750 if (!foc || !syn || len < 0 || (len & 1))
3751 return;
3752
3753 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3754 len <= TCP_FASTOPEN_COOKIE_MAX)
3755 memcpy(foc->val, cookie, len);
3756 else if (len != 0)
3757 len = -1;
3758 foc->len = len;
3759 foc->exp = exp_opt;
3760}
3761
3762static void smc_parse_options(const struct tcphdr *th,
3763 struct tcp_options_received *opt_rx,
3764 const unsigned char *ptr,
3765 int opsize)
3766{
3767#if IS_ENABLED(CONFIG_SMC)
3768 if (static_branch_unlikely(&tcp_have_smc)) {
3769 if (th->syn && !(opsize & 1) &&
3770 opsize >= TCPOLEN_EXP_SMC_BASE &&
3771 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
3772 opt_rx->smc_ok = 1;
3773 }
3774#endif
3775}
3776
3777
3778
3779
3780
3781void tcp_parse_options(const struct net *net,
3782 const struct sk_buff *skb,
3783 struct tcp_options_received *opt_rx, int estab,
3784 struct tcp_fastopen_cookie *foc)
3785{
3786 const unsigned char *ptr;
3787 const struct tcphdr *th = tcp_hdr(skb);
3788 int length = (th->doff * 4) - sizeof(struct tcphdr);
3789
3790 ptr = (const unsigned char *)(th + 1);
3791 opt_rx->saw_tstamp = 0;
3792
3793 while (length > 0) {
3794 int opcode = *ptr++;
3795 int opsize;
3796
3797 switch (opcode) {
3798 case TCPOPT_EOL:
3799 return;
3800 case TCPOPT_NOP:
3801 length--;
3802 continue;
3803 default:
3804 opsize = *ptr++;
3805 if (opsize < 2)
3806 return;
3807 if (opsize > length)
3808 return;
3809 switch (opcode) {
3810 case TCPOPT_MSS:
3811 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3812 u16 in_mss = get_unaligned_be16(ptr);
3813 if (in_mss) {
3814 if (opt_rx->user_mss &&
3815 opt_rx->user_mss < in_mss)
3816 in_mss = opt_rx->user_mss;
3817 opt_rx->mss_clamp = in_mss;
3818 }
3819 }
3820 break;
3821 case TCPOPT_WINDOW:
3822 if (opsize == TCPOLEN_WINDOW && th->syn &&
3823 !estab && net->ipv4.sysctl_tcp_window_scaling) {
3824 __u8 snd_wscale = *(__u8 *)ptr;
3825 opt_rx->wscale_ok = 1;
3826 if (snd_wscale > TCP_MAX_WSCALE) {
3827 net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
3828 __func__,
3829 snd_wscale,
3830 TCP_MAX_WSCALE);
3831 snd_wscale = TCP_MAX_WSCALE;
3832 }
3833 opt_rx->snd_wscale = snd_wscale;
3834 }
3835 break;
3836 case TCPOPT_TIMESTAMP:
3837 if ((opsize == TCPOLEN_TIMESTAMP) &&
3838 ((estab && opt_rx->tstamp_ok) ||
3839 (!estab && net->ipv4.sysctl_tcp_timestamps))) {
3840 opt_rx->saw_tstamp = 1;
3841 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3842 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3843 }
3844 break;
3845 case TCPOPT_SACK_PERM:
3846 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3847 !estab && net->ipv4.sysctl_tcp_sack) {
3848 opt_rx->sack_ok = TCP_SACK_SEEN;
3849 tcp_sack_reset(opt_rx);
3850 }
3851 break;
3852
3853 case TCPOPT_SACK:
3854 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3855 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3856 opt_rx->sack_ok) {
3857 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3858 }
3859 break;
3860#ifdef CONFIG_TCP_MD5SIG
3861 case TCPOPT_MD5SIG:
3862
3863
3864
3865
3866 break;
3867#endif
3868 case TCPOPT_FASTOPEN:
3869 tcp_parse_fastopen_option(
3870 opsize - TCPOLEN_FASTOPEN_BASE,
3871 ptr, th->syn, foc, false);
3872 break;
3873
3874 case TCPOPT_EXP:
3875
3876
3877
3878 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
3879 get_unaligned_be16(ptr) ==
3880 TCPOPT_FASTOPEN_MAGIC)
3881 tcp_parse_fastopen_option(opsize -
3882 TCPOLEN_EXP_FASTOPEN_BASE,
3883 ptr + 2, th->syn, foc, true);
3884 else
3885 smc_parse_options(th, opt_rx, ptr,
3886 opsize);
3887 break;
3888
3889 }
3890 ptr += opsize-2;
3891 length -= opsize;
3892 }
3893 }
3894}
3895EXPORT_SYMBOL(tcp_parse_options);
3896
3897static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3898{
3899 const __be32 *ptr = (const __be32 *)(th + 1);
3900
3901 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3902 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3903 tp->rx_opt.saw_tstamp = 1;
3904 ++ptr;
3905 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3906 ++ptr;
3907 if (*ptr)
3908 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3909 else
3910 tp->rx_opt.rcv_tsecr = 0;
3911 return true;
3912 }
3913 return false;
3914}
3915
3916
3917
3918
3919static bool tcp_fast_parse_options(const struct net *net,
3920 const struct sk_buff *skb,
3921 const struct tcphdr *th, struct tcp_sock *tp)
3922{
3923
3924
3925
3926 if (th->doff == (sizeof(*th) / 4)) {
3927 tp->rx_opt.saw_tstamp = 0;
3928 return false;
3929 } else if (tp->rx_opt.tstamp_ok &&
3930 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3931 if (tcp_parse_aligned_timestamp(tp, th))
3932 return true;
3933 }
3934
3935 tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
3936 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3937 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3938
3939 return true;
3940}
3941
3942#ifdef CONFIG_TCP_MD5SIG
3943
3944
3945
3946const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3947{
3948 int length = (th->doff << 2) - sizeof(*th);
3949 const u8 *ptr = (const u8 *)(th + 1);
3950
3951
3952 while (length >= TCPOLEN_MD5SIG) {
3953 int opcode = *ptr++;
3954 int opsize;
3955
3956 switch (opcode) {
3957 case TCPOPT_EOL:
3958 return NULL;
3959 case TCPOPT_NOP:
3960 length--;
3961 continue;
3962 default:
3963 opsize = *ptr++;
3964 if (opsize < 2 || opsize > length)
3965 return NULL;
3966 if (opcode == TCPOPT_MD5SIG)
3967 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3968 }
3969 ptr += opsize - 2;
3970 length -= opsize;
3971 }
3972 return NULL;
3973}
3974EXPORT_SYMBOL(tcp_parse_md5sig_option);
3975#endif
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
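/* Heuristic used with PAWS: accept a segment whose timestamp is older than
 * ts_recent if it looks like a harmlessly reordered pure ACK - it must ack
 * nothing new, carry no data, not update the window, and its timestamp
 * must lie within roughly one RTO of ts_recent.
 */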
4000static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
4001{
4002 const struct tcp_sock *tp = tcp_sk(sk);
4003 const struct tcphdr *th = tcp_hdr(skb);
4004 u32 seq = TCP_SKB_CB(skb)->seq;
4005 u32 ack = TCP_SKB_CB(skb)->ack_seq;
4006
4007 return (
4008 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
4009
4010
4011 ack == tp->snd_una &&
4012
4013
4014 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
4015
4016
4017 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
4018}
4019
4020static inline bool tcp_paws_discard(const struct sock *sk,
4021 const struct sk_buff *skb)
4022{
4023 const struct tcp_sock *tp = tcp_sk(sk);
4024
4025 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
4026 !tcp_disordered_ack(sk, skb);
4027}
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4043{
4044 return !before(end_seq, tp->rcv_wup) &&
4045 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
4046}
4047
4048
4049void tcp_reset(struct sock *sk)
4050{
4051 trace_tcp_receive_reset(sk);
4052
4053
4054 switch (sk->sk_state) {
4055 case TCP_SYN_SENT:
4056 sk->sk_err = ECONNREFUSED;
4057 break;
4058 case TCP_CLOSE_WAIT:
4059 sk->sk_err = EPIPE;
4060 break;
4061 case TCP_CLOSE:
4062 return;
4063 default:
4064 sk->sk_err = ECONNRESET;
4065 }
4066
4067 smp_wmb();
4068
4069 tcp_write_queue_purge(sk);
4070 tcp_done(sk);
4071
4072 if (!sock_flag(sk, SOCK_DEAD))
4073 sk->sk_error_report(sk);
4074}
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
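/* Process the FIN bit: schedule an ACK, mark the receive side shut down,
 * and walk the RFC 793 state transitions (ESTABLISHED -> CLOSE_WAIT,
 * FIN_WAIT1 -> CLOSING, FIN_WAIT2 -> TIME_WAIT, ...).  The out-of-order
 * queue is purged and SACK state reset.
 */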
4090void tcp_fin(struct sock *sk)
4091{
4092 struct tcp_sock *tp = tcp_sk(sk);
4093
4094 inet_csk_schedule_ack(sk);
4095
4096 sk->sk_shutdown |= RCV_SHUTDOWN;
4097 sock_set_flag(sk, SOCK_DONE);
4098
4099 switch (sk->sk_state) {
4100 case TCP_SYN_RECV:
4101 case TCP_ESTABLISHED:
4102
4103 tcp_set_state(sk, TCP_CLOSE_WAIT);
4104 inet_csk(sk)->icsk_ack.pingpong = 1;
4105 break;
4106
4107 case TCP_CLOSE_WAIT:
4108 case TCP_CLOSING:
4109
4110
4111
4112 break;
4113 case TCP_LAST_ACK:
4114
4115 break;
4116
4117 case TCP_FIN_WAIT1:
4118
4119
4120
4121
4122 tcp_send_ack(sk);
4123 tcp_set_state(sk, TCP_CLOSING);
4124 break;
4125 case TCP_FIN_WAIT2:
4126
4127 tcp_send_ack(sk);
4128 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4129 break;
4130 default:
4131
4132
4133
4134 pr_err("%s: Impossible, sk->sk_state=%d\n",
4135 __func__, sk->sk_state);
4136 break;
4137 }
4138
4139
4140
4141
4142 skb_rbtree_purge(&tp->out_of_order_queue);
4143 if (tcp_is_sack(tp))
4144 tcp_sack_reset(&tp->rx_opt);
4145 sk_mem_reclaim(sk);
4146
4147 if (!sock_flag(sk, SOCK_DEAD)) {
4148 sk->sk_state_change(sk);
4149
4150
4151 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4152 sk->sk_state == TCP_CLOSE)
4153 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4154 else
4155 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4156 }
4157}
4158
4159static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4160 u32 end_seq)
4161{
4162 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4163 if (before(seq, sp->start_seq))
4164 sp->start_seq = seq;
4165 if (after(end_seq, sp->end_seq))
4166 sp->end_seq = end_seq;
4167 return true;
4168 }
4169 return false;
4170}
4171
4172static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4173{
4174 struct tcp_sock *tp = tcp_sk(sk);
4175
4176 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4177 int mib_idx;
4178
4179 if (before(seq, tp->rcv_nxt))
4180 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4181 else
4182 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4183
4184 NET_INC_STATS(sock_net(sk), mib_idx);
4185
4186 tp->rx_opt.dsack = 1;
4187 tp->duplicate_sack[0].start_seq = seq;
4188 tp->duplicate_sack[0].end_seq = end_seq;
4189 }
4190}
4191
4192static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4193{
4194 struct tcp_sock *tp = tcp_sk(sk);
4195
4196 if (!tp->rx_opt.dsack)
4197 tcp_dsack_set(sk, seq, end_seq);
4198 else
4199 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4200}
4201
4202static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4203{
4204 struct tcp_sock *tp = tcp_sk(sk);
4205
4206 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4207 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4208 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4209 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4210
4211 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4212 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4213
4214 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4215 end_seq = tp->rcv_nxt;
4216 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4217 }
4218 }
4219
4220 tcp_send_ack(sk);
4221}
4222
4223
4224
4225
4226static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4227{
4228 int this_sack;
4229 struct tcp_sack_block *sp = &tp->selective_acks[0];
4230 struct tcp_sack_block *swalk = sp + 1;
4231
4232
4233
4234
4235 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4236 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4237 int i;
4238
4239
4240
4241
4242 tp->rx_opt.num_sacks--;
4243 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4244 sp[i] = sp[i + 1];
4245 continue;
4246 }
4247 this_sack++, swalk++;
4248 }
4249}
4250
4251static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4252{
4253 struct tcp_sock *tp = tcp_sk(sk);
4254 struct tcp_sack_block *sp = &tp->selective_acks[0];
4255 int cur_sacks = tp->rx_opt.num_sacks;
4256 int this_sack;
4257
4258 if (!cur_sacks)
4259 goto new_sack;
4260
4261 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4262 if (tcp_sack_extend(sp, seq, end_seq)) {
4263
4264 for (; this_sack > 0; this_sack--, sp--)
4265 swap(*sp, *(sp - 1));
4266 if (cur_sacks > 1)
4267 tcp_sack_maybe_coalesce(tp);
4268 return;
4269 }
4270 }
4271
4272
4273
4274
4275
4276
4277
4278 if (this_sack >= TCP_NUM_SACKS) {
4279 if (tp->compressed_ack)
4280 tcp_send_ack(sk);
4281 this_sack--;
4282 tp->rx_opt.num_sacks--;
4283 sp--;
4284 }
4285 for (; this_sack > 0; this_sack--, sp--)
4286 *sp = *(sp - 1);
4287
4288new_sack:
4289
4290 sp->start_seq = seq;
4291 sp->end_seq = end_seq;
4292 tp->rx_opt.num_sacks++;
4293}
4294
4295
4296
4297static void tcp_sack_remove(struct tcp_sock *tp)
4298{
4299 struct tcp_sack_block *sp = &tp->selective_acks[0];
4300 int num_sacks = tp->rx_opt.num_sacks;
4301 int this_sack;
4302
4303
4304 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4305 tp->rx_opt.num_sacks = 0;
4306 return;
4307 }
4308
4309 for (this_sack = 0; this_sack < num_sacks;) {
4310
4311 if (!before(tp->rcv_nxt, sp->start_seq)) {
4312 int i;
4313
4314
4315 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4316
4317
4318 for (i = this_sack+1; i < num_sacks; i++)
4319 tp->selective_acks[i-1] = tp->selective_acks[i];
4320 num_sacks--;
4321 continue;
4322 }
4323 this_sack++;
4324 sp++;
4325 }
4326 tp->rx_opt.num_sacks = num_sacks;
4327}
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
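/* Try to append @from to the tail skb @to of the receive or out-of-order
 * queue so that fewer skbs (and less truesize overhead) are kept around.
 * On success the sequence range and flags of @to are extended and the
 * caller is expected to free @from, possibly as a stolen-frag skb.
 */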
4343static bool tcp_try_coalesce(struct sock *sk,
4344 struct sk_buff *to,
4345 struct sk_buff *from,
4346 bool *fragstolen)
4347{
4348 int delta;
4349
4350 *fragstolen = false;
4351
4352
4353 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4354 return false;
4355
4356#ifdef CONFIG_TLS_DEVICE
4357 if (from->decrypted != to->decrypted)
4358 return false;
4359#endif
4360
4361 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4362 return false;
4363
4364 atomic_add(delta, &sk->sk_rmem_alloc);
4365 sk_mem_charge(sk, delta);
4366 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4367 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4368 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4369 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4370
4371 if (TCP_SKB_CB(from)->has_rxtstamp) {
4372 TCP_SKB_CB(to)->has_rxtstamp = true;
4373 to->tstamp = from->tstamp;
4374 }
4375
4376 return true;
4377}
4378
4379static bool tcp_ooo_try_coalesce(struct sock *sk,
4380 struct sk_buff *to,
4381 struct sk_buff *from,
4382 bool *fragstolen)
4383{
4384 bool res = tcp_try_coalesce(sk, to, from, fragstolen);
4385
4386
4387 if (res) {
4388 u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
4389 max_t(u16, 1, skb_shinfo(from)->gso_segs);
4390
4391 skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
4392 }
4393 return res;
4394}
4395
4396static void tcp_drop(struct sock *sk, struct sk_buff *skb)
4397{
4398 sk_drops_add(sk, skb);
4399 __kfree_skb(skb);
4400}
4401
4402
4403
4404
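/* Move any segments from the out-of-order rbtree that are now in sequence
 * onto sk_receive_queue, generating D-SACK information for overlaps and
 * stopping once a FIN has been queued.
 */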
4405static void tcp_ofo_queue(struct sock *sk)
4406{
4407 struct tcp_sock *tp = tcp_sk(sk);
4408 __u32 dsack_high = tp->rcv_nxt;
4409 bool fin, fragstolen, eaten;
4410 struct sk_buff *skb, *tail;
4411 struct rb_node *p;
4412
4413 p = rb_first(&tp->out_of_order_queue);
4414 while (p) {
4415 skb = rb_to_skb(p);
4416 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4417 break;
4418
4419 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4420 __u32 dsack = dsack_high;
4421 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4422 dsack_high = TCP_SKB_CB(skb)->end_seq;
4423 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4424 }
4425 p = rb_next(p);
4426 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4427
4428 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4429 SOCK_DEBUG(sk, "ofo packet was already received\n");
4430 tcp_drop(sk, skb);
4431 continue;
4432 }
4433 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4434 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4435 TCP_SKB_CB(skb)->end_seq);
4436
4437 tail = skb_peek_tail(&sk->sk_receive_queue);
4438 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4439 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4440 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4441 if (!eaten)
4442 __skb_queue_tail(&sk->sk_receive_queue, skb);
4443 else
4444 kfree_skb_partial(skb, fragstolen);
4445
4446 if (unlikely(fin)) {
4447 tcp_fin(sk);
4448
4449
4450
4451 break;
4452 }
4453 }
4454}
4455
4456static bool tcp_prune_ofo_queue(struct sock *sk);
4457static int tcp_prune_queue(struct sock *sk);
4458
4459static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4460 unsigned int size)
4461{
4462 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4463 !sk_rmem_schedule(sk, skb, size)) {
4464
4465 if (tcp_prune_queue(sk) < 0)
4466 return -1;
4467
4468 while (!sk_rmem_schedule(sk, skb, size)) {
4469 if (!tcp_prune_ofo_queue(sk))
4470 return -1;
4471 }
4472 }
4473 return 0;
4474}
4475
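/* Queue an out-of-order segment: charge receive memory, disable the fast
 * path, and insert the skb into the seq-ordered rbtree, coalescing with or
 * replacing overlapping entries and updating the SACK blocks that will be
 * advertised.
 */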
4476static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4477{
4478 struct tcp_sock *tp = tcp_sk(sk);
4479 struct rb_node **p, *parent;
4480 struct sk_buff *skb1;
4481 u32 seq, end_seq;
4482 bool fragstolen;
4483
4484 tcp_ecn_check_ce(sk, skb);
4485
4486 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4487 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4488 tcp_drop(sk, skb);
4489 return;
4490 }
4491
4492
4493 tp->pred_flags = 0;
4494 inet_csk_schedule_ack(sk);
4495
4496 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4497 seq = TCP_SKB_CB(skb)->seq;
4498 end_seq = TCP_SKB_CB(skb)->end_seq;
4499 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4500 tp->rcv_nxt, seq, end_seq);
4501
4502 p = &tp->out_of_order_queue.rb_node;
4503 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4504
4505 if (tcp_is_sack(tp)) {
4506 tp->rx_opt.num_sacks = 1;
4507 tp->selective_acks[0].start_seq = seq;
4508 tp->selective_acks[0].end_seq = end_seq;
4509 }
4510 rb_link_node(&skb->rbnode, NULL, p);
4511 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4512 tp->ooo_last_skb = skb;
4513 goto end;
4514 }
4515
4516
4517
4518
4519 if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
4520 skb, &fragstolen)) {
4521coalesce_done:
4522 tcp_grow_window(sk, skb);
4523 kfree_skb_partial(skb, fragstolen);
4524 skb = NULL;
4525 goto add_sack;
4526 }
4527
4528 if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
4529 parent = &tp->ooo_last_skb->rbnode;
4530 p = &parent->rb_right;
4531 goto insert;
4532 }
4533
4534
4535 parent = NULL;
4536 while (*p) {
4537 parent = *p;
4538 skb1 = rb_to_skb(parent);
4539 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4540 p = &parent->rb_left;
4541 continue;
4542 }
4543 if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4544 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4545
4546 NET_INC_STATS(sock_net(sk),
4547 LINUX_MIB_TCPOFOMERGE);
4548 tcp_drop(sk, skb);
4549 skb = NULL;
4550 tcp_dsack_set(sk, seq, end_seq);
4551 goto add_sack;
4552 }
4553 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4554
4555 tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
4556 } else {
4557
4558
4559
4560 rb_replace_node(&skb1->rbnode, &skb->rbnode,
4561 &tp->out_of_order_queue);
4562 tcp_dsack_extend(sk,
4563 TCP_SKB_CB(skb1)->seq,
4564 TCP_SKB_CB(skb1)->end_seq);
4565 NET_INC_STATS(sock_net(sk),
4566 LINUX_MIB_TCPOFOMERGE);
4567 tcp_drop(sk, skb1);
4568 goto merge_right;
4569 }
4570 } else if (tcp_ooo_try_coalesce(sk, skb1,
4571 skb, &fragstolen)) {
4572 goto coalesce_done;
4573 }
4574 p = &parent->rb_right;
4575 }
4576insert:
4577
4578 rb_link_node(&skb->rbnode, parent, p);
4579 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4580
4581merge_right:
4582
4583 while ((skb1 = skb_rb_next(skb)) != NULL) {
4584 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4585 break;
4586 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4587 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4588 end_seq);
4589 break;
4590 }
4591 rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
4592 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4593 TCP_SKB_CB(skb1)->end_seq);
4594 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4595 tcp_drop(sk, skb1);
4596 }
4597
4598 if (!skb1)
4599 tp->ooo_last_skb = skb;
4600
4601add_sack:
4602 if (tcp_is_sack(tp))
4603 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4604end:
4605 if (skb) {
4606 tcp_grow_window(sk, skb);
4607 skb_condense(skb);
4608 skb_set_owner_r(skb, sk);
4609 }
4610}
4611
4612static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4613 bool *fragstolen)
4614{
4615 int eaten;
4616 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4617
4618 __skb_pull(skb, hdrlen);
4619 eaten = (tail &&
4620 tcp_try_coalesce(sk, tail,
4621 skb, fragstolen)) ? 1 : 0;
4622 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4623 if (!eaten) {
4624 __skb_queue_tail(&sk->sk_receive_queue, skb);
4625 skb_set_owner_r(skb, sk);
4626 }
4627 return eaten;
4628}
4629
4630int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4631{
4632 struct sk_buff *skb;
4633 int err = -ENOMEM;
4634 int data_len = 0;
4635 bool fragstolen;
4636
4637 if (size == 0)
4638 return 0;
4639
4640 if (size > PAGE_SIZE) {
4641 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4642
4643 data_len = npages << PAGE_SHIFT;
4644 size = data_len + (size & ~PAGE_MASK);
4645 }
4646 skb = alloc_skb_with_frags(size - data_len, data_len,
4647 PAGE_ALLOC_COSTLY_ORDER,
4648 &err, sk->sk_allocation);
4649 if (!skb)
4650 goto err;
4651
4652 skb_put(skb, size - data_len);
4653 skb->data_len = data_len;
4654 skb->len = size;
4655
4656 if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4657 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4658 goto err_free;
4659 }
4660
4661 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4662 if (err)
4663 goto err_free;
4664
4665 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4666 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4667 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4668
4669 if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
4670 WARN_ON_ONCE(fragstolen);
4671 __kfree_skb(skb);
4672 }
4673 return size;
4674
4675err_free:
4676 kfree_skb(skb);
4677err:
4678 return err;
4679
4680}
4681
4682void tcp_data_ready(struct sock *sk)
4683{
4684 const struct tcp_sock *tp = tcp_sk(sk);
4685 int avail = tp->rcv_nxt - tp->copied_seq;
4686
4687 if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
4688 return;
4689
4690 sk->sk_data_ready(sk);
4691}
4692
4693static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4694{
4695 struct tcp_sock *tp = tcp_sk(sk);
4696 bool fragstolen;
4697 int eaten;
4698
4699 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
4700 __kfree_skb(skb);
4701 return;
4702 }
4703 skb_dst_drop(skb);
4704 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4705
4706 tcp_ecn_accept_cwr(sk, skb);
4707
4708 tp->rx_opt.dsack = 0;
4709
4710
4711
4712
4713
4714 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4715 if (tcp_receive_window(tp) == 0) {
4716 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4717 goto out_of_window;
4718 }
4719
4720
4721queue_and_out:
4722 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4723 sk_forced_mem_schedule(sk, skb->truesize);
4724 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
4725 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
4726 goto drop;
4727 }
4728
4729 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4730 if (skb->len)
4731 tcp_event_data_recv(sk, skb);
4732 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4733 tcp_fin(sk);
4734
4735 if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4736 tcp_ofo_queue(sk);
4737
4738
4739
4740
4741 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4742 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
4743 }
4744
4745 if (tp->rx_opt.num_sacks)
4746 tcp_sack_remove(tp);
4747
4748 tcp_fast_path_check(sk);
4749
4750 if (eaten > 0)
4751 kfree_skb_partial(skb, fragstolen);
4752 if (!sock_flag(sk, SOCK_DEAD))
4753 tcp_data_ready(sk);
4754 return;
4755 }
4756
4757 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4758
4759 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4760 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4761
4762out_of_window:
4763 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
4764 inet_csk_schedule_ack(sk);
4765drop:
4766 tcp_drop(sk, skb);
4767 return;
4768 }
4769
4770
4771 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4772 goto out_of_window;
4773
4774 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4775
4776 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4777 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4778 TCP_SKB_CB(skb)->end_seq);
4779
4780 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4781
4782
4783
4784
4785 if (!tcp_receive_window(tp)) {
4786 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
4787 goto out_of_window;
4788 }
4789 goto queue_and_out;
4790 }
4791
4792 tcp_data_queue_ofo(sk, skb);
4793}
4794
4795static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
4796{
4797 if (list)
4798 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
4799
4800 return skb_rb_next(skb);
4801}
4802
4803static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4804 struct sk_buff_head *list,
4805 struct rb_root *root)
4806{
4807 struct sk_buff *next = tcp_skb_next(skb, list);
4808
4809 if (list)
4810 __skb_unlink(skb, list);
4811 else
4812 rb_erase(&skb->rbnode, root);
4813
4814 __kfree_skb(skb);
4815 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4816
4817 return next;
4818}
4819
4820
4821void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
4822{
4823 struct rb_node **p = &root->rb_node;
4824 struct rb_node *parent = NULL;
4825 struct sk_buff *skb1;
4826
4827 while (*p) {
4828 parent = *p;
4829 skb1 = rb_to_skb(parent);
4830 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
4831 p = &parent->rb_left;
4832 else
4833 p = &parent->rb_right;
4834 }
4835 rb_link_node(&skb->rbnode, parent, p);
4836 rb_insert_color(&skb->rbnode, root);
4837}
4838
4839
4840
4841
4842
4843
4844
4845
4846
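/* Collapse the queue (or rbtree) between @head and @tail over the sequence
 * range [start, end) into compact, freshly allocated skbs, reclaiming the
 * truesize overhead of many small or overlapping skbs.  Segments carrying
 * SYN or FIN are never collapsed.
 */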
4847static void
4848tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
4849 struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
4850{
4851 struct sk_buff *skb = head, *n;
4852 struct sk_buff_head tmp;
4853 bool end_of_skbs;
4854
4855
4856
4857
4858restart:
4859 for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
4860 n = tcp_skb_next(skb, list);
4861
4862
4863 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4864 skb = tcp_collapse_one(sk, skb, list, root);
4865 if (!skb)
4866 break;
4867 goto restart;
4868 }
4869
4870
4871
4872
4873
4874
4875 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4876 (tcp_win_from_space(sk, skb->truesize) > skb->len ||
4877 before(TCP_SKB_CB(skb)->seq, start))) {
4878 end_of_skbs = false;
4879 break;
4880 }
4881
4882 if (n && n != tail &&
4883 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
4884 end_of_skbs = false;
4885 break;
4886 }
4887
4888
4889 start = TCP_SKB_CB(skb)->end_seq;
4890 }
4891 if (end_of_skbs ||
4892 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4893 return;
4894
4895 __skb_queue_head_init(&tmp);
4896
4897 while (before(start, end)) {
4898 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4899 struct sk_buff *nskb;
4900
4901 nskb = alloc_skb(copy, GFP_ATOMIC);
4902 if (!nskb)
4903 break;
4904
4905 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4906#ifdef CONFIG_TLS_DEVICE
4907 nskb->decrypted = skb->decrypted;
4908#endif
4909 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4910 if (list)
4911 __skb_queue_before(list, skb, nskb);
4912 else
4913 __skb_queue_tail(&tmp, nskb);
4914 skb_set_owner_r(nskb, sk);
4915
4916
4917 while (copy > 0) {
4918 int offset = start - TCP_SKB_CB(skb)->seq;
4919 int size = TCP_SKB_CB(skb)->end_seq - start;
4920
4921 BUG_ON(offset < 0);
4922 if (size > 0) {
4923 size = min(copy, size);
4924 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4925 BUG();
4926 TCP_SKB_CB(nskb)->end_seq += size;
4927 copy -= size;
4928 start += size;
4929 }
4930 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4931 skb = tcp_collapse_one(sk, skb, list, root);
4932 if (!skb ||
4933 skb == tail ||
4934 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4935 goto end;
4936#ifdef CONFIG_TLS_DEVICE
4937 if (skb->decrypted != nskb->decrypted)
4938 goto end;
4939#endif
4940 }
4941 }
4942 }
4943end:
4944 skb_queue_walk_safe(&tmp, skb, n)
4945 tcp_rbtree_insert(root, skb);
4946}
4947
4948
4949
4950
4951static void tcp_collapse_ofo_queue(struct sock *sk)
4952{
4953 struct tcp_sock *tp = tcp_sk(sk);
4954 u32 range_truesize, sum_tiny = 0;
4955 struct sk_buff *skb, *head;
4956 u32 start, end;
4957
4958 skb = skb_rb_first(&tp->out_of_order_queue);
4959new_range:
4960 if (!skb) {
4961 tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
4962 return;
4963 }
4964 start = TCP_SKB_CB(skb)->seq;
4965 end = TCP_SKB_CB(skb)->end_seq;
4966 range_truesize = skb->truesize;
4967
4968 for (head = skb;;) {
4969 skb = skb_rb_next(skb);
4970
4971
4972
4973
4974 if (!skb ||
4975 after(TCP_SKB_CB(skb)->seq, end) ||
4976 before(TCP_SKB_CB(skb)->end_seq, start)) {
4977
4978 if (range_truesize != head->truesize ||
4979 end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
4980 tcp_collapse(sk, NULL, &tp->out_of_order_queue,
4981 head, skb, start, end);
4982 } else {
4983 sum_tiny += range_truesize;
4984 if (sum_tiny > sk->sk_rcvbuf >> 3)
4985 return;
4986 }
4987 goto new_range;
4988 }
4989
4990 range_truesize += skb->truesize;
4991 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
4992 start = TCP_SKB_CB(skb)->seq;
4993 if (after(TCP_SKB_CB(skb)->end_seq, end))
4994 end = TCP_SKB_CB(skb)->end_seq;
4995 }
4996}
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
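/* Under memory pressure, drop skbs from the tail of the out-of-order queue
 * (freeing roughly an eighth of sk_rcvbuf at a time) until the socket fits
 * its receive buffer again.  Returns true if anything was dropped; SACK
 * state is reset because the advertised blocks may no longer be held.
 */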
5009static bool tcp_prune_ofo_queue(struct sock *sk)
5010{
5011 struct tcp_sock *tp = tcp_sk(sk);
5012 struct rb_node *node, *prev;
5013 int goal;
5014
5015 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
5016 return false;
5017
5018 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
5019 goal = sk->sk_rcvbuf >> 3;
5020 node = &tp->ooo_last_skb->rbnode;
5021 do {
5022 prev = rb_prev(node);
5023 rb_erase(node, &tp->out_of_order_queue);
5024 goal -= rb_to_skb(node)->truesize;
5025 tcp_drop(sk, rb_to_skb(node));
5026 if (!prev || goal <= 0) {
5027 sk_mem_reclaim(sk);
5028 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
5029 !tcp_under_memory_pressure(sk))
5030 break;
5031 goal = sk->sk_rcvbuf >> 3;
5032 }
5033 node = prev;
5034 } while (node);
5035 tp->ooo_last_skb = rb_to_skb(prev);
5036
5037
5038
5039
5040
5041
5042 if (tp->rx_opt.sack_ok)
5043 tcp_sack_reset(&tp->rx_opt);
5044 return true;
5045}
5046
5047
5048
5049
5050
5051
5052
5053
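/* Last-resort receive-memory reclaim: clamp the window, collapse the
 * out-of-order and receive queues, and finally prune the out-of-order
 * queue.  Returns 0 once under sk_rcvbuf, or -1 if the socket is still
 * over budget (the fast path is disabled and the caller drops the skb).
 */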
5054static int tcp_prune_queue(struct sock *sk)
5055{
5056 struct tcp_sock *tp = tcp_sk(sk);
5057
5058 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
5059
5060 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
5061
5062 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
5063 tcp_clamp_window(sk);
5064 else if (tcp_under_memory_pressure(sk))
5065 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
5066
5067 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5068 return 0;
5069
5070 tcp_collapse_ofo_queue(sk);
5071 if (!skb_queue_empty(&sk->sk_receive_queue))
5072 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
5073 skb_peek(&sk->sk_receive_queue),
5074 NULL,
5075 tp->copied_seq, tp->rcv_nxt);
5076 sk_mem_reclaim(sk);
5077
5078 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5079 return 0;
5080
	/* Collapsing did not help, destructive actions follow.
	 * This must not ever occur. */
5083
5084 tcp_prune_ofo_queue(sk);
5085
5086 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5087 return 0;
5088
	/* If we are really being abused, tell the caller to silently
	 * drop receive data on the floor.  It will get retransmitted
	 * and hopefully then we'll have sufficient space.
	 */
5093 NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
5094
	/* Massive buffer overcommit. */
5096 tp->pred_flags = 0;
5097 return -1;
5098}
5099
5100static bool tcp_should_expand_sndbuf(const struct sock *sk)
5101{
5102 const struct tcp_sock *tp = tcp_sk(sk);
5103
	/* If the user specified a specific send buffer setting, do
	 * not modify it.
	 */
5107 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
5108 return false;
5109
	/* If we are under global TCP memory pressure, do not expand.  */
5111 if (tcp_under_memory_pressure(sk))
5112 return false;
5113
	/* If we are under soft global TCP memory pressure, do not expand.  */
5115 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
5116 return false;
5117
	/* If we filled the congestion window, do not expand.  */
5119 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
5120 return false;
5121
5122 return true;
5123}
5124
/* When incoming ACK allowed to free some skb from write_queue,
 * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
 * on the exit from tcp input handler.
 *
 * PROBLEM: sndbuf expansion does not work well with largesend.
 */
5131static void tcp_new_space(struct sock *sk)
5132{
5133 struct tcp_sock *tp = tcp_sk(sk);
5134
5135 if (tcp_should_expand_sndbuf(sk)) {
5136 tcp_sndbuf_expand(sk);
5137 tp->snd_cwnd_stamp = tcp_jiffies32;
5138 }
5139
5140 sk->sk_write_space(sk);
5141}
5142
5143static void tcp_check_space(struct sock *sk)
5144{
5145 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
5146 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
		/* pairs with tcp_poll() */
5148 smp_mb();
5149 if (sk->sk_socket &&
5150 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
5151 tcp_new_space(sk);
5152 if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5153 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
5154 }
5155 }
5156}
5157
5158static inline void tcp_data_snd_check(struct sock *sk)
5159{
5160 tcp_push_pending_frames(sk);
5161 tcp_check_space(sk);
5162}
5163
/*
 * Check if sending an ack is needed.
 */
5167static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5168{
5169 struct tcp_sock *tp = tcp_sk(sk);
5170 unsigned long rtt, delay;
5171
5172
5173 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
	     /* ... and right edge of window advances far enough.
	      * (tcp_recvmsg() will send ACK otherwise).
	      * If application uses SO_RCVLOWAT, we want send ack now if
	      * we have not received enough bytes to satisfy the condition.
	      */
5179 (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
5180 __tcp_select_window(sk) >= tp->rcv_wnd)) ||
	    /* We ACK each frame or... */
5182 tcp_in_quickack_mode(sk) ||
	    /* Protocol state mandates a one-time immediate ACK */
5184 inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
5185send_now:
5186 tcp_send_ack(sk);
5187 return;
5188 }
5189
5190 if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
5191 tcp_send_delayed_ack(sk);
5192 return;
5193 }
5194
5195 if (!tcp_is_sack(tp) ||
5196 tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
5197 goto send_now;
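	/* Defer this ACK: the compressed-ack hrtimer below will send a single
	 * ACK covering several received segments.
	 */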
5198 tp->compressed_ack++;
5199
5200 if (hrtimer_is_queued(&tp->compressed_ack_timer))
5201 return;
5202
5203
	/* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
5205 rtt = tp->rcv_rtt_est.rtt_us;
5206 if (tp->srtt_us && tp->srtt_us < rtt)
5207 rtt = tp->srtt_us;
5208
5209 delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
5210 rtt * (NSEC_PER_USEC >> 3)/20);
5211 sock_hold(sk);
5212 hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
5213 HRTIMER_MODE_REL_PINNED_SOFT);
5214}
5215
5216static inline void tcp_ack_snd_check(struct sock *sk)
5217{
5218 if (!inet_csk_ack_scheduled(sk)) {
		/* We sent a data segment already. */
5220 return;
5221 }
5222 __tcp_ack_snd_check(sk, 1);
5223}
5224
/*
 * This routine is only called when we have urgent data signalled;
 * it is the 'slow' part of urgent handling.
 *
 * The urgent pointer is historically ambiguous: BSD points it one byte
 * past the urgent octet, while the host-requirements (RFC 1122)
 * interpretation points it at the urgent octet itself.  By default we
 * follow BSD for interoperability; sysctl_tcp_stdurg selects the
 * standard interpretation and uses the pointer as-is.
 */
5235static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5236{
5237 struct tcp_sock *tp = tcp_sk(sk);
5238 u32 ptr = ntohs(th->urg_ptr);
5239
5240 if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
5241 ptr--;
5242 ptr += ntohl(th->seq);
5243
	/* Ignore urgent data that we've already seen and read. */
5245 if (after(tp->copied_seq, ptr))
5246 return;

	/* Do not replay an urgent pointer that refers to data we have
	 * already received: the byte is (or was) sitting in the receive
	 * queue and we do not go back to flag it.  A misbehaving sender
	 * could otherwise make us signal stale urgent data over and over.
	 */
5258 if (before(ptr, tp->rcv_nxt))
5259 return;
5260
	/* Already noted this urgent pointer (or a newer one)? Then ignore it. */
5262 if (tp->urg_data && !after(ptr, tp->urg_seq))
5263 return;
5264
	/* Tell the world about our new urgent pointer. */
5266 sk_send_sigurg(sk);

	/* We may be adding urgent data while the previous urgent byte is
	 * still unread at tp->copied_seq.  We cannot simply leave copied_seq
	 * alone, or the old urgent byte would later be read as ordinary
	 * data; nor can we move it arbitrarily without breaking SIOCATMARK
	 * semantics.  So, if the byte at copied_seq is the old out-of-band
	 * byte and it is not being received inline, skip it (and free the
	 * skb once it has been fully consumed).
	 */
5283 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5284 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5285 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5286 tp->copied_seq++;
5287 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5288 __skb_unlink(skb, &sk->sk_receive_queue);
5289 __kfree_skb(skb);
5290 }
5291 }
5292
5293 tp->urg_data = TCP_URG_NOTYET;
5294 tp->urg_seq = ptr;
5295
	/* Disable header prediction. */
5297 tp->pred_flags = 0;
5298}
5299
/* This is the 'fast' part of urgent handling. */
5301static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5302{
5303 struct tcp_sock *tp = tcp_sk(sk);
5304
5305
5306 if (th->urg)
5307 tcp_check_urg(sk, th);
5308
	/* Do we wait for any urgent data? - normally not... */
5310 if (tp->urg_data == TCP_URG_NOTYET) {
5311 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5312 th->syn;
5313
		/* Is the urgent pointer pointing into this packet? */
5315 if (ptr < skb->len) {
5316 u8 tmp;
5317 if (skb_copy_bits(skb, ptr, &tmp, 1))
5318 BUG();
5319 tp->urg_data = TCP_URG_VALID | tmp;
5320 if (!sock_flag(sk, SOCK_DEAD))
5321 sk->sk_data_ready(sk);
5322 }
5323 }
5324}
5325

/* Accept a RST whose sequence number equals rcv_nxt - 1 (i.e. the
 * sequence of a FIN we have already received) while the socket is in
 * CLOSE_WAIT, LAST_ACK or CLOSING.  Some stacks (notably macOS) abort
 * a connection by sending a FIN immediately followed by a RST that
 * reuses the FIN's sequence number; a strict RFC 5961 challenge ACK
 * would leave such connections dangling.
 */
5334static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
5335{
5336 struct tcp_sock *tp = tcp_sk(sk);
5337
5338 return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
5339 (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
5340 TCPF_CLOSING));
5341}
5342
5343
/* Does PAWS and seqno based validation of an incoming segment, flags will
 * play significant role here.
 */
5346static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5347 const struct tcphdr *th, int syn_inerr)
5348{
5349 struct tcp_sock *tp = tcp_sk(sk);
5350 bool rst_seq_match = false;
5351
	/* RFC1323: H1. Apply PAWS check first. */
5353 if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
5354 tp->rx_opt.saw_tstamp &&
5355 tcp_paws_discard(sk, skb)) {
5356 if (!th->rst) {
5357 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5358 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5359 LINUX_MIB_TCPACKSKIPPEDPAWS,
5360 &tp->last_oow_ack_time))
5361 tcp_send_dupack(sk, skb);
5362 goto discard;
5363 }
		/* Reset is accepted even if it did not pass PAWS. */
5365 }
5366
	/* Step 1: check sequence number */
5368 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		/* RFC793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST
		 * bit is set, if so drop the segment and return)".
		 */
5375 if (!th->rst) {
5376 if (th->syn)
5377 goto syn_challenge;
5378 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5379 LINUX_MIB_TCPACKSKIPPEDSEQ,
5380 &tp->last_oow_ack_time))
5381 tcp_send_dupack(sk, skb);
5382 } else if (tcp_reset_check(sk, skb)) {
5383 tcp_reset(sk);
5384 }
5385 goto discard;
5386 }
5387
	/* Step 2: check RST bit */
5389 if (th->rst) {
		/* RFC 5961 3.2 (extended to also match against (RCV.NXT - 1)
		 * after a FIN, and against the right-most SACK block):
		 * If seq is equal to RCV.NXT or (RCV.NXT - 1) after a FIN, or
		 * the right-most SACK block,
		 * then
		 *     RESET the connection
		 * else
		 *     Send a challenge ACK
		 */
5399 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
5400 tcp_reset_check(sk, skb)) {
5401 rst_seq_match = true;
5402 } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
5403 struct tcp_sack_block *sp = &tp->selective_acks[0];
5404 int max_sack = sp[0].end_seq;
5405 int this_sack;
5406
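			/* RFC 5961 also lets a RST sit at the right edge of
			 * the highest SACKed block, so find that edge.
			 */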
5407 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
5408 ++this_sack) {
5409 max_sack = after(sp[this_sack].end_seq,
5410 max_sack) ?
5411 sp[this_sack].end_seq : max_sack;
5412 }
5413
5414 if (TCP_SKB_CB(skb)->seq == max_sack)
5415 rst_seq_match = true;
5416 }
5417
5418 if (rst_seq_match)
5419 tcp_reset(sk);
5420 else {
			/* Disable TFO if RST is out-of-order
			 * and no data has been received
			 * for current active TFO socket
			 */
5425 if (tp->syn_fastopen && !tp->data_segs_in &&
5426 sk->sk_state == TCP_ESTABLISHED)
5427 tcp_fastopen_active_disable(sk);
5428 tcp_send_challenge_ack(sk, skb);
5429 }
5430 goto discard;
5431 }
5432
	/* step 3: check security and precedence [ignored] */

	/* step 4: Check for a SYN
	 * RFC 5961 4.2 : Send a challenge ack
	 */
5438 if (th->syn) {
5439syn_challenge:
5440 if (syn_inerr)
5441 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5442 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5443 tcp_send_challenge_ack(sk, skb);
5444 goto discard;
5445 }
5446
5447 return true;
5448
5449discard:
5450 tcp_drop(sk, skb);
5451 return false;
5452}
5453
/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path. The fast path is
 *	disabled when:
 *	- A zero window was announced from us - zero window probing
 *	  is only handled properly on the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left.
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags).
 *	- Data is sent in both directions. The fast path only supports pure
 *	  senders or pure receivers (this means either the sequence number
 *	  or the ack value must stay constant).
 *	- Unexpected TCP option.
 *
 *	When these conditions are satisfied, the fast path is engaged and
 *	predicted packets are processed with minimal checks; anything that
 *	does not match the prediction falls back to the slow path below.
 */
5477void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
5478{
5479 const struct tcphdr *th = (const struct tcphdr *)skb->data;
5480 struct tcp_sock *tp = tcp_sk(sk);
5481 unsigned int len = skb->len;
5482
5483
5484 trace_tcp_probe(sk, skb);
5485
5486 tcp_mstamp_refresh(tp);
5487 if (unlikely(!sk->sk_rx_dst))
5488 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5489
5490
5491
5492
5493
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504 tp->rx_opt.saw_tstamp = 0;

	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header prediction is to be made
	 *	'S' will always be tp->tcp_header_len >> 2
	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
	 *	turn it off (when there are holes in the receive
	 *	space for instance)
	 *	PSH flag is ignored.
	 */
5515 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5516 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5517 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5518 int tcp_header_len = tp->tcp_header_len;

		/* Timestamp header prediction: tcp_header_len
		 * is automatically equal to th->doff*4 due to pred_flags
		 * match.
		 */

		/* Check timestamp */
5526 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
			/* No? Slow path! */
5528 if (!tcp_parse_aligned_timestamp(tp, th))
5529 goto slow_path;
5530
			/* If PAWS failed, check it more carefully in slow path */
5532 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5533 goto slow_path;

			/* DO NOT update ts_recent here, if checksum fails
			 * and timestamp was corrupted part, it will result
			 * in a hung connection since we will drop all
			 * future packets due to the PAWS test.
			 */
5540 }
5541
5542 if (len <= tcp_header_len) {
			/* Bulk data transfer: sender */
5544 if (len == tcp_header_len) {
				/* Predicted packet is in window by definition.
				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
				 * Hence, check seq<=rcv_wup reduces to:
				 */
5549 if (tcp_header_len ==
5550 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5551 tp->rcv_nxt == tp->rcv_wup)
5552 tcp_store_ts_recent(tp);

				/* We know that such packets are checksummed
				 * on entry.
				 */
5557 tcp_ack(sk, skb, 0);
5558 __kfree_skb(skb);
5559 tcp_data_snd_check(sk);
				/* When receiving pure ack in fast path, update
				 * last ts ecr directly instead of calling
				 * tcp_rcv_rtt_measure_ts()
				 */
5564 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
5565 return;
5566 } else {
5567 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5568 goto discard;
5569 }
5570 } else {
5571 int eaten = 0;
5572 bool fragstolen = false;
5573
5574 if (tcp_checksum_complete(skb))
5575 goto csum_error;
5576
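			/* Not enough forward-allocated memory for this skb:
			 * take the slow path, which can prune the queues.
			 */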
5577 if ((int)skb->truesize > sk->sk_forward_alloc)
5578 goto step5;

			/* Predicted packet is in window by definition.
			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
			 * Hence, check seq<=rcv_wup reduces to:
			 */
5584 if (tcp_header_len ==
5585 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5586 tp->rcv_nxt == tp->rcv_wup)
5587 tcp_store_ts_recent(tp);
5588
5589 tcp_rcv_rtt_measure_ts(sk, skb);
5590
5591 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
5592
			/* Bulk data transfer: receiver */
5594 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5595 &fragstolen);
5596
5597 tcp_event_data_recv(sk, skb);
5598
5599 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5600
5601 tcp_ack(sk, skb, FLAG_DATA);
5602 tcp_data_snd_check(sk);
5603 if (!inet_csk_ack_scheduled(sk))
5604 goto no_ack;
5605 }
5606
5607 __tcp_ack_snd_check(sk, 0);
5608no_ack:
5609 if (eaten)
5610 kfree_skb_partial(skb, fragstolen);
5611 tcp_data_ready(sk);
5612 return;
5613 }
5614 }
5615
5616slow_path:
5617 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
5618 goto csum_error;
5619
5620 if (!th->ack && !th->rst && !th->syn)
5621 goto discard;
5622
	/*
	 *	Standard slow path.
	 */
5626
5627 if (!tcp_validate_incoming(sk, skb, th, 1))
5628 return;
5629
5630step5:
5631 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5632 goto discard;
5633
5634 tcp_rcv_rtt_measure_ts(sk, skb);
5635
	/* Process urgent data. */
5637 tcp_urg(sk, skb, th);
5638
	/* step 7: process the segment text */
5640 tcp_data_queue(sk, skb);
5641
5642 tcp_data_snd_check(sk);
5643 tcp_ack_snd_check(sk);
5644 return;
5645
5646csum_error:
5647 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
5648 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5649
5650discard:
5651 tcp_drop(sk, skb);
5652}
5653EXPORT_SYMBOL(tcp_rcv_established);
5654
5655void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5656{
5657 struct tcp_sock *tp = tcp_sk(sk);
5658 struct inet_connection_sock *icsk = inet_csk(sk);
5659
5660 tcp_set_state(sk, TCP_ESTABLISHED);
5661 icsk->icsk_ack.lrcvtime = tcp_jiffies32;
5662
5663 if (skb) {
5664 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5665 security_inet_conn_established(sk, skb);
5666 sk_mark_napi_id(sk, skb);
5667 }
5668
5669 tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
5670
	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
5674 tp->lsndtime = tcp_jiffies32;
5675
5676 if (sock_flag(sk, SOCK_KEEPOPEN))
5677 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5678
5679 if (!tp->rx_opt.snd_wscale)
5680 __tcp_fast_path_on(tp, tp->snd_wnd);
5681 else
5682 tp->pred_flags = 0;
5683}
5684
5685static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5686 struct tcp_fastopen_cookie *cookie)
5687{
5688 struct tcp_sock *tp = tcp_sk(sk);
5689 struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
5690 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5691 bool syn_drop = false;
5692
5693 if (mss == tp->rx_opt.user_mss) {
5694 struct tcp_options_received opt;
5695
		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
5697 tcp_clear_options(&opt);
5698 opt.user_mss = opt.mss_clamp = 0;
5699 tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
5700 mss = opt.mss_clamp;
5701 }
5702
5703 if (!tp->syn_fastopen) {
5704
5705 cookie->len = -1;
5706 } else if (tp->total_retrans) {
		/* SYN timed out and the SYN-ACK neither has a cookie nor
		 * acknowledges data. Presumably the remote received only
		 * the retransmitted (regular) SYNs: either the original
		 * SYN-data or the corresponding SYN-ACK was dropped.
		 */
5712 syn_drop = (cookie->len < 0 && data);
5713 } else if (cookie->len < 0 && !tp->syn_data) {
		/* We requested a cookie but didn't get it. If we did not use
		 * the (old) exp opt format then try so next time (try_exp=1).
		 * Otherwise we go back to use the RFC7413 opt (try_exp=2).
		 */
5718 try_exp = tp->syn_fastopen_exp ? 2 : 1;
5719 }
5720
5721 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
5722
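	/* The data we sent on the SYN was not acknowledged by this SYN-ACK:
	 * retransmit it right away instead of waiting for the RTO.
	 */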
5723 if (data) {
5724 skb_rbtree_walk_from(data) {
5725 if (__tcp_retransmit_skb(sk, data, 1))
5726 break;
5727 }
5728 tcp_rearm_rto(sk);
5729 NET_INC_STATS(sock_net(sk),
5730 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
5731 return true;
5732 }
5733 tp->syn_data_acked = tp->syn_data;
5734 if (tp->syn_data_acked) {
5735 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
5736
5737 if (tp->delivered > 1)
5738 --tp->delivered;
5739 }
5740
5741 tcp_fastopen_add_skb(sk, synack);
5742
5743 return false;
5744}
5745
5746static void smc_check_reset_syn(struct tcp_sock *tp)
5747{
5748#if IS_ENABLED(CONFIG_SMC)
5749 if (static_branch_unlikely(&tcp_have_smc)) {
5750 if (tp->syn_smc && !tp->rx_opt.smc_ok)
5751 tp->syn_smc = 0;
5752 }
5753#endif
5754}
5755
5756static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5757 const struct tcphdr *th)
5758{
5759 struct inet_connection_sock *icsk = inet_csk(sk);
5760 struct tcp_sock *tp = tcp_sk(sk);
5761 struct tcp_fastopen_cookie foc = { .len = -1 };
5762 int saved_clamp = tp->rx_opt.mss_clamp;
5763 bool fastopen_fail;
5764
5765 tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
5766 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
5767 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5768
5769 if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
5778 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
5779 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
5780 goto reset_and_undo;
5781
5782 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5783 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5784 tcp_time_stamp(tp))) {
5785 NET_INC_STATS(sock_net(sk),
5786 LINUX_MIB_PAWSACTIVEREJECTED);
5787 goto reset_and_undo;
5788 }
5789
		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */

5798 if (th->rst) {
5799 tcp_reset(sk);
5800 goto discard;
5801 }
5802
5803
5804
5805
5806
5807
5808
5809
5810 if (!th->syn)
5811 goto discard_and_undo;
5812
5813
5814
5815
5816
5817
5818
5819
5820 tcp_ecn_rcv_synack(tp, th);
5821
5822 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5823 tcp_ack(sk, skb, FLAG_SLOWPATH);
5824
		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
5828 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5829 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5830
		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
5834 tp->snd_wnd = ntohs(th->window);
5835
5836 if (!tp->rx_opt.wscale_ok) {
5837 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5838 tp->window_clamp = min(tp->window_clamp, 65535U);
5839 }
5840
5841 if (tp->rx_opt.saw_tstamp) {
5842 tp->rx_opt.tstamp_ok = 1;
5843 tp->tcp_header_len =
5844 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5845 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5846 tcp_store_ts_recent(tp);
5847 } else {
5848 tp->tcp_header_len = sizeof(struct tcphdr);
5849 }
5850
5851 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5852 tcp_initialize_rcv_mss(sk);
5853
		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized. */
5857 tp->copied_seq = tp->rcv_nxt;
5858
5859 smc_check_reset_syn(tp);
5860
5861 smp_mb();
5862
5863 tcp_finish_connect(sk, skb);
5864
5865 fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
5866 tcp_rcv_fastopen_synack(sk, skb, &foc);
5867
5868 if (!sock_flag(sk, SOCK_DEAD)) {
5869 sk->sk_state_change(sk);
5870 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5871 }
5872 if (fastopen_fail)
5873 return -1;
5874 if (sk->sk_write_pending ||
5875 icsk->icsk_accept_queue.rskq_defer_accept ||
5876 icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
5884 inet_csk_schedule_ack(sk);
5885 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
5886 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5887 TCP_DELACK_MAX, TCP_RTO_MAX);
5888
5889discard:
5890 tcp_drop(sk, skb);
5891 return 0;
5892 } else {
5893 tcp_send_ack(sk);
5894 }
5895 return -1;
5896 }
5897
	/* No ACK in the segment */
5899
5900 if (th->rst) {
5901
5902
5903
5904
5905
5906
5907 goto discard_and_undo;
5908 }
5909
	/* PAWS check. */
5911 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5912 tcp_paws_reject(&tp->rx_opt, 0))
5913 goto discard_and_undo;
5914
5915 if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
5920 tcp_set_state(sk, TCP_SYN_RECV);
5921
5922 if (tp->rx_opt.saw_tstamp) {
5923 tp->rx_opt.tstamp_ok = 1;
5924 tcp_store_ts_recent(tp);
5925 tp->tcp_header_len =
5926 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5927 } else {
5928 tp->tcp_header_len = sizeof(struct tcphdr);
5929 }
5930
5931 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5932 tp->copied_seq = tp->rcv_nxt;
5933 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5934
		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
5938 tp->snd_wnd = ntohs(th->window);
5939 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5940 tp->max_window = tp->snd_wnd;
5941
5942 tcp_ecn_rcv_syn(tp, th);
5943
5944 tcp_mtup_init(sk);
5945 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5946 tcp_initialize_rcv_mss(sk);
5947
5948 tcp_send_synack(sk);
5949#if 0
5950
5951
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961 return -1;
5962#else
5963 goto discard;
5964#endif
5965 }
5966
5967
5968
5969
5970discard_and_undo:
5971 tcp_clear_options(&tp->rx_opt);
5972 tp->rx_opt.mss_clamp = saved_clamp;
5973 goto discard;
5974
5975reset_and_undo:
5976 tcp_clear_options(&tp->rx_opt);
5977 tp->rx_opt.mss_clamp = saved_clamp;
5978 return 1;
5979}
5980
5981
/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
5988int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
5989{
5990 struct tcp_sock *tp = tcp_sk(sk);
5991 struct inet_connection_sock *icsk = inet_csk(sk);
5992 const struct tcphdr *th = tcp_hdr(skb);
5993 struct request_sock *req;
5994 int queued = 0;
5995 bool acceptable;
5996
5997 switch (sk->sk_state) {
5998 case TCP_CLOSE:
5999 goto discard;
6000
6001 case TCP_LISTEN:
6002 if (th->ack)
6003 return 1;
6004
6005 if (th->rst)
6006 goto discard;
6007
6008 if (th->syn) {
6009 if (th->fin)
6010 goto discard;

			/* It is possible that we process SYN packets from backlog,
			 * so we need to make sure to disable BH and RCU right there.
			 */
6014 rcu_read_lock();
6015 local_bh_disable();
6016 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
6017 local_bh_enable();
6018 rcu_read_unlock();
6019
6020 if (!acceptable)
6021 return 1;
6022 consume_skb(skb);
6023 return 0;
6024 }
6025 goto discard;
6026
6027 case TCP_SYN_SENT:
6028 tp->rx_opt.saw_tstamp = 0;
6029 tcp_mstamp_refresh(tp);
6030 queued = tcp_rcv_synsent_state_process(sk, skb, th);
6031 if (queued >= 0)
6032 return queued;
6033
6034
6035 tcp_urg(sk, skb, th);
6036 __kfree_skb(skb);
6037 tcp_data_snd_check(sk);
6038 return 0;
6039 }
6040
6041 tcp_mstamp_refresh(tp);
6042 tp->rx_opt.saw_tstamp = 0;
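	/* A passive Fast Open socket keeps its request sock until the
	 * handshake-completing ACK arrives; validate this segment against
	 * it first.
	 */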
6043 req = tp->fastopen_rsk;
6044 if (req) {
6045 bool req_stolen;
6046
6047 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
6048 sk->sk_state != TCP_FIN_WAIT1);
6049
6050 if (!tcp_check_req(sk, skb, req, true, &req_stolen))
6051 goto discard;
6052 }
6053
6054 if (!th->ack && !th->rst && !th->syn)
6055 goto discard;
6056
6057 if (!tcp_validate_incoming(sk, skb, th, 0))
6058 return 0;
6059
	/* step 5: check the ACK field */
6061 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
6062 FLAG_UPDATE_TS_RECENT |
6063 FLAG_NO_CHALLENGE_ACK) > 0;
6064
6065 if (!acceptable) {
6066 if (sk->sk_state == TCP_SYN_RECV)
6067 return 1;
6068 tcp_send_challenge_ack(sk, skb);
6069 goto discard;
6070 }
6071 switch (sk->sk_state) {
6072 case TCP_SYN_RECV:
6073 tp->delivered++;
6074 if (!tp->srtt_us)
6075 tcp_synack_rtt_meas(sk, req);
6076
		/* Once we leave TCP_SYN_RECV, we no longer need req
		 * so release it.
		 */
6080 if (req) {
6081 inet_csk(sk)->icsk_retransmits = 0;
6082 reqsk_fastopen_remove(sk, req, false);
			/* Re-arm the timer because data may have been sent out.
			 * This is similar to the regular data transmission case
			 * when new data has just been ack'ed.
			 *
			 * (TFO) - we could try to be more aggressive and
			 * retransmit any data sooner based on when it
			 * was sent out.
			 */
6091 tcp_rearm_rto(sk);
6092 } else {
6093 tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
6094 tp->copied_seq = tp->rcv_nxt;
6095 }
6096 smp_mb();
6097 tcp_set_state(sk, TCP_ESTABLISHED);
6098 sk->sk_state_change(sk);
6099
		/* Note, that this wakeup is only for marginal crossed SYN case.
		 * Passively open sockets are not waked up, because
		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
		 */
6104 if (sk->sk_socket)
6105 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
6106
6107 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
6108 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
6109 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
6110
6111 if (tp->rx_opt.tstamp_ok)
6112 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
6113
6114 if (!inet_csk(sk)->icsk_ca_ops->cong_control)
6115 tcp_update_pacing_rate(sk);
6116
		/* Prevent spurious tcp_cwnd_restart() on first data packet. */
6118 tp->lsndtime = tcp_jiffies32;
6119
6120 tcp_initialize_rcv_mss(sk);
6121 tcp_fast_path_on(tp);
6122 break;
6123
6124 case TCP_FIN_WAIT1: {
6125 int tmo;
6126
		/* If we enter the TCP_FIN_WAIT1 state and we are a
		 * Fast Open socket and this is the first acceptable
		 * ACK we have received, this would have acknowledged
		 * our SYNACK so we need to free the request sock.
		 */
6132 if (req) {
6133
6134 reqsk_fastopen_remove(sk, req, false);
6135 tcp_rearm_rto(sk);
6136 }
6137 if (tp->snd_una != tp->write_seq)
6138 break;
6139
6140 tcp_set_state(sk, TCP_FIN_WAIT2);
6141 sk->sk_shutdown |= SEND_SHUTDOWN;
6142
6143 sk_dst_confirm(sk);
6144
6145 if (!sock_flag(sk, SOCK_DEAD)) {
6146
6147 sk->sk_state_change(sk);
6148 break;
6149 }
6150
6151 if (tp->linger2 < 0) {
6152 tcp_done(sk);
6153 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6154 return 1;
6155 }
6156 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6157 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6158
6159 if (tp->syn_fastopen && th->fin)
6160 tcp_fastopen_active_disable(sk);
6161 tcp_done(sk);
6162 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6163 return 1;
6164 }
6165
6166 tmo = tcp_fin_time(sk);
6167 if (tmo > TCP_TIMEWAIT_LEN) {
6168 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
6169 } else if (th->fin || sock_owned_by_user(sk)) {
			/* Bad case. We could lose such FIN otherwise.
			 * It is not a big problem, but it looks confusing
			 * and not so rare event. We still can lose it now,
			 * if it spins in bh_lock_sock(), but it is really
			 * marginal case.
			 */
6176 inet_csk_reset_keepalive_timer(sk, tmo);
6177 } else {
6178 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
6179 goto discard;
6180 }
6181 break;
6182 }
6183
6184 case TCP_CLOSING:
6185 if (tp->snd_una == tp->write_seq) {
6186 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
6187 goto discard;
6188 }
6189 break;
6190
6191 case TCP_LAST_ACK:
6192 if (tp->snd_una == tp->write_seq) {
6193 tcp_update_metrics(sk);
6194 tcp_done(sk);
6195 goto discard;
6196 }
6197 break;
6198 }
6199
	/* step 6: check the URG bit */
6201 tcp_urg(sk, skb, th);
6202
	/* step 7: process the segment text */
6204 switch (sk->sk_state) {
6205 case TCP_CLOSE_WAIT:
6206 case TCP_CLOSING:
6207 case TCP_LAST_ACK:
6208 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
6209 break;
		/* fall through */
6211 case TCP_FIN_WAIT1:
6212 case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
6217 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6218 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6219 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6220 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6221 tcp_reset(sk);
6222 return 1;
6223 }
6224 }
		/* Fall through */
6226 case TCP_ESTABLISHED:
6227 tcp_data_queue(sk, skb);
6228 queued = 1;
6229 break;
6230 }
6231
	/* tcp_data_queue() could have moved the socket to TIME-WAIT */
6233 if (sk->sk_state != TCP_CLOSE) {
6234 tcp_data_snd_check(sk);
6235 tcp_ack_snd_check(sk);
6236 }
6237
6238 if (!queued) {
6239discard:
6240 tcp_drop(sk, skb);
6241 }
6242 return 0;
6243}
6244EXPORT_SYMBOL(tcp_rcv_state_process);
6245
6246static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
6247{
6248 struct inet_request_sock *ireq = inet_rsk(req);
6249
6250 if (family == AF_INET)
6251 net_dbg_ratelimited("drop open request from %pI4/%u\n",
6252 &ireq->ir_rmt_addr, port);
6253#if IS_ENABLED(CONFIG_IPV6)
6254 else if (family == AF_INET6)
6255 net_dbg_ratelimited("drop open request from %pI6/%u\n",
6256 &ireq->ir_v6_rmt_addr, port);
6257#endif
6258}
6259

/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
 *
 * If we receive a SYN packet with these bits set, it means a
 * network is playing bad games with TOS bits. In order to
 * avoid possible false congestion notifications, we disable
 * TCP ECN negotiation.
 *
 * Exception: tcp_ca wants ECN. This is required for DCTCP
 * congestion control: Linux DCTCP asserts ECT on all packets,
 * including SYN, and relies on ECN being negotiated even when the
 * network path does not advertise it.
 */
6272static void tcp_ecn_create_request(struct request_sock *req,
6273 const struct sk_buff *skb,
6274 const struct sock *listen_sk,
6275 const struct dst_entry *dst)
6276{
6277 const struct tcphdr *th = tcp_hdr(skb);
6278 const struct net *net = sock_net(listen_sk);
6279 bool th_ecn = th->ece && th->cwr;
6280 bool ect, ecn_ok;
6281 u32 ecn_ok_dst;
6282
6283 if (!th_ecn)
6284 return;
6285
6286 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6287 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6288 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
6289
6290 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6291 (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
6292 tcp_bpf_ca_needs_ecn((struct sock *)req))
6293 inet_rsk(req)->ecn_ok = 1;
6294}
6295
6296static void tcp_openreq_init(struct request_sock *req,
6297 const struct tcp_options_received *rx_opt,
6298 struct sk_buff *skb, const struct sock *sk)
6299{
6300 struct inet_request_sock *ireq = inet_rsk(req);
6301
6302 req->rsk_rcv_wnd = 0;
6303 req->cookie_ts = 0;
6304 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
6305 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
6306 tcp_rsk(req)->snt_synack = tcp_clock_us();
6307 tcp_rsk(req)->last_oow_ack_time = 0;
6308 req->mss = rx_opt->mss_clamp;
6309 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
6310 ireq->tstamp_ok = rx_opt->tstamp_ok;
6311 ireq->sack_ok = rx_opt->sack_ok;
6312 ireq->snd_wscale = rx_opt->snd_wscale;
6313 ireq->wscale_ok = rx_opt->wscale_ok;
6314 ireq->acked = 0;
6315 ireq->ecn_ok = 0;
6316 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6317 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6318 ireq->ir_mark = inet_request_mark(sk, skb);
6319#if IS_ENABLED(CONFIG_SMC)
6320 ireq->smc_ok = rx_opt->smc_ok;
6321#endif
6322}
6323
6324struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6325 struct sock *sk_listener,
6326 bool attach_listener)
6327{
6328 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6329 attach_listener);
6330
6331 if (req) {
6332 struct inet_request_sock *ireq = inet_rsk(req);
6333
6334 ireq->ireq_opt = NULL;
6335#if IS_ENABLED(CONFIG_IPV6)
6336 ireq->pktopts = NULL;
6337#endif
6338 atomic64_set(&ireq->ir_cookie, 0);
6339 ireq->ireq_state = TCP_NEW_SYN_RECV;
6340 write_pnet(&ireq->ireq_net, sock_net(sk_listener));
6341 ireq->ireq_family = sk_listener->sk_family;
6342 }
6343
6344 return req;
6345}
6346EXPORT_SYMBOL(inet_reqsk_alloc);
6347
/*
 * Return true if a syncookie should be sent
 */
6351static bool tcp_syn_flood_action(const struct sock *sk,
6352 const struct sk_buff *skb,
6353 const char *proto)
6354{
6355 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6356 const char *msg = "Dropping request";
6357 bool want_cookie = false;
6358 struct net *net = sock_net(sk);
6359
6360#ifdef CONFIG_SYN_COOKIES
6361 if (net->ipv4.sysctl_tcp_syncookies) {
6362 msg = "Sending cookies";
6363 want_cookie = true;
6364 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6365 } else
6366#endif
6367 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6368
6369 if (!queue->synflood_warned &&
6370 net->ipv4.sysctl_tcp_syncookies != 2 &&
6371 xchg(&queue->synflood_warned, 1) == 0)
6372 net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6373 proto, ntohs(tcp_hdr(skb)->dest), msg);
6374
6375 return want_cookie;
6376}
6377
6378static void tcp_reqsk_record_syn(const struct sock *sk,
6379 struct request_sock *req,
6380 const struct sk_buff *skb)
6381{
6382 if (tcp_sk(sk)->save_syn) {
6383 u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
6384 u32 *copy;
6385
6386 copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
6387 if (copy) {
6388 copy[0] = len;
			memcpy(&copy[1], skb_network_header(skb), len);
6390 req->saved_syn = copy;
6391 }
6392 }
6393}
6394
6395int tcp_conn_request(struct request_sock_ops *rsk_ops,
6396 const struct tcp_request_sock_ops *af_ops,
6397 struct sock *sk, struct sk_buff *skb)
6398{
6399 struct tcp_fastopen_cookie foc = { .len = -1 };
6400 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6401 struct tcp_options_received tmp_opt;
6402 struct tcp_sock *tp = tcp_sk(sk);
6403 struct net *net = sock_net(sk);
6404 struct sock *fastopen_sk = NULL;
6405 struct request_sock *req;
6406 bool want_cookie = false;
6407 struct dst_entry *dst;
6408 struct flowi fl;
6409
	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
6414 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
6415 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6416 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
6417 if (!want_cookie)
6418 goto drop;
6419 }
6420
6421 if (sk_acceptq_is_full(sk)) {
6422 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6423 goto drop;
6424 }
6425
6426 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6427 if (!req)
6428 goto drop;
6429
6430 tcp_rsk(req)->af_specific = af_ops;
6431 tcp_rsk(req)->ts_off = 0;
6432
6433 tcp_clear_options(&tmp_opt);
6434 tmp_opt.mss_clamp = af_ops->mss_clamp;
6435 tmp_opt.user_mss = tp->rx_opt.user_mss;
6436 tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
6437 want_cookie ? NULL : &foc);
6438
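	/* Syncookies can only encode the other options when timestamps are
	 * present; without a timestamp, pretend no options were seen at all.
	 */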
6439 if (want_cookie && !tmp_opt.saw_tstamp)
6440 tcp_clear_options(&tmp_opt);
6441
6442 if (IS_ENABLED(CONFIG_SMC) && want_cookie)
6443 tmp_opt.smc_ok = 0;
6444
6445 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6446 tcp_openreq_init(req, &tmp_opt, skb, sk);
6447 inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
6448
6449
6450 inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
6451
6452 af_ops->init_req(req, sk, skb);
6453
6454 if (security_inet_conn_request(sk, skb, req))
6455 goto drop_and_free;
6456
6457 if (tmp_opt.tstamp_ok)
6458 tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
6459
6460 dst = af_ops->route_req(sk, &fl, req);
6461 if (!dst)
6462 goto drop_and_free;
6463
6464 if (!want_cookie && !isn) {
6465
6466 if (!net->ipv4.sysctl_tcp_syncookies &&
6467 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6468 (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
6469 !tcp_peer_is_proven(req, dst)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
6477 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6478 rsk_ops->family);
6479 goto drop_and_release;
6480 }
6481
6482 isn = af_ops->init_seq(skb);
6483 }
6484
6485 tcp_ecn_create_request(req, skb, sk, dst);
6486
6487 if (want_cookie) {
6488 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6489 req->cookie_ts = tmp_opt.tstamp_ok;
6490 if (!tmp_opt.tstamp_ok)
6491 inet_rsk(req)->ecn_ok = 0;
6492 }
6493
6494 tcp_rsk(req)->snt_isn = isn;
6495 tcp_rsk(req)->txhash = net_tx_rndhash();
6496 tcp_openreq_init_rwin(req, sk, dst);
6497 sk_rx_queue_set(req_to_sk(req), skb);
6498 if (!want_cookie) {
6499 tcp_reqsk_record_syn(sk, req, skb);
6500 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6501 }
6502 if (fastopen_sk) {
6503 af_ops->send_synack(fastopen_sk, dst, &fl, req,
6504 &foc, TCP_SYNACK_FASTOPEN);
6505
6506 inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
6507 sk->sk_data_ready(sk);
6508 bh_unlock_sock(fastopen_sk);
6509 sock_put(fastopen_sk);
6510 } else {
6511 tcp_rsk(req)->tfo_listener = false;
6512 if (!want_cookie)
6513 inet_csk_reqsk_queue_hash_add(sk, req,
6514 tcp_timeout_init((struct sock *)req));
6515 af_ops->send_synack(sk, dst, &fl, req, &foc,
6516 !want_cookie ? TCP_SYNACK_NORMAL :
6517 TCP_SYNACK_COOKIE);
6518 if (want_cookie) {
6519 reqsk_free(req);
6520 return 0;
6521 }
6522 }
6523 reqsk_put(req);
6524 return 0;
6525
6526drop_and_release:
6527 dst_release(dst);
6528drop_and_free:
6529 reqsk_free(req);
6530drop:
6531 tcp_listendrop(sk);
6532 return 0;
6533}
6534EXPORT_SYMBOL(tcp_conn_request);
6535