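/*
 * TCP input processing: receive-side MSS and RTT estimation, buffer
 * autotuning, ECN handling, SACK/D-SACK scoreboard tagging and the
 * congestion-control state machine driven by incoming ACKs.
 */
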
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

int sysctl_tcp_challenge_ack_limit = 1000;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;
int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;

#define FLAG_DATA		0x01
#define FLAG_WIN_UPDATE		0x02
#define FLAG_DATA_ACKED		0x04
#define FLAG_RETRANS_DATA_ACKED	0x08
#define FLAG_SYN_ACKED		0x10
#define FLAG_DATA_SACKED	0x20
#define FLAG_ECE		0x40
#define FLAG_SLOWPATH		0x100
#define FLAG_ORIG_SACK_ACKED	0x200
#define FLAG_SND_UNA_ADVANCED	0x400
#define FLAG_DSACKING_ACK	0x800
#define FLAG_SACK_RENEGING	0x2000
#define FLAG_UPDATE_TS_RECENT	0x4000

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
			     unsigned int len)
{
	static bool __once __read_mostly;

	if (!__once) {
		struct net_device *dev;

		__once = true;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
		if (!dev || len >= dev->mtu)
			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
				dev ? dev->name : "Unknown driver");
		rcu_read_unlock();
	}
}
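
/* Adapt the MSS value used for delayed-ACK decisions to the segments
 * actually received: coalesced (GRO) frames report their gso_size,
 * small segments are measured against the TCP header length.
 */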
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
					       tcp_sk(sk)->advmss);

		if (unlikely(len > icsk->icsk_ack.rcv_mss +
				   MAX_TCP_OPTION_SPACE))
			tcp_gro_dev_warn(sk, skb, len);
	} else {
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}
196
197static void tcp_incr_quickack(struct sock *sk)
198{
199 struct inet_connection_sock *icsk = inet_csk(sk);
200 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
201
202 if (quickacks == 0)
203 quickacks = 2;
204 if (quickacks > icsk->icsk_ack.quick)
205 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
206}
207
208static void tcp_enter_quickack_mode(struct sock *sk)
209{
210 struct inet_connection_sock *icsk = inet_csk(sk);
211 tcp_incr_quickack(sk);
212 icsk->icsk_ack.pingpong = 0;
213 icsk->icsk_ack.ato = TCP_ATO_MIN;
214}
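
/* Send ACKs quickly, if the "quick" count is not exhausted and the session
 * is not interactive (pingpong), or if the route requests it.
 */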
220static bool tcp_in_quickack_mode(struct sock *sk)
221{
222 const struct inet_connection_sock *icsk = inet_csk(sk);
223 const struct dst_entry *dst = __sk_dst_get(sk);
224
225 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
226 (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
227}
228
229static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
230{
231 if (tp->ecn_flags & TCP_ECN_OK)
232 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
233}
234
235static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
236{
237 if (tcp_hdr(skb)->cwr)
238 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
239}
240
241static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
242{
243 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
244}
245
246static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
247{
248 if (!(tp->ecn_flags & TCP_ECN_OK))
249 return;
250
251 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
252 case INET_ECN_NOT_ECT:
253
254
255
256
257 if (tp->ecn_flags & TCP_ECN_SEEN)
258 tcp_enter_quickack_mode((struct sock *)tp);
259 break;
260 case INET_ECN_CE:
261 if (tcp_ca_needs_ecn((struct sock *)tp))
262 tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
263
264 if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
265
266 tcp_enter_quickack_mode((struct sock *)tp);
267 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
268 }
269 tp->ecn_flags |= TCP_ECN_SEEN;
270 break;
271 default:
272 if (tcp_ca_needs_ecn((struct sock *)tp))
273 tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
274 tp->ecn_flags |= TCP_ECN_SEEN;
275 break;
276 }
277}
278
279static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
280{
281 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
282 tp->ecn_flags &= ~TCP_ECN_OK;
283}
284
285static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
286{
287 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
288 tp->ecn_flags &= ~TCP_ECN_OK;
289}
290
291static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
292{
293 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
294 return true;
295 return false;
296}
297
298
299
300
301
302
303static void tcp_sndbuf_expand(struct sock *sk)
304{
305 const struct tcp_sock *tp = tcp_sk(sk);
306 int sndmem, per_mss;
307 u32 nr_segs;
308
309
310
311
312 per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
313 MAX_TCP_HEADER +
314 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
315
316 per_mss = roundup_pow_of_two(per_mss) +
317 SKB_DATA_ALIGN(sizeof(struct sk_buff));
318
319 nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
320 nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
321
322
323
324
325
326 sndmem = 2 * nr_segs * per_mss;
327
328 if (sk->sk_sndbuf < sndmem)
329 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
330}
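
/* Slow part of the receive-window growth check: decide whether
 * rcv_ssthresh may still be raised for this skb by weighing its truesize
 * overhead against the maximum receive buffer; returns the increment or 0.
 */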
358static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
359{
360 struct tcp_sock *tp = tcp_sk(sk);
361
362 int truesize = tcp_win_from_space(skb->truesize) >> 1;
363 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
364
365 while (tp->rcv_ssthresh <= window) {
366 if (truesize <= skb->len)
367 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
368
369 truesize >>= 1;
370 window >>= 1;
371 }
372 return 0;
373}
374
375static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
376{
377 struct tcp_sock *tp = tcp_sk(sk);
378
379
380 if (tp->rcv_ssthresh < tp->window_clamp &&
381 (int)tp->rcv_ssthresh < tcp_space(sk) &&
382 !tcp_under_memory_pressure(sk)) {
383 int incr;
384
385
386
387
388 if (tcp_win_from_space(skb->truesize) <= skb->len)
389 incr = 2 * tp->advmss;
390 else
391 incr = __tcp_grow_window(sk, skb);
392
393 if (incr) {
394 incr = max_t(int, incr, 2 * skb->len);
395 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
396 tp->window_clamp);
397 inet_csk(sk)->icsk_ack.quick |= 1;
398 }
399 }
400}
401
402
403static void tcp_fixup_rcvbuf(struct sock *sk)
404{
405 u32 mss = tcp_sk(sk)->advmss;
406 int rcvmem;
407
408 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
409 tcp_default_init_rwnd(mss);
410
411
412
413
414 if (sysctl_tcp_moderate_rcvbuf)
415 rcvmem <<= 2;
416
417 if (sk->sk_rcvbuf < rcvmem)
418 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
419}
420
421
422
423
424void tcp_init_buffer_space(struct sock *sk)
425{
426 struct tcp_sock *tp = tcp_sk(sk);
427 int maxwin;
428
429 if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
430 tcp_fixup_rcvbuf(sk);
431 if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
432 tcp_sndbuf_expand(sk);
433
434 tp->rcvq_space.space = tp->rcv_wnd;
435 tp->rcvq_space.time = tcp_time_stamp;
436 tp->rcvq_space.seq = tp->copied_seq;
437
438 maxwin = tcp_full_space(sk);
439
440 if (tp->window_clamp >= maxwin) {
441 tp->window_clamp = maxwin;
442
443 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
444 tp->window_clamp = max(maxwin -
445 (maxwin >> sysctl_tcp_app_win),
446 4 * tp->advmss);
447 }
448
449
450 if (sysctl_tcp_app_win &&
451 tp->window_clamp > 2 * tp->advmss &&
452 tp->window_clamp + tp->advmss > maxwin)
453 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
454
455 tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
456 tp->snd_cwnd_stamp = tcp_time_stamp;
457}
458
459
460static void tcp_clamp_window(struct sock *sk)
461{
462 struct tcp_sock *tp = tcp_sk(sk);
463 struct inet_connection_sock *icsk = inet_csk(sk);
464
465 icsk->icsk_ack.quick = 0;
466
467 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
468 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
469 !tcp_under_memory_pressure(sk) &&
470 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
471 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
472 sysctl_tcp_rmem[2]);
473 }
474 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
475 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
476}
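
/* Initialize RCV_MSS, our guess about the MSS used by the peer.
 * With no direct information it is better to underestimate than to
 * overestimate: overestimating makes us ACK less frequently than needed.
 */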
485void tcp_initialize_rcv_mss(struct sock *sk)
486{
487 const struct tcp_sock *tp = tcp_sk(sk);
488 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
489
490 hint = min(hint, tp->rcv_wnd / 2);
491 hint = min(hint, TCP_MSS_DEFAULT);
492 hint = max(hint, TCP_MIN_MSS);
493
494 inet_csk(sk)->icsk_ack.rcv_mss = hint;
495}
496EXPORT_SYMBOL(tcp_initialize_rcv_mss);
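
/* Receiver-side RTT estimator, used when TCP timestamps are not
 * available.  A win_dep of 0 applies a normal 1/8 EWMA update; a non-zero
 * win_dep only lets the estimate decrease (minimum filter).
 */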
509static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
510{
511 u32 new_sample = tp->rcv_rtt_est.rtt;
512 long m = sample;
513
514 if (m == 0)
515 m = 1;
516
517 if (new_sample != 0) {
518
519
520
521
522
523
524
525
526
527
528 if (!win_dep) {
529 m -= (new_sample >> 3);
530 new_sample += m;
531 } else {
532 m <<= 3;
533 if (m < new_sample)
534 new_sample = m;
535 }
536 } else {
537
538 new_sample = m << 3;
539 }
540
541 if (tp->rcv_rtt_est.rtt != new_sample)
542 tp->rcv_rtt_est.rtt = new_sample;
543}
544
545static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
546{
547 if (tp->rcv_rtt_est.time == 0)
548 goto new_measure;
549 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
550 return;
551 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
552
553new_measure:
554 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
555 tp->rcv_rtt_est.time = tcp_time_stamp;
556}
557
558static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
559 const struct sk_buff *skb)
560{
561 struct tcp_sock *tp = tcp_sk(sk);
562 if (tp->rx_opt.rcv_tsecr &&
563 (TCP_SKB_CB(skb)->end_seq -
564 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
565 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
566}
567
568
569
570
571
572void tcp_rcv_space_adjust(struct sock *sk)
573{
574 struct tcp_sock *tp = tcp_sk(sk);
575 int time;
576 int copied;
577
578 time = tcp_time_stamp - tp->rcvq_space.time;
579 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
580 return;
581
582
583 copied = tp->copied_seq - tp->rcvq_space.seq;
584 if (copied <= tp->rcvq_space.space)
585 goto new_measure;
586
587
588
589
590
591
592
593
594
595
596 if (sysctl_tcp_moderate_rcvbuf &&
597 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
598 int rcvwin, rcvmem, rcvbuf;
599
600
601
602
603 rcvwin = (copied << 1) + 16 * tp->advmss;
604
605
606
607
608
609
610 if (copied >=
611 tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
612 if (copied >=
613 tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
614 rcvwin <<= 1;
615 else
616 rcvwin += (rcvwin >> 1);
617 }
618
619 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
620 while (tcp_win_from_space(rcvmem) < tp->advmss)
621 rcvmem += 128;
622
623 rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
624 if (rcvbuf > sk->sk_rcvbuf) {
625 sk->sk_rcvbuf = rcvbuf;
626
627
628 tp->window_clamp = rcvwin;
629 }
630 }
631 tp->rcvq_space.space = copied;
632
633new_measure:
634 tp->rcvq_space.seq = tp->copied_seq;
635 tp->rcvq_space.time = tcp_time_stamp;
636}
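
/* Called for each received data segment: schedule an ACK, update rcv_mss,
 * the receiver RTT estimate and the delayed-ACK timeout (ato), and try to
 * grow the advertised receive window.
 */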
648static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
649{
650 struct tcp_sock *tp = tcp_sk(sk);
651 struct inet_connection_sock *icsk = inet_csk(sk);
652 u32 now;
653
654 inet_csk_schedule_ack(sk);
655
656 tcp_measure_rcv_mss(sk, skb);
657
658 tcp_rcv_rtt_measure(tp);
659
660 now = tcp_time_stamp;
661
662 if (!icsk->icsk_ack.ato) {
663
664
665
666 tcp_incr_quickack(sk);
667 icsk->icsk_ack.ato = TCP_ATO_MIN;
668 } else {
669 int m = now - icsk->icsk_ack.lrcvtime;
670
671 if (m <= TCP_ATO_MIN / 2) {
672
673 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
674 } else if (m < icsk->icsk_ack.ato) {
675 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
676 if (icsk->icsk_ack.ato > icsk->icsk_rto)
677 icsk->icsk_ack.ato = icsk->icsk_rto;
678 } else if (m > icsk->icsk_rto) {
679
680
681
682 tcp_incr_quickack(sk);
683 sk_mem_reclaim(sk);
684 }
685 }
686 icsk->icsk_ack.lrcvtime = now;
687
688 TCP_ECN_check_ce(tp, skb);
689
690 if (skb->len >= 128)
691 tcp_grow_window(sk, skb);
692}
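
/* Compute a smoothed RTT estimate (Jacobson/Karels, cf. RFC 6298) from a
 * sample that comes either from TCP timestamps or from a segment known
 * not to have been retransmitted (Karn's algorithm).  srtt_us is kept
 * scaled by 8 so the RTO can be derived cheaply in __tcp_set_rto().
 */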
703static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
704{
705 struct tcp_sock *tp = tcp_sk(sk);
706 long m = mrtt_us;
707 u32 srtt = tp->srtt_us;
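
	/* srtt is stored as 8 * SRTT, so the update below is the classic
	 * srtt = 7/8 srtt + 1/8 sample, with mdev tracking the mean
	 * deviation used for the RTO variance term.
	 */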
725 if (srtt != 0) {
726 m -= (srtt >> 3);
727 srtt += m;
728 if (m < 0) {
729 m = -m;
730 m -= (tp->mdev_us >> 2);
731
732
733
734
735
736
737
738
739 if (m > 0)
740 m >>= 3;
741 } else {
742 m -= (tp->mdev_us >> 2);
743 }
744 tp->mdev_us += m;
745 if (tp->mdev_us > tp->mdev_max_us) {
746 tp->mdev_max_us = tp->mdev_us;
747 if (tp->mdev_max_us > tp->rttvar_us)
748 tp->rttvar_us = tp->mdev_max_us;
749 }
750 if (after(tp->snd_una, tp->rtt_seq)) {
751 if (tp->mdev_max_us < tp->rttvar_us)
752 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
753 tp->rtt_seq = tp->snd_nxt;
754 tp->mdev_max_us = tcp_rto_min_us(sk);
755 }
756 } else {
757
758 srtt = m << 3;
759 tp->mdev_us = m << 1;
760 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
761 tp->mdev_max_us = tp->rttvar_us;
762 tp->rtt_seq = tp->snd_nxt;
763 }
764 tp->srtt_us = max(1U, srtt);
765}
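
/* Set sk_pacing_rate so TSO autosizing can build suitably sized packets:
 * roughly 2 * cwnd * mss / srtt bytes per second, capped by
 * sk_max_pacing_rate.
 */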
773static void tcp_update_pacing_rate(struct sock *sk)
774{
775 const struct tcp_sock *tp = tcp_sk(sk);
776 u64 rate;
777
778
779 rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
780
781 rate *= max(tp->snd_cwnd, tp->packets_out);
782
783 if (likely(tp->srtt_us))
784 do_div(rate, tp->srtt_us);
785
786
787
788
789
790 ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
791 sk->sk_max_pacing_rate);
792}
793
794
795
796
797static void tcp_set_rto(struct sock *sk)
798{
799 const struct tcp_sock *tp = tcp_sk(sk);
800
801
802
803
804
805
806
807
808
809
810 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
811
812
813
814
815
816
817
818
819
820
821 tcp_bound_rto(sk);
822}
823
824__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
825{
826 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
827
828 if (!cwnd)
829 cwnd = TCP_INIT_CWND;
830 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
831}
832
833
834
835
836
837void tcp_disable_fack(struct tcp_sock *tp)
838{
839
840 if (tcp_is_fack(tp))
841 tp->lost_skb_hint = NULL;
842 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
843}
844
845
846static void tcp_dsack_seen(struct tcp_sock *tp)
847{
848 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
849}
850
851static void tcp_update_reordering(struct sock *sk, const int metric,
852 const int ts)
853{
854 struct tcp_sock *tp = tcp_sk(sk);
855 if (metric > tp->reordering) {
856 int mib_idx;
857
858 tp->reordering = min(TCP_MAX_REORDERING, metric);
859
860
861 if (ts)
862 mib_idx = LINUX_MIB_TCPTSREORDER;
863 else if (tcp_is_reno(tp))
864 mib_idx = LINUX_MIB_TCPRENOREORDER;
865 else if (tcp_is_fack(tp))
866 mib_idx = LINUX_MIB_TCPFACKREORDER;
867 else
868 mib_idx = LINUX_MIB_TCPSACKREORDER;
869
870 NET_INC_STATS_BH(sock_net(sk), mib_idx);
871#if FASTRETRANS_DEBUG > 1
872 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
873 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
874 tp->reordering,
875 tp->fackets_out,
876 tp->sacked_out,
877 tp->undo_marker ? tp->undo_retrans : 0);
878#endif
879 tcp_disable_fack(tp);
880 }
881
882 if (metric > 0)
883 tcp_disable_early_retrans(tp);
884}
885
886
887static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
888{
889 if ((tp->retransmit_skb_hint == NULL) ||
890 before(TCP_SKB_CB(skb)->seq,
891 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
892 tp->retransmit_skb_hint = skb;
893
894 if (!tp->lost_out ||
895 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
896 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
897}
898
899static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
900{
901 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
902 tcp_verify_retransmit_hint(tp, skb);
903
904 tp->lost_out += tcp_skb_pcount(skb);
905 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
906 }
907}
908
909static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
910 struct sk_buff *skb)
911{
912 tcp_verify_retransmit_hint(tp, skb);
913
914 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
915 tp->lost_out += tcp_skb_pcount(skb);
916 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
917 }
918}
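
/* Sanity-check an incoming (D)SACK block: it must fall within the current
 * send window, and a D-SACK below snd_una is only accepted inside the
 * undo window (at most one max_window behind); bogus blocks are discarded
 * by the caller.
 */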
1014static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1015 u32 start_seq, u32 end_seq)
1016{
1017
1018 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
1019 return false;
1020
1021
1022 if (!before(start_seq, tp->snd_nxt))
1023 return false;
1024
1025
1026
1027
1028 if (after(start_seq, tp->snd_una))
1029 return true;
1030
1031 if (!is_dsack || !tp->undo_marker)
1032 return false;
1033
1034
1035 if (after(end_seq, tp->snd_una))
1036 return false;
1037
1038 if (!before(start_seq, tp->undo_marker))
1039 return true;
1040
1041
1042 if (!after(end_seq, tp->undo_marker))
1043 return false;
1044
1045
1046
1047
1048 return !before(start_seq, end_seq - tp->max_window);
1049}
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060static void tcp_mark_lost_retrans(struct sock *sk)
1061{
1062 const struct inet_connection_sock *icsk = inet_csk(sk);
1063 struct tcp_sock *tp = tcp_sk(sk);
1064 struct sk_buff *skb;
1065 int cnt = 0;
1066 u32 new_low_seq = tp->snd_nxt;
1067 u32 received_upto = tcp_highest_sack_seq(tp);
1068
1069 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1070 !after(received_upto, tp->lost_retrans_low) ||
1071 icsk->icsk_ca_state != TCP_CA_Recovery)
1072 return;
1073
1074 tcp_for_write_queue(skb, sk) {
1075 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1076
1077 if (skb == tcp_send_head(sk))
1078 break;
1079 if (cnt == tp->retrans_out)
1080 break;
1081 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1082 continue;
1083
1084 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1085 continue;
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098 if (after(received_upto, ack_seq)) {
1099 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1100 tp->retrans_out -= tcp_skb_pcount(skb);
1101
1102 tcp_skb_mark_lost_uncond_verify(tp, skb);
1103 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1104 } else {
1105 if (before(ack_seq, new_low_seq))
1106 new_low_seq = ack_seq;
1107 cnt += tcp_skb_pcount(skb);
1108 }
1109 }
1110
1111 if (tp->retrans_out)
1112 tp->lost_retrans_low = new_low_seq;
1113}
1114
1115static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1116 struct tcp_sack_block_wire *sp, int num_sacks,
1117 u32 prior_snd_una)
1118{
1119 struct tcp_sock *tp = tcp_sk(sk);
1120 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1121 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1122 bool dup_sack = false;
1123
1124 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1125 dup_sack = true;
1126 tcp_dsack_seen(tp);
1127 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1128 } else if (num_sacks > 1) {
1129 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1130 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1131
1132 if (!after(end_seq_0, end_seq_1) &&
1133 !before(start_seq_0, start_seq_1)) {
1134 dup_sack = true;
1135 tcp_dsack_seen(tp);
1136 NET_INC_STATS_BH(sock_net(sk),
1137 LINUX_MIB_TCPDSACKOFORECV);
1138 }
1139 }
1140
1141
1142 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1143 !after(end_seq_0, prior_snd_una) &&
1144 after(end_seq_0, tp->undo_marker))
1145 tp->undo_retrans--;
1146
1147 return dup_sack;
1148}
1149
1150struct tcp_sacktag_state {
1151 int reord;
1152 int fack_count;
1153 long rtt_us;
1154 int flag;
1155};
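
/* Check how @skb overlaps the SACK block.  If it is only partially
 * covered, fragment it at an MSS-rounded boundary so that tagging stays
 * per-segment.  Returns 1/0 for inside/outside the block, or a negative
 * error from tcp_fragment().
 */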
1165static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1166 u32 start_seq, u32 end_seq)
1167{
1168 int err;
1169 bool in_sack;
1170 unsigned int pkt_len;
1171 unsigned int mss;
1172
1173 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1174 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1175
1176 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1177 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1178 mss = tcp_skb_mss(skb);
1179 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1180
1181 if (!in_sack) {
1182 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1183 if (pkt_len < mss)
1184 pkt_len = mss;
1185 } else {
1186 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1187 if (pkt_len < mss)
1188 return -EINVAL;
1189 }
1190
1191
1192
1193
1194 if (pkt_len > mss) {
1195 unsigned int new_len = (pkt_len / mss) * mss;
1196 if (!in_sack && new_len < pkt_len)
1197 new_len += mss;
1198 pkt_len = new_len;
1199 }
1200
1201 if (pkt_len >= skb->len && !in_sack)
1202 return 0;
1203
1204 err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, pkt_len, mss);
1205 if (err < 0)
1206 return err;
1207 }
1208
1209 return in_sack;
1210}
1211
1212
1213static u8 tcp_sacktag_one(struct sock *sk,
1214 struct tcp_sacktag_state *state, u8 sacked,
1215 u32 start_seq, u32 end_seq,
1216 int dup_sack, int pcount,
1217 const struct skb_mstamp *xmit_time)
1218{
1219 struct tcp_sock *tp = tcp_sk(sk);
1220 int fack_count = state->fack_count;
1221
1222
1223 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1224 if (tp->undo_marker && tp->undo_retrans > 0 &&
1225 after(end_seq, tp->undo_marker))
1226 tp->undo_retrans--;
1227 if (sacked & TCPCB_SACKED_ACKED)
1228 state->reord = min(fack_count, state->reord);
1229 }
1230
1231
1232 if (!after(end_seq, tp->snd_una))
1233 return sacked;
1234
1235 if (!(sacked & TCPCB_SACKED_ACKED)) {
1236 if (sacked & TCPCB_SACKED_RETRANS) {
1237
1238
1239
1240
1241 if (sacked & TCPCB_LOST) {
1242 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1243 tp->lost_out -= pcount;
1244 tp->retrans_out -= pcount;
1245 }
1246 } else {
1247 if (!(sacked & TCPCB_RETRANS)) {
1248
1249
1250
1251 if (before(start_seq,
1252 tcp_highest_sack_seq(tp)))
1253 state->reord = min(fack_count,
1254 state->reord);
1255 if (!after(end_seq, tp->high_seq))
1256 state->flag |= FLAG_ORIG_SACK_ACKED;
1257
1258 if (state->rtt_us < 0) {
1259 struct skb_mstamp now;
1260
1261 skb_mstamp_get(&now);
1262 state->rtt_us = skb_mstamp_us_delta(&now,
1263 xmit_time);
1264 }
1265 }
1266
1267 if (sacked & TCPCB_LOST) {
1268 sacked &= ~TCPCB_LOST;
1269 tp->lost_out -= pcount;
1270 }
1271 }
1272
1273 sacked |= TCPCB_SACKED_ACKED;
1274 state->flag |= FLAG_DATA_SACKED;
1275 tp->sacked_out += pcount;
1276
1277 fack_count += pcount;
1278
1279
1280 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1281 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1282 tp->lost_cnt_hint += pcount;
1283
1284 if (fack_count > tp->fackets_out)
1285 tp->fackets_out = fack_count;
1286 }
1287
1288
1289
1290
1291
1292 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1293 sacked &= ~TCPCB_SACKED_RETRANS;
1294 tp->retrans_out -= pcount;
1295 }
1296
1297 return sacked;
1298}
1299
1300
1301
1302
1303static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1304 struct sk_buff *skb,
1305 struct tcp_sacktag_state *state,
1306 unsigned int pcount, int shifted, int mss,
1307 bool dup_sack)
1308{
1309 struct tcp_sock *tp = tcp_sk(sk);
1310 u32 start_seq = TCP_SKB_CB(skb)->seq;
1311 u32 end_seq = start_seq + shifted;
1312
1313 BUG_ON(!pcount);
1314
1315
1316
1317
1318
1319
1320
1321 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1322 start_seq, end_seq, dup_sack, pcount,
1323 &skb->skb_mstamp);
1324
1325 if (skb == tp->lost_skb_hint)
1326 tp->lost_cnt_hint += pcount;
1327
1328 TCP_SKB_CB(prev)->end_seq += shifted;
1329 TCP_SKB_CB(skb)->seq += shifted;
1330
1331 skb_shinfo(prev)->gso_segs += pcount;
1332 WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
1333 skb_shinfo(skb)->gso_segs -= pcount;
1334
1335
1336
1337
1338
1339
1340 if (!skb_shinfo(prev)->gso_size) {
1341 skb_shinfo(prev)->gso_size = mss;
1342 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1343 }
1344
1345
1346 if (skb_shinfo(skb)->gso_segs <= 1) {
1347 skb_shinfo(skb)->gso_size = 0;
1348 skb_shinfo(skb)->gso_type = 0;
1349 }
1350
1351
1352 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1353
1354 if (skb->len > 0) {
1355 BUG_ON(!tcp_skb_pcount(skb));
1356 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1357 return false;
1358 }
1359
1360
1361
1362 if (skb == tp->retransmit_skb_hint)
1363 tp->retransmit_skb_hint = prev;
1364 if (skb == tp->lost_skb_hint) {
1365 tp->lost_skb_hint = prev;
1366 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1367 }
1368
1369 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1370 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1371 TCP_SKB_CB(prev)->end_seq++;
1372
1373 if (skb == tcp_highest_sack(sk))
1374 tcp_advance_highest_sack(sk, skb);
1375
1376 tcp_unlink_write_queue(skb, sk);
1377 sk_wmem_free_skb(sk, skb);
1378
1379 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1380
1381 return true;
1382}
1383
1384
1385
1386
1387static int tcp_skb_seglen(const struct sk_buff *skb)
1388{
1389 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1390}
1391
1392
1393static int skb_can_shift(const struct sk_buff *skb)
1394{
1395 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1396}
1397
1398int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
1399 int pcount, int shiftlen)
1400{
1401
1402
1403
1404
1405
1406 if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
1407 return 0;
1408 if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
1409 return 0;
1410 return skb_shift(to, from, shiftlen);
1411}
1412
1413
1414
1415
1416static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1417 struct tcp_sacktag_state *state,
1418 u32 start_seq, u32 end_seq,
1419 bool dup_sack)
1420{
1421 struct tcp_sock *tp = tcp_sk(sk);
1422 struct sk_buff *prev;
1423 int mss;
1424 int pcount = 0;
1425 int len;
1426 int in_sack;
1427
1428 if (!sk_can_gso(sk))
1429 goto fallback;
1430
1431
1432 if (!dup_sack &&
1433 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1434 goto fallback;
1435 if (!skb_can_shift(skb))
1436 goto fallback;
1437
1438 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1439 goto fallback;
1440
1441
1442 if (unlikely(skb == tcp_write_queue_head(sk)))
1443 goto fallback;
1444 prev = tcp_write_queue_prev(sk, skb);
1445
1446 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1447 goto fallback;
1448
1449 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1450 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1451
1452 if (in_sack) {
1453 len = skb->len;
1454 pcount = tcp_skb_pcount(skb);
1455 mss = tcp_skb_seglen(skb);
1456
1457
1458
1459
1460 if (mss != tcp_skb_seglen(prev))
1461 goto fallback;
1462 } else {
1463 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1464 goto noop;
1465
1466
1467
1468
1469 if (tcp_skb_pcount(skb) <= 1)
1470 goto noop;
1471
1472 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1473 if (!in_sack) {
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485 goto fallback;
1486 }
1487
1488 len = end_seq - TCP_SKB_CB(skb)->seq;
1489 BUG_ON(len < 0);
1490 BUG_ON(len > skb->len);
1491
1492
1493
1494
1495
1496 mss = tcp_skb_mss(skb);
1497
1498
1499
1500
1501 if (mss != tcp_skb_seglen(prev))
1502 goto fallback;
1503
1504 if (len == mss) {
1505 pcount = 1;
1506 } else if (len < mss) {
1507 goto noop;
1508 } else {
1509 pcount = len / mss;
1510 len = pcount * mss;
1511 }
1512 }
1513
1514
1515 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1516 goto fallback;
1517
1518 if (!tcp_skb_shift(prev, skb, pcount, len))
1519 goto fallback;
1520 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1521 goto out;
1522
1523
1524
1525
1526 if (prev == tcp_write_queue_tail(sk))
1527 goto out;
1528 skb = tcp_write_queue_next(sk, prev);
1529
1530 if (!skb_can_shift(skb) ||
1531 (skb == tcp_send_head(sk)) ||
1532 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1533 (mss != tcp_skb_seglen(skb)))
1534 goto out;
1535
1536 len = skb->len;
1537 if (tcp_skb_shift(prev, skb, tcp_skb_pcount(skb), len)) {
1538 pcount += tcp_skb_pcount(skb);
1539 tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
1540 len, mss, 0);
1541 }
1542out:
1543 state->fack_count += pcount;
1544 return prev;
1545
1546noop:
1547 return skb;
1548
1549fallback:
1550 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1551 return NULL;
1552}
1553
1554static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1555 struct tcp_sack_block *next_dup,
1556 struct tcp_sacktag_state *state,
1557 u32 start_seq, u32 end_seq,
1558 bool dup_sack_in)
1559{
1560 struct tcp_sock *tp = tcp_sk(sk);
1561 struct sk_buff *tmp;
1562
1563 tcp_for_write_queue_from(skb, sk) {
1564 int in_sack = 0;
1565 bool dup_sack = dup_sack_in;
1566
1567 if (skb == tcp_send_head(sk))
1568 break;
1569
1570
1571 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1572 break;
1573
1574 if ((next_dup != NULL) &&
1575 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1576 in_sack = tcp_match_skb_to_sack(sk, skb,
1577 next_dup->start_seq,
1578 next_dup->end_seq);
1579 if (in_sack > 0)
1580 dup_sack = true;
1581 }
1582
1583
1584
1585
1586
1587 if (in_sack <= 0) {
1588 tmp = tcp_shift_skb_data(sk, skb, state,
1589 start_seq, end_seq, dup_sack);
1590 if (tmp != NULL) {
1591 if (tmp != skb) {
1592 skb = tmp;
1593 continue;
1594 }
1595
1596 in_sack = 0;
1597 } else {
1598 in_sack = tcp_match_skb_to_sack(sk, skb,
1599 start_seq,
1600 end_seq);
1601 }
1602 }
1603
1604 if (unlikely(in_sack < 0))
1605 break;
1606
1607 if (in_sack) {
1608 TCP_SKB_CB(skb)->sacked =
1609 tcp_sacktag_one(sk,
1610 state,
1611 TCP_SKB_CB(skb)->sacked,
1612 TCP_SKB_CB(skb)->seq,
1613 TCP_SKB_CB(skb)->end_seq,
1614 dup_sack,
1615 tcp_skb_pcount(skb),
1616 &skb->skb_mstamp);
1617
1618 if (!before(TCP_SKB_CB(skb)->seq,
1619 tcp_highest_sack_seq(tp)))
1620 tcp_advance_highest_sack(sk, skb);
1621 }
1622
1623 state->fack_count += tcp_skb_pcount(skb);
1624 }
1625 return skb;
1626}
1627
1628
1629
1630
1631static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1632 struct tcp_sacktag_state *state,
1633 u32 skip_to_seq)
1634{
1635 tcp_for_write_queue_from(skb, sk) {
1636 if (skb == tcp_send_head(sk))
1637 break;
1638
1639 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1640 break;
1641
1642 state->fack_count += tcp_skb_pcount(skb);
1643 }
1644 return skb;
1645}
1646
1647static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1648 struct sock *sk,
1649 struct tcp_sack_block *next_dup,
1650 struct tcp_sacktag_state *state,
1651 u32 skip_to_seq)
1652{
1653 if (next_dup == NULL)
1654 return skb;
1655
1656 if (before(next_dup->start_seq, skip_to_seq)) {
1657 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1658 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1659 next_dup->start_seq, next_dup->end_seq,
1660 1);
1661 }
1662
1663 return skb;
1664}
1665
1666static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1667{
1668 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1669}
1670
1671static int
1672tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1673 u32 prior_snd_una, long *sack_rtt_us)
1674{
1675 struct tcp_sock *tp = tcp_sk(sk);
1676 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1677 TCP_SKB_CB(ack_skb)->sacked);
1678 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1679 struct tcp_sack_block sp[TCP_NUM_SACKS];
1680 struct tcp_sack_block *cache;
1681 struct tcp_sacktag_state state;
1682 struct sk_buff *skb;
1683 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1684 int used_sacks;
1685 bool found_dup_sack = false;
1686 int i, j;
1687 int first_sack_index;
1688
1689 state.flag = 0;
1690 state.reord = tp->packets_out;
1691 state.rtt_us = -1L;
1692
1693 if (!tp->sacked_out) {
1694 if (WARN_ON(tp->fackets_out))
1695 tp->fackets_out = 0;
1696 tcp_highest_sack_reset(sk);
1697 }
1698
1699 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1700 num_sacks, prior_snd_una);
1701 if (found_dup_sack)
1702 state.flag |= FLAG_DSACKING_ACK;
1703
1704
1705
1706
1707
1708 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1709 return 0;
1710
1711 if (!tp->packets_out)
1712 goto out;
1713
1714 used_sacks = 0;
1715 first_sack_index = 0;
1716 for (i = 0; i < num_sacks; i++) {
1717 bool dup_sack = !i && found_dup_sack;
1718
1719 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1720 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1721
1722 if (!tcp_is_sackblock_valid(tp, dup_sack,
1723 sp[used_sacks].start_seq,
1724 sp[used_sacks].end_seq)) {
1725 int mib_idx;
1726
1727 if (dup_sack) {
1728 if (!tp->undo_marker)
1729 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1730 else
1731 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1732 } else {
1733
1734 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1735 !after(sp[used_sacks].end_seq, tp->snd_una))
1736 continue;
1737 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1738 }
1739
1740 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1741 if (i == 0)
1742 first_sack_index = -1;
1743 continue;
1744 }
1745
1746
1747 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1748 continue;
1749
1750 used_sacks++;
1751 }
1752
1753
1754 for (i = used_sacks - 1; i > 0; i--) {
1755 for (j = 0; j < i; j++) {
1756 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1757 swap(sp[j], sp[j + 1]);
1758
1759
1760 if (j == first_sack_index)
1761 first_sack_index = j + 1;
1762 }
1763 }
1764 }
1765
1766 skb = tcp_write_queue_head(sk);
1767 state.fack_count = 0;
1768 i = 0;
1769
1770 if (!tp->sacked_out) {
1771
1772 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1773 } else {
1774 cache = tp->recv_sack_cache;
1775
1776 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1777 !cache->end_seq)
1778 cache++;
1779 }
1780
1781 while (i < used_sacks) {
1782 u32 start_seq = sp[i].start_seq;
1783 u32 end_seq = sp[i].end_seq;
1784 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1785 struct tcp_sack_block *next_dup = NULL;
1786
1787 if (found_dup_sack && ((i + 1) == first_sack_index))
1788 next_dup = &sp[i + 1];
1789
1790
1791 while (tcp_sack_cache_ok(tp, cache) &&
1792 !before(start_seq, cache->end_seq))
1793 cache++;
1794
1795
1796 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1797 after(end_seq, cache->start_seq)) {
1798
1799
1800 if (before(start_seq, cache->start_seq)) {
1801 skb = tcp_sacktag_skip(skb, sk, &state,
1802 start_seq);
1803 skb = tcp_sacktag_walk(skb, sk, next_dup,
1804 &state,
1805 start_seq,
1806 cache->start_seq,
1807 dup_sack);
1808 }
1809
1810
1811 if (!after(end_seq, cache->end_seq))
1812 goto advance_sp;
1813
1814 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1815 &state,
1816 cache->end_seq);
1817
1818
1819 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1820
1821 skb = tcp_highest_sack(sk);
1822 if (skb == NULL)
1823 break;
1824 state.fack_count = tp->fackets_out;
1825 cache++;
1826 goto walk;
1827 }
1828
1829 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1830
1831 cache++;
1832 continue;
1833 }
1834
1835 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1836 skb = tcp_highest_sack(sk);
1837 if (skb == NULL)
1838 break;
1839 state.fack_count = tp->fackets_out;
1840 }
1841 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1842
1843walk:
1844 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1845 start_seq, end_seq, dup_sack);
1846
1847advance_sp:
1848 i++;
1849 }
1850
1851
1852 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1853 tp->recv_sack_cache[i].start_seq = 0;
1854 tp->recv_sack_cache[i].end_seq = 0;
1855 }
1856 for (j = 0; j < used_sacks; j++)
1857 tp->recv_sack_cache[i++] = sp[j];
1858
1859 tcp_mark_lost_retrans(sk);
1860
1861 tcp_verify_left_out(tp);
1862
1863 if ((state.reord < tp->fackets_out) &&
1864 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1865 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1866
1867out:
1868
1869#if FASTRETRANS_DEBUG > 0
1870 WARN_ON((int)tp->sacked_out < 0);
1871 WARN_ON((int)tp->lost_out < 0);
1872 WARN_ON((int)tp->retrans_out < 0);
1873 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1874#endif
1875 *sack_rtt_us = state.rtt_us;
1876 return state.flag;
1877}
1878
1879
1880
1881
1882static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1883{
1884 u32 holes;
1885
1886 holes = max(tp->lost_out, 1U);
1887 holes = min(holes, tp->packets_out);
1888
1889 if ((tp->sacked_out + holes) > tp->packets_out) {
1890 tp->sacked_out = tp->packets_out - holes;
1891 return true;
1892 }
1893 return false;
1894}
1895
1896
1897
1898
1899
1900static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1901{
1902 struct tcp_sock *tp = tcp_sk(sk);
1903 if (tcp_limit_reno_sacked(tp))
1904 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1905}
1906
1907
1908
1909static void tcp_add_reno_sack(struct sock *sk)
1910{
1911 struct tcp_sock *tp = tcp_sk(sk);
1912 tp->sacked_out++;
1913 tcp_check_reno_reordering(sk, 0);
1914 tcp_verify_left_out(tp);
1915}
1916
1917
1918
1919static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1920{
1921 struct tcp_sock *tp = tcp_sk(sk);
1922
1923 if (acked > 0) {
1924
1925 if (acked - 1 >= tp->sacked_out)
1926 tp->sacked_out = 0;
1927 else
1928 tp->sacked_out -= acked - 1;
1929 }
1930 tcp_check_reno_reordering(sk, acked);
1931 tcp_verify_left_out(tp);
1932}
1933
1934static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1935{
1936 tp->sacked_out = 0;
1937}
1938
1939void tcp_clear_retrans(struct tcp_sock *tp)
1940{
1941 tp->retrans_out = 0;
1942 tp->lost_out = 0;
1943 tp->undo_marker = 0;
1944 tp->undo_retrans = -1;
1945 tp->fackets_out = 0;
1946 tp->sacked_out = 0;
1947}
1948
1949static inline void tcp_init_undo(struct tcp_sock *tp)
1950{
1951 tp->undo_marker = tp->snd_una;
1952
1953 tp->undo_retrans = tp->retrans_out ? : -1;
1954}
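
/* Enter Loss state (after an RTO).  Reduce cwnd to one segment, mark the
 * scoreboard as lost, and arm F-RTO detection of spurious timeouts when
 * the conditions allow it.
 */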
1960void tcp_enter_loss(struct sock *sk)
1961{
1962 const struct inet_connection_sock *icsk = inet_csk(sk);
1963 struct tcp_sock *tp = tcp_sk(sk);
1964 struct sk_buff *skb;
1965 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
1966 bool is_reneg;
1967
1968
1969 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1970 !after(tp->high_seq, tp->snd_una) ||
1971 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1972 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1973 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1974 tcp_ca_event(sk, CA_EVENT_LOSS);
1975 tcp_init_undo(tp);
1976 }
1977 tp->snd_cwnd = 1;
1978 tp->snd_cwnd_cnt = 0;
1979 tp->snd_cwnd_stamp = tcp_time_stamp;
1980
1981 tp->retrans_out = 0;
1982 tp->lost_out = 0;
1983
1984 if (tcp_is_reno(tp))
1985 tcp_reset_reno_sack(tp);
1986
1987 skb = tcp_write_queue_head(sk);
1988 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
1989 if (is_reneg) {
1990 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1991 tp->sacked_out = 0;
1992 tp->fackets_out = 0;
1993 }
1994 tcp_clear_all_retrans_hints(tp);
1995
1996 tcp_for_write_queue(skb, sk) {
1997 if (skb == tcp_send_head(sk))
1998 break;
1999
2000 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
2001 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
2002 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2003 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2004 tp->lost_out += tcp_skb_pcount(skb);
2005 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2006 }
2007 }
2008 tcp_verify_left_out(tp);
2009
2010
2011
2012
2013 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
2014 tp->sacked_out >= sysctl_tcp_reordering)
2015 tp->reordering = min_t(unsigned int, tp->reordering,
2016 sysctl_tcp_reordering);
2017 tcp_set_ca_state(sk, TCP_CA_Loss);
2018 tp->high_seq = tp->snd_nxt;
2019 TCP_ECN_queue_cwr(tp);
2020
2021
2022
2023
2024
2025 tp->frto = sysctl_tcp_frto &&
2026 (new_recovery || icsk->icsk_retransmits) &&
2027 !inet_csk(sk)->icsk_mtup.probe_size;
2028}
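
/* If the ACK reneges on previously SACKed data (FLAG_SACK_RENEGING),
 * don't trust the scoreboard; rearm the retransmit timer with a short
 * delay so the head is retransmitted conservatively instead.
 */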
2040static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2041{
2042 if (flag & FLAG_SACK_RENEGING) {
2043 struct tcp_sock *tp = tcp_sk(sk);
2044 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2045 msecs_to_jiffies(10));
2046
2047 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2048 delay, TCP_RTO_MAX);
2049 return true;
2050 }
2051 return false;
2052}
2053
2054static inline int tcp_fackets_out(const struct tcp_sock *tp)
2055{
2056 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2057}
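
/* Heuristic equivalent of the number of duplicate ACKs received:
 * fackets_out when FACK is in use, otherwise sacked_out + 1.
 */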
2074static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2075{
2076 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2077}
2078
2079static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2080{
2081 struct tcp_sock *tp = tcp_sk(sk);
2082 unsigned long delay;
2083
2084
2085
2086
2087
2088 if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
2089 (flag & FLAG_ECE) || !tp->srtt_us)
2090 return false;
2091
2092 delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
2093 msecs_to_jiffies(2));
2094
2095 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2096 return false;
2097
2098 inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
2099 TCP_RTO_MAX);
2100 return true;
2101}
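
/* Decide whether it is time to enter Recovery (fast retransmit): packets
 * already marked lost, the dupACK/FACK count exceeding the reordering
 * threshold, a stalled or thin SACK stream, or the early-retransmit
 * conditions being met.
 */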
2196static bool tcp_time_to_recover(struct sock *sk, int flag)
2197{
2198 struct tcp_sock *tp = tcp_sk(sk);
2199 __u32 packets_out;
2200
2201
2202 if (tp->lost_out)
2203 return true;
2204
2205
2206 if (tcp_dupack_heuristics(tp) > tp->reordering)
2207 return true;
2208
2209
2210
2211
2212 packets_out = tp->packets_out;
2213 if (packets_out <= tp->reordering &&
2214 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2215 !tcp_may_send_now(sk)) {
2216
2217
2218
2219 return true;
2220 }
2221
2222
2223
2224
2225
2226
2227 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2228 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2229 tcp_is_sack(tp) && !tcp_send_head(sk))
2230 return true;
2231
2232
2233
2234
2235
2236
2237 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2238 (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
2239 !tcp_may_send_now(sk))
2240 return !tcp_pause_early_retransmit(sk, flag);
2241
2242 return false;
2243}
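
/* Mark the head of the write queue as lost: at most @packets full-sized
 * segments (or only the head skb when @mark_head is set), resuming from
 * lost_skb_hint/lost_cnt_hint when available.
 */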
2251static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2252{
2253 struct tcp_sock *tp = tcp_sk(sk);
2254 struct sk_buff *skb;
2255 int cnt, oldcnt, lost;
2256 unsigned int mss;
2257
2258 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2259
2260 WARN_ON(packets > tp->packets_out);
2261 if (tp->lost_skb_hint) {
2262 skb = tp->lost_skb_hint;
2263 cnt = tp->lost_cnt_hint;
2264
2265 if (mark_head && skb != tcp_write_queue_head(sk))
2266 return;
2267 } else {
2268 skb = tcp_write_queue_head(sk);
2269 cnt = 0;
2270 }
2271
2272 tcp_for_write_queue_from(skb, sk) {
2273 if (skb == tcp_send_head(sk))
2274 break;
2275
2276
2277 tp->lost_skb_hint = skb;
2278 tp->lost_cnt_hint = cnt;
2279
2280 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2281 break;
2282
2283 oldcnt = cnt;
2284 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2285 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2286 cnt += tcp_skb_pcount(skb);
2287
2288 if (cnt > packets) {
2289 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2290 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2291 (oldcnt >= packets))
2292 break;
2293
2294 mss = skb_shinfo(skb)->gso_size;
2295
2296 lost = (packets - oldcnt) * mss;
2297 if (lost < skb->len &&
2298 tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, lost, mss) < 0)
2299 break;
2300 cnt = packets;
2301 }
2302
2303 tcp_skb_mark_lost(tp, skb);
2304
2305 if (mark_head)
2306 break;
2307 }
2308 tcp_verify_left_out(tp);
2309}
2310
2311
2312
2313static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2314{
2315 struct tcp_sock *tp = tcp_sk(sk);
2316
2317 if (tcp_is_reno(tp)) {
2318 tcp_mark_head_lost(sk, 1, 1);
2319 } else if (tcp_is_fack(tp)) {
2320 int lost = tp->fackets_out - tp->reordering;
2321 if (lost <= 0)
2322 lost = 1;
2323 tcp_mark_head_lost(sk, lost, 0);
2324 } else {
2325 int sacked_upto = tp->sacked_out - tp->reordering;
2326 if (sacked_upto >= 0)
2327 tcp_mark_head_lost(sk, sacked_upto, 0);
2328 else if (fast_rexmit)
2329 tcp_mark_head_lost(sk, 1, 1);
2330 }
2331}
2332
2333
2334
2335
2336static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2337{
2338 tp->snd_cwnd = min(tp->snd_cwnd,
2339 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2340 tp->snd_cwnd_stamp = tcp_time_stamp;
2341}
2342
2343
2344
2345
2346static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2347{
2348 return !tp->retrans_stamp ||
2349 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2350 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2351}
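
/* Undo procedures.  A cwnd reduction can be reverted when the
 * retransmissions turn out to have been spurious, e.g. when the echoed
 * timestamp predates our first retransmission (see tcp_packet_delayed()
 * above).  tcp_any_retrans_done() reports whether anything was
 * retransmitted at all in the current episode.
 */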
2369static bool tcp_any_retrans_done(const struct sock *sk)
2370{
2371 const struct tcp_sock *tp = tcp_sk(sk);
2372 struct sk_buff *skb;
2373
2374 if (tp->retrans_out)
2375 return true;
2376
2377 skb = tcp_write_queue_head(sk);
2378 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2379 return true;
2380
2381 return false;
2382}
2383
2384#if FASTRETRANS_DEBUG > 1
2385static void DBGUNDO(struct sock *sk, const char *msg)
2386{
2387 struct tcp_sock *tp = tcp_sk(sk);
2388 struct inet_sock *inet = inet_sk(sk);
2389
2390 if (sk->sk_family == AF_INET) {
2391 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2392 msg,
2393 &inet->inet_daddr, ntohs(inet->inet_dport),
2394 tp->snd_cwnd, tcp_left_out(tp),
2395 tp->snd_ssthresh, tp->prior_ssthresh,
2396 tp->packets_out);
2397 }
2398#if IS_ENABLED(CONFIG_IPV6)
2399 else if (sk->sk_family == AF_INET6) {
2400 struct ipv6_pinfo *np = inet6_sk(sk);
2401 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2402 msg,
2403 &np->daddr, ntohs(inet->inet_dport),
2404 tp->snd_cwnd, tcp_left_out(tp),
2405 tp->snd_ssthresh, tp->prior_ssthresh,
2406 tp->packets_out);
2407 }
2408#endif
2409}
2410#else
2411#define DBGUNDO(x...) do { } while (0)
2412#endif
2413
2414static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2415{
2416 struct tcp_sock *tp = tcp_sk(sk);
2417
2418 if (unmark_loss) {
2419 struct sk_buff *skb;
2420
2421 tcp_for_write_queue(skb, sk) {
2422 if (skb == tcp_send_head(sk))
2423 break;
2424 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2425 }
2426 tp->lost_out = 0;
2427 tcp_clear_all_retrans_hints(tp);
2428 }
2429
2430 if (tp->prior_ssthresh) {
2431 const struct inet_connection_sock *icsk = inet_csk(sk);
2432
2433 if (icsk->icsk_ca_ops->undo_cwnd)
2434 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2435 else
2436 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2437
2438 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2439 tp->snd_ssthresh = tp->prior_ssthresh;
2440 TCP_ECN_withdraw_cwr(tp);
2441 }
2442 } else {
2443 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2444 }
2445 tp->snd_cwnd_stamp = tcp_time_stamp;
2446 tp->undo_marker = 0;
2447}
2448
2449static inline bool tcp_may_undo(const struct tcp_sock *tp)
2450{
2451 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2452}
2453
2454
2455static bool tcp_try_undo_recovery(struct sock *sk)
2456{
2457 struct tcp_sock *tp = tcp_sk(sk);
2458
2459 if (tcp_may_undo(tp)) {
2460 int mib_idx;
2461
2462
2463
2464
2465 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2466 tcp_undo_cwnd_reduction(sk, false);
2467 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2468 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2469 else
2470 mib_idx = LINUX_MIB_TCPFULLUNDO;
2471
2472 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2473 }
2474 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2475
2476
2477
2478 tcp_moderate_cwnd(tp);
2479 if (!tcp_any_retrans_done(sk))
2480 tp->retrans_stamp = 0;
2481 return true;
2482 }
2483 tcp_set_ca_state(sk, TCP_CA_Open);
2484 return false;
2485}
2486
2487
2488static bool tcp_try_undo_dsack(struct sock *sk)
2489{
2490 struct tcp_sock *tp = tcp_sk(sk);
2491
2492 if (tp->undo_marker && !tp->undo_retrans) {
2493 DBGUNDO(sk, "D-SACK");
2494 tcp_undo_cwnd_reduction(sk, false);
2495 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2496 return true;
2497 }
2498 return false;
2499}
2500
2501
2502static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2503{
2504 struct tcp_sock *tp = tcp_sk(sk);
2505
2506 if (frto_undo || tcp_may_undo(tp)) {
2507 tcp_undo_cwnd_reduction(sk, true);
2508
2509 DBGUNDO(sk, "partial loss");
2510 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2511 if (frto_undo)
2512 NET_INC_STATS_BH(sock_net(sk),
2513 LINUX_MIB_TCPSPURIOUSRTOS);
2514 inet_csk(sk)->icsk_retransmits = 0;
2515 if (frto_undo || tcp_is_sack(tp))
2516 tcp_set_ca_state(sk, TCP_CA_Open);
2517 return true;
2518 }
2519 return false;
2520}
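
/* The cwnd reduction in CWR and Recovery uses PRR (Proportional Rate
 * Reduction, RFC 6937): prr_delivered and prr_out pace cwnd down towards
 * ssthresh over the episode.  This initializes that per-episode state.
 */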
2532static void tcp_init_cwnd_reduction(struct sock *sk)
2533{
2534 struct tcp_sock *tp = tcp_sk(sk);
2535
2536 tp->high_seq = tp->snd_nxt;
2537 tp->tlp_high_seq = 0;
2538 tp->snd_cwnd_cnt = 0;
2539 tp->prior_cwnd = tp->snd_cwnd;
2540 tp->prr_delivered = 0;
2541 tp->prr_out = 0;
2542 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2543 TCP_ECN_queue_cwr(tp);
2544}
2545
2546static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
2547 int fast_rexmit)
2548{
2549 struct tcp_sock *tp = tcp_sk(sk);
2550 int sndcnt = 0;
2551 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2552 int newly_acked_sacked = prior_unsacked -
2553 (tp->packets_out - tp->sacked_out);
2554
2555 tp->prr_delivered += newly_acked_sacked;
2556 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2557 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2558 tp->prior_cwnd - 1;
2559 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2560 } else {
2561 sndcnt = min_t(int, delta,
2562 max_t(int, tp->prr_delivered - tp->prr_out,
2563 newly_acked_sacked) + 1);
2564 }
2565
2566 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2567 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2568}
2569
2570static inline void tcp_end_cwnd_reduction(struct sock *sk)
2571{
2572 struct tcp_sock *tp = tcp_sk(sk);
2573
2574
2575 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2576 (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
2577 tp->snd_cwnd = tp->snd_ssthresh;
2578 tp->snd_cwnd_stamp = tcp_time_stamp;
2579 }
2580 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2581}
2582
2583
2584void tcp_enter_cwr(struct sock *sk)
2585{
2586 struct tcp_sock *tp = tcp_sk(sk);
2587
2588 tp->prior_ssthresh = 0;
2589 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2590 tp->undo_marker = 0;
2591 tcp_init_cwnd_reduction(sk);
2592 tcp_set_ca_state(sk, TCP_CA_CWR);
2593 }
2594}
2595
2596static void tcp_try_keep_open(struct sock *sk)
2597{
2598 struct tcp_sock *tp = tcp_sk(sk);
2599 int state = TCP_CA_Open;
2600
2601 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2602 state = TCP_CA_Disorder;
2603
2604 if (inet_csk(sk)->icsk_ca_state != state) {
2605 tcp_set_ca_state(sk, state);
2606 tp->high_seq = tp->snd_nxt;
2607 }
2608}
2609
2610static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2611{
2612 struct tcp_sock *tp = tcp_sk(sk);
2613
2614 tcp_verify_left_out(tp);
2615
2616 if (!tcp_any_retrans_done(sk))
2617 tp->retrans_stamp = 0;
2618
2619 if (flag & FLAG_ECE)
2620 tcp_enter_cwr(sk);
2621
2622 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2623 tcp_try_keep_open(sk);
2624 } else {
2625 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2626 }
2627}
2628
2629static void tcp_mtup_probe_failed(struct sock *sk)
2630{
2631 struct inet_connection_sock *icsk = inet_csk(sk);
2632
2633 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2634 icsk->icsk_mtup.probe_size = 0;
2635}
2636
2637static void tcp_mtup_probe_success(struct sock *sk)
2638{
2639 struct tcp_sock *tp = tcp_sk(sk);
2640 struct inet_connection_sock *icsk = inet_csk(sk);
2641
2642
2643 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2644 tp->snd_cwnd = tp->snd_cwnd *
2645 tcp_mss_to_mtu(sk, tp->mss_cache) /
2646 icsk->icsk_mtup.probe_size;
2647 tp->snd_cwnd_cnt = 0;
2648 tp->snd_cwnd_stamp = tcp_time_stamp;
2649 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2650
2651 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2652 icsk->icsk_mtup.probe_size = 0;
2653 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2654}
2655
2656
2657
2658
2659
2660void tcp_simple_retransmit(struct sock *sk)
2661{
2662 const struct inet_connection_sock *icsk = inet_csk(sk);
2663 struct tcp_sock *tp = tcp_sk(sk);
2664 struct sk_buff *skb;
2665 unsigned int mss = tcp_current_mss(sk);
2666 u32 prior_lost = tp->lost_out;
2667
2668 tcp_for_write_queue(skb, sk) {
2669 if (skb == tcp_send_head(sk))
2670 break;
2671 if (tcp_skb_seglen(skb) > mss &&
2672 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2673 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2674 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2675 tp->retrans_out -= tcp_skb_pcount(skb);
2676 }
2677 tcp_skb_mark_lost_uncond_verify(tp, skb);
2678 }
2679 }
2680
2681 tcp_clear_retrans_hints_partial(tp);
2682
2683 if (prior_lost == tp->lost_out)
2684 return;
2685
2686 if (tcp_is_reno(tp))
2687 tcp_limit_reno_sacked(tp);
2688
2689 tcp_verify_left_out(tp);
2690
2691
2692
2693
2694
2695
2696 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2697 tp->high_seq = tp->snd_nxt;
2698 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2699 tp->prior_ssthresh = 0;
2700 tp->undo_marker = 0;
2701 tcp_set_ca_state(sk, TCP_CA_Loss);
2702 }
2703 tcp_xmit_retransmit_queue(sk);
2704}
2705EXPORT_SYMBOL(tcp_simple_retransmit);
2706
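/* Enter fast-retransmit Recovery: bump the Reno or SACK recovery SNMP
 * counter, arm the undo state, remember the old ssthresh unless the
 * trigger was an ECE-marked ACK, and start the cwnd reduction.
 */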
2707static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2708{
2709 struct tcp_sock *tp = tcp_sk(sk);
2710 int mib_idx;
2711
2712 if (tcp_is_reno(tp))
2713 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2714 else
2715 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2716
2717 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2718
2719 tp->prior_ssthresh = 0;
2720 tcp_init_undo(tp);
2721
2722 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2723 if (!ece_ack)
2724 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2725 tcp_init_cwnd_reduction(sk);
2726 }
2727 tcp_set_ca_state(sk, TCP_CA_Recovery);
2728}
2729
2730
2731
2732
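/* Process an ACK received in the CA_Loss state, including the F-RTO
 * spurious-timeout detection of RFC 5682.  Recovery ends (and may be
 * undone) once snd_una reaches high_seq; otherwise keep retransmitting
 * from the scoreboard.
 */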
2733static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2734{
2735 struct tcp_sock *tp = tcp_sk(sk);
2736 bool recovered = !before(tp->snd_una, tp->high_seq);
2737
2738 if (tp->frto) {
2739
2740
2741
2742 if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
2743 return;
2744
2745 if (after(tp->snd_nxt, tp->high_seq) &&
2746 (flag & FLAG_DATA_SACKED || is_dupack)) {
2747 tp->frto = 0;
2748 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2749 tp->high_seq = tp->snd_nxt;
2750 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
2751 TCP_NAGLE_OFF);
2752 if (after(tp->snd_nxt, tp->high_seq))
2753 return;
2754 tp->frto = 0;
2755 }
2756 }
2757
2758 if (recovered) {
2759
2760 tcp_try_undo_recovery(sk);
2761 return;
2762 }
2763 if (tcp_is_reno(tp)) {
2764
2765
2766
2767 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2768 tcp_add_reno_sack(sk);
2769 else if (flag & FLAG_SND_UNA_ADVANCED)
2770 tcp_reset_reno_sack(tp);
2771 }
2772 if (tcp_try_undo_loss(sk, false))
2773 return;
2774 tcp_xmit_retransmit_queue(sk);
2775}
2776
2777
2778static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2779 const int prior_unsacked)
2780{
2781 struct tcp_sock *tp = tcp_sk(sk);
2782
2783 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2784
2785
2786
2787 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2788
2789
2790
2791
2792
2793
2794 if (tp->retrans_out) {
2795 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2796 return true;
2797 }
2798
2799 if (!tcp_any_retrans_done(sk))
2800 tp->retrans_stamp = 0;
2801
2802 DBGUNDO(sk, "partial recovery");
2803 tcp_undo_cwnd_reduction(sk, true);
2804 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2805 tcp_try_keep_open(sk);
2806 return true;
2807 }
2808 return false;
2809}
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
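/* The main congestion/recovery state machine, run for ACKs that are
 * "dubious" (duplicates, SACKs, ECE, state != Open, ...).  It decides
 * when to enter and leave the Disorder, CWR, Recovery and Loss states,
 * when losses should be marked on the scoreboard, and when a fast
 * retransmit plus PRR cwnd reduction should be performed.
 */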
2822static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2823 const int prior_unsacked,
2824 bool is_dupack, int flag)
2825{
2826 struct inet_connection_sock *icsk = inet_csk(sk);
2827 struct tcp_sock *tp = tcp_sk(sk);
2828 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2829 (tcp_fackets_out(tp) > tp->reordering));
2830 int fast_rexmit = 0;
2831
2832 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2833 tp->sacked_out = 0;
2834 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2835 tp->fackets_out = 0;
2836
2837
2838
2839 if (flag & FLAG_ECE)
2840 tp->prior_ssthresh = 0;
2841
2842
2843 if (tcp_check_sack_reneging(sk, flag))
2844 return;
2845
2846
2847 tcp_verify_left_out(tp);
2848
2849
2850
2851 if (icsk->icsk_ca_state == TCP_CA_Open) {
2852 WARN_ON(tp->retrans_out != 0);
2853 tp->retrans_stamp = 0;
2854 } else if (!before(tp->snd_una, tp->high_seq)) {
2855 switch (icsk->icsk_ca_state) {
2856 case TCP_CA_CWR:
2857
2858
2859 if (tp->snd_una != tp->high_seq) {
2860 tcp_end_cwnd_reduction(sk);
2861 tcp_set_ca_state(sk, TCP_CA_Open);
2862 }
2863 break;
2864
2865 case TCP_CA_Recovery:
2866 if (tcp_is_reno(tp))
2867 tcp_reset_reno_sack(tp);
2868 if (tcp_try_undo_recovery(sk))
2869 return;
2870 tcp_end_cwnd_reduction(sk);
2871 break;
2872 }
2873 }
2874
2875
2876 switch (icsk->icsk_ca_state) {
2877 case TCP_CA_Recovery:
2878 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2879 if (tcp_is_reno(tp) && is_dupack)
2880 tcp_add_reno_sack(sk);
2881 } else {
2882 if (tcp_try_undo_partial(sk, acked, prior_unsacked))
2883 return;
2884
2885 do_lost = tcp_is_reno(tp) ||
2886 tcp_fackets_out(tp) > tp->reordering;
2887 }
2888 if (tcp_try_undo_dsack(sk)) {
2889 tcp_try_keep_open(sk);
2890 return;
2891 }
2892 break;
2893 case TCP_CA_Loss:
2894 tcp_process_loss(sk, flag, is_dupack);
2895 if (icsk->icsk_ca_state != TCP_CA_Open)
2896 return;
2897
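		/* fall through: loss processing left us in the Open state,
		 * so continue with the common Open-state handling below.
		 */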
2898 default:
2899 if (tcp_is_reno(tp)) {
2900 if (flag & FLAG_SND_UNA_ADVANCED)
2901 tcp_reset_reno_sack(tp);
2902 if (is_dupack)
2903 tcp_add_reno_sack(sk);
2904 }
2905
2906 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2907 tcp_try_undo_dsack(sk);
2908
2909 if (!tcp_time_to_recover(sk, flag)) {
2910 tcp_try_to_open(sk, flag, prior_unsacked);
2911 return;
2912 }
2913
2914
2915 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2916 icsk->icsk_mtup.probe_size &&
2917 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2918 tcp_mtup_probe_failed(sk);
2919
2920 tp->snd_cwnd++;
2921 tcp_simple_retransmit(sk);
2922 return;
2923 }
2924
2925
2926 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2927 fast_rexmit = 1;
2928 }
2929
2930 if (do_lost)
2931 tcp_update_scoreboard(sk, fast_rexmit);
2932 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
2933 tcp_xmit_retransmit_queue(sk);
2934}
2935
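/* Pick an RTT sample for the estimator.  In keeping with Karn's
 * algorithm the caller only supplies seq_rtt from never-retransmitted
 * data; we prefer that, then fall back to the SACK-derived RTT, and
 * finally to the timestamp echo (tsecr) when the ACK advanced snd_una.
 */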
2936static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2937 long seq_rtt_us, long sack_rtt_us)
2938{
2939 const struct tcp_sock *tp = tcp_sk(sk);
2940
2941
2942
2943
2944
2945
2946 if (seq_rtt_us < 0)
2947 seq_rtt_us = sack_rtt_us;
2948
2949
2950
2951
2952
2953
2954
2955 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2956 flag & FLAG_ACKED)
2957 seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
2958
2959 if (seq_rtt_us < 0)
2960 return false;
2961
2962 tcp_rtt_estimator(sk, seq_rtt_us);
2963 tcp_set_rto(sk);
2964
2965
2966 inet_csk(sk)->icsk_backoff = 0;
2967 return true;
2968}
2969
2970
2971static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
2972{
2973 struct tcp_sock *tp = tcp_sk(sk);
2974 long seq_rtt_us = -1L;
2975
2976 if (synack_stamp && !tp->total_retrans)
2977 seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
2978
2979
2980
2981
2982 if (!tp->srtt_us)
2983 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
2984}
2985
2986static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
2987{
2988 const struct inet_connection_sock *icsk = inet_csk(sk);
2989
2990 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
2991 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2992}
2993
2994
2995
2996
2997void tcp_rearm_rto(struct sock *sk)
2998{
2999 const struct inet_connection_sock *icsk = inet_csk(sk);
3000 struct tcp_sock *tp = tcp_sk(sk);
3001
3002
3003
3004
3005 if (tp->fastopen_rsk)
3006 return;
3007
3008 if (!tp->packets_out) {
3009 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3010 } else {
3011 u32 rto = inet_csk(sk)->icsk_rto;
3012
3013 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3014 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3015 struct sk_buff *skb = tcp_write_queue_head(sk);
3016 const u32 rto_time_stamp =
3017 tcp_skb_timestamp(skb) + rto;
3018 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3019
3020
3021
3022 rto = max_t(int, delta, 1);
3023 }
3024 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3025 TCP_RTO_MAX);
3026 }
3027}
3028
3029
3030
3031
3032void tcp_resume_early_retransmit(struct sock *sk)
3033{
3034 struct tcp_sock *tp = tcp_sk(sk);
3035
3036 tcp_rearm_rto(sk);
3037
3038
3039 if (!tp->do_early_retrans)
3040 return;
3041
3042 tcp_enter_recovery(sk, false);
3043 tcp_update_scoreboard(sk, 1);
3044 tcp_xmit_retransmit_queue(sk);
3045}
3046
3047
3048static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3049{
3050 struct tcp_sock *tp = tcp_sk(sk);
3051 u32 packets_acked;
3052
3053 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3054
3055 packets_acked = tcp_skb_pcount(skb);
3056 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3057 return 0;
3058 packets_acked -= tcp_skb_pcount(skb);
3059
3060 if (packets_acked) {
3061 BUG_ON(tcp_skb_pcount(skb) == 0);
3062 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3063 }
3064
3065 return packets_acked;
3066}
3067
3068
3069
3070
3071
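/* Remove fully acknowledged frames from the retransmission queue,
 * trim partially acked TSO frames, update the SACK/loss/retrans
 * counters and RTT samples, and return the FLAG_* bits describing
 * what this ACK covered.
 */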
3072static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3073 u32 prior_snd_una, long sack_rtt_us)
3074{
3075 const struct inet_connection_sock *icsk = inet_csk(sk);
3076 struct skb_mstamp first_ackt, last_ackt, now;
3077 struct tcp_sock *tp = tcp_sk(sk);
3078 u32 prior_sacked = tp->sacked_out;
3079 u32 reord = tp->packets_out;
3080 bool fully_acked = true;
3081 bool rtt_update;
3082 long ca_seq_rtt_us = -1L;
3083 long seq_rtt_us = -1L;
3084 struct sk_buff *skb;
3085 u32 pkts_acked = 0;
3086 int flag = 0;
3087
3088 first_ackt.v64 = 0;
3089
3090 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3091 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3092 u8 sacked = scb->sacked;
3093 u32 acked_pcount;
3094
3095
3096 if (after(scb->end_seq, tp->snd_una)) {
3097 if (tcp_skb_pcount(skb) == 1 ||
3098 !after(tp->snd_una, scb->seq))
3099 break;
3100
3101 acked_pcount = tcp_tso_acked(sk, skb);
3102 if (!acked_pcount)
3103 break;
3104
3105 fully_acked = false;
3106 } else {
3107 acked_pcount = tcp_skb_pcount(skb);
3108 }
3109
3110 if (sacked & TCPCB_RETRANS) {
3111 if (sacked & TCPCB_SACKED_RETRANS)
3112 tp->retrans_out -= acked_pcount;
3113 flag |= FLAG_RETRANS_DATA_ACKED;
3114 } else {
3115 last_ackt = skb->skb_mstamp;
3116 WARN_ON_ONCE(last_ackt.v64 == 0);
3117 if (!first_ackt.v64)
3118 first_ackt = last_ackt;
3119
3120 if (!(sacked & TCPCB_SACKED_ACKED)) {
3121 reord = min(pkts_acked, reord);
3122 if (!after(scb->end_seq, tp->high_seq))
3123 flag |= FLAG_ORIG_SACK_ACKED;
3124 }
3125 }
3126
3127 if (sacked & TCPCB_SACKED_ACKED)
3128 tp->sacked_out -= acked_pcount;
3129 if (sacked & TCPCB_LOST)
3130 tp->lost_out -= acked_pcount;
3131
3132 tp->packets_out -= acked_pcount;
3133 pkts_acked += acked_pcount;
3134
3135
3136
3137
3138
3139
3140
3141
3142 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3143 flag |= FLAG_DATA_ACKED;
3144 } else {
3145 flag |= FLAG_SYN_ACKED;
3146 tp->retrans_stamp = 0;
3147 }
3148
3149 if (!fully_acked)
3150 break;
3151
3152 tcp_unlink_write_queue(skb, sk);
3153 sk_wmem_free_skb(sk, skb);
3154 if (skb == tp->retransmit_skb_hint)
3155 tp->retransmit_skb_hint = NULL;
3156 if (skb == tp->lost_skb_hint)
3157 tp->lost_skb_hint = NULL;
3158 }
3159
3160 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3161 tp->snd_up = tp->snd_una;
3162
3163 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3164 flag |= FLAG_SACK_RENEGING;
3165
3166 skb_mstamp_get(&now);
3167 if (first_ackt.v64 && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3168 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
3169 ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
3170 }
3171
3172 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
3173
3174 if (flag & FLAG_ACKED) {
3175 const struct tcp_congestion_ops *ca_ops
3176 = inet_csk(sk)->icsk_ca_ops;
3177
3178 tcp_rearm_rto(sk);
3179 if (unlikely(icsk->icsk_mtup.probe_size &&
3180 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3181 tcp_mtup_probe_success(sk);
3182 }
3183
3184 if (tcp_is_reno(tp)) {
3185 tcp_remove_reno_sacks(sk, pkts_acked);
3186
3187
3188
3189
3190
3191
3192
3193 if (flag & FLAG_RETRANS_DATA_ACKED)
3194 flag &= ~FLAG_ORIG_SACK_ACKED;
3195 } else {
3196 int delta;
3197
3198
3199 if (reord < prior_fackets && reord <= tp->fackets_out)
3200 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3201
3202 delta = tcp_is_fack(tp) ? pkts_acked :
3203 prior_sacked - tp->sacked_out;
3204 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3205 }
3206
3207 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3208
3209 if (ca_ops->pkts_acked)
3210 ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
3211
3212 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3213 sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
3214
3215
3216
3217
3218 tcp_rearm_rto(sk);
3219 }
3220
3221#if FASTRETRANS_DEBUG > 0
3222 WARN_ON((int)tp->sacked_out < 0);
3223 WARN_ON((int)tp->lost_out < 0);
3224 WARN_ON((int)tp->retrans_out < 0);
3225 if (!tp->packets_out && tcp_is_sack(tp)) {
3226 icsk = inet_csk(sk);
3227 if (tp->lost_out) {
3228 pr_debug("Leak l=%u %d\n",
3229 tp->lost_out, icsk->icsk_ca_state);
3230 tp->lost_out = 0;
3231 }
3232 if (tp->sacked_out) {
3233 pr_debug("Leak s=%u %d\n",
3234 tp->sacked_out, icsk->icsk_ca_state);
3235 tp->sacked_out = 0;
3236 }
3237 if (tp->retrans_out) {
3238 pr_debug("Leak r=%u %d\n",
3239 tp->retrans_out, icsk->icsk_ca_state);
3240 tp->retrans_out = 0;
3241 }
3242 }
3243#endif
3244 return flag;
3245}
3246
3247static void tcp_ack_probe(struct sock *sk)
3248{
3249 const struct tcp_sock *tp = tcp_sk(sk);
3250 struct inet_connection_sock *icsk = inet_csk(sk);
3251
3252
3253
3254 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3255 icsk->icsk_backoff = 0;
3256 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3257
3258
3259
3260 } else {
3261 unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
3262
3263 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3264 when, TCP_RTO_MAX);
3265 }
3266}
3267
3268static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3269{
3270 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3271 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3272}
3273
3274
3275static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3276{
3277 if (tcp_in_cwnd_reduction(sk))
3278 return false;
3279
3280
3281
3282
3283
3284
3285
3286 if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
3287 return flag & FLAG_FORWARD_PROGRESS;
3288
3289 return flag & FLAG_DATA_ACKED;
3290}
3291
3292
3293
3294
3295static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3296 const u32 ack, const u32 ack_seq,
3297 const u32 nwin)
3298{
3299 return after(ack, tp->snd_una) ||
3300 after(ack_seq, tp->snd_wl1) ||
3301 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3302}
3303
3304
3305static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3306{
3307 u32 delta = ack - tp->snd_una;
3308
3309 u64_stats_update_begin(&tp->syncp);
3310 tp->bytes_acked += delta;
3311 u64_stats_update_end(&tp->syncp);
3312 tp->snd_una = ack;
3313}
3314
3315
3316static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3317{
3318 u32 delta = seq - tp->rcv_nxt;
3319
3320 u64_stats_update_begin(&tp->syncp);
3321 tp->bytes_received += delta;
3322 u64_stats_update_end(&tp->syncp);
3323 tp->rcv_nxt = seq;
3324}
3325
3326
3327
3328
3329
3330
3331static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3332 u32 ack_seq)
3333{
3334 struct tcp_sock *tp = tcp_sk(sk);
3335 int flag = 0;
3336 u32 nwin = ntohs(tcp_hdr(skb)->window);
3337
3338 if (likely(!tcp_hdr(skb)->syn))
3339 nwin <<= tp->rx_opt.snd_wscale;
3340
3341 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3342 flag |= FLAG_WIN_UPDATE;
3343 tcp_update_wl(tp, ack_seq);
3344
3345 if (tp->snd_wnd != nwin) {
3346 tp->snd_wnd = nwin;
3347
3348
3349
3350
3351 tp->pred_flags = 0;
3352 tcp_fast_path_check(sk);
3353
3354 if (nwin > tp->max_window) {
3355 tp->max_window = nwin;
3356 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3357 }
3358 }
3359 }
3360
3361 tcp_snd_una_update(tp, ack);
3362
3363 return flag;
3364}
3365
3366static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3367 u32 *last_oow_ack_time)
3368{
3369 if (*last_oow_ack_time) {
3370 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
3371
3372 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
3373 NET_INC_STATS(net, mib_idx);
3374 return true;
3375 }
3376 }
3377
3378 *last_oow_ack_time = tcp_time_stamp;
3379
3380 return false;
3381}
3382
3383
3384
3385
3386
3387
3388
3389
3390bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3391 int mib_idx, u32 *last_oow_ack_time)
3392{
3393
3394 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3395 !tcp_hdr(skb)->syn)
3396 return false;
3397
3398 return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
3399}
3400
3401
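/* RFC 5961 7.2: send a rate-limited challenge ACK in response to a
 * suspicious segment (e.g. blind RST/SYN injection attempts).  Rate
 * limited per-socket via tcp_invalid_ratelimit and host-wide via
 * tcp_challenge_ack_limit.
 */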
3402static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3403{
3404
3405 static u32 challenge_timestamp;
3406 static unsigned int challenge_count;
3407 struct tcp_sock *tp = tcp_sk(sk);
3408 u32 count, now;
3409
3410
3411 if (__tcp_oow_rate_limited(sock_net(sk),
3412 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3413 &tp->last_oow_ack_time))
3414 return;
3415
3416
3417 now = jiffies / HZ;
3418 if (now != challenge_timestamp) {
3419 u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
3420
3421 challenge_timestamp = now;
3422 WRITE_ONCE(challenge_count, half +
3423 prandom_u32_max(sysctl_tcp_challenge_ack_limit));
3424 }
3425 count = READ_ONCE(challenge_count);
3426 if (count > 0) {
3427 WRITE_ONCE(challenge_count, count - 1);
3428 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3429 tcp_send_ack(sk);
3430 }
3431}
3432
3433static void tcp_store_ts_recent(struct tcp_sock *tp)
3434{
3435 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3436 tp->rx_opt.ts_recent_stamp = get_seconds();
3437}
3438
3439static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3440{
3441 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3442
3443
3444
3445
3446
3447
3448
3449 if (tcp_paws_check(&tp->rx_opt, 0))
3450 tcp_store_ts_recent(tp);
3451 }
3452}
3453
3454
3455
3456
3457static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3458{
3459 struct tcp_sock *tp = tcp_sk(sk);
3460 bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
3461 !(flag & (FLAG_SND_UNA_ADVANCED |
3462 FLAG_NOT_DUP | FLAG_DATA_SACKED));
3463
3464
3465
3466
3467 if (is_tlp_dupack) {
3468 tp->tlp_high_seq = 0;
3469 return;
3470 }
3471
3472 if (after(ack, tp->tlp_high_seq)) {
3473 tp->tlp_high_seq = 0;
3474
3475 if (!(flag & FLAG_DSACKING_ACK)) {
3476 tcp_init_cwnd_reduction(sk);
3477 tcp_set_ca_state(sk, TCP_CA_CWR);
3478 tcp_end_cwnd_reduction(sk);
3479 tcp_try_keep_open(sk);
3480 NET_INC_STATS_BH(sock_net(sk),
3481 LINUX_MIB_TCPLOSSPROBERECOVERY);
3482 }
3483 }
3484}
3485
3486static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3487{
3488 const struct inet_connection_sock *icsk = inet_csk(sk);
3489
3490 if (icsk->icsk_ca_ops->in_ack_event)
3491 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3492}
3493
3494
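/* This routine deals with incoming ACKs, but not outgoing ones:
 * validate the ACK field, update the send window and snd_una, clean
 * the retransmission queue, then run congestion control and the
 * fast-retransmit state machine as needed.
 */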
3495static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3496{
3497 struct inet_connection_sock *icsk = inet_csk(sk);
3498 struct tcp_sock *tp = tcp_sk(sk);
3499 u32 prior_snd_una = tp->snd_una;
3500 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3501 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3502 bool is_dupack = false;
3503 u32 prior_fackets;
3504 int prior_packets = tp->packets_out;
3505 const int prior_unsacked = tp->packets_out - tp->sacked_out;
3506 int acked = 0;
3507 long sack_rtt_us = -1L;
3508
3509
3510
3511
3512 if (before(ack, prior_snd_una)) {
3513
3514 if (before(ack, prior_snd_una - tp->max_window)) {
3515 tcp_send_challenge_ack(sk, skb);
3516 return -1;
3517 }
3518 goto old_ack;
3519 }
3520
3521
3522
3523
3524 if (after(ack, tp->snd_nxt))
3525 goto invalid_ack;
3526
3527 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3528 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3529 tcp_rearm_rto(sk);
3530
3531 if (after(ack, prior_snd_una)) {
3532 flag |= FLAG_SND_UNA_ADVANCED;
3533 icsk->icsk_retransmits = 0;
3534 }
3535
3536 prior_fackets = tp->fackets_out;
3537
3538
3539
3540
3541 if (flag & FLAG_UPDATE_TS_RECENT)
3542 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3543
3544 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3545
3546
3547
3548
3549 tcp_update_wl(tp, ack_seq);
3550 tcp_snd_una_update(tp, ack);
3551 flag |= FLAG_WIN_UPDATE;
3552
3553 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3554
3555 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3556 } else {
3557 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3558
3559 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3560 flag |= FLAG_DATA;
3561 else
3562 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3563
3564 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3565
3566 if (TCP_SKB_CB(skb)->sacked)
3567 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3568 &sack_rtt_us);
3569
3570 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3571 flag |= FLAG_ECE;
3572 ack_ev_flags |= CA_ACK_ECE;
3573 }
3574
3575 if (flag & FLAG_WIN_UPDATE)
3576 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3577
3578 tcp_in_ack_event(sk, ack_ev_flags);
3579 }
3580
3581
3582
3583
3584 sk->sk_err_soft = 0;
3585 icsk->icsk_probes_out = 0;
3586 tp->rcv_tstamp = tcp_time_stamp;
3587 if (!prior_packets)
3588 goto no_queue;
3589
3590
3591 acked = tp->packets_out;
3592 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3593 sack_rtt_us);
3594 acked -= tp->packets_out;
3595
3596
3597 if (tcp_may_raise_cwnd(sk, flag))
3598 tcp_cong_avoid(sk, ack, acked);
3599
3600 if (tcp_ack_is_dubious(sk, flag)) {
3601 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3602 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3603 is_dupack, flag);
3604 }
3605 if (tp->tlp_high_seq)
3606 tcp_process_tlp_ack(sk, ack, flag);
3607
3608 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3609 sk_dst_confirm(sk);
3610
3611 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3612 tcp_schedule_loss_probe(sk);
3613 tcp_update_pacing_rate(sk);
3614 return 1;
3615
3616no_queue:
3617
3618 if (flag & FLAG_DSACKING_ACK)
3619 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3620 is_dupack, flag);
3621
3622
3623
3624
3625 if (tcp_send_head(sk))
3626 tcp_ack_probe(sk);
3627
3628 if (tp->tlp_high_seq)
3629 tcp_process_tlp_ack(sk, ack, flag);
3630 return 1;
3631
3632invalid_ack:
3633 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3634 return -1;
3635
3636old_ack:
3637
3638
3639
3640 if (TCP_SKB_CB(skb)->sacked) {
3641 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3642 &sack_rtt_us);
3643 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3644 is_dupack, flag);
3645 }
3646
3647 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3648 return 0;
3649}
3650
3651static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3652 bool syn, struct tcp_fastopen_cookie *foc,
3653 bool exp_opt)
3654{
3655
3656 if (!foc || !syn || len < 0 || (len & 1))
3657 return;
3658
3659 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3660 len <= TCP_FASTOPEN_COOKIE_MAX)
3661 memcpy(foc->val, cookie, len);
3662 else if (len != 0)
3663 len = -1;
3664 foc->len = len;
3665 foc->exp = exp_opt;
3666}
3667
3668
3669
3670
3671
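/* Look for TCP options.  Normally only called on SYN and SYN-ACK
 * packets, but also on established-flow packets when the fast
 * timestamp-only parsing below fails.
 */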
3672void tcp_parse_options(const struct sk_buff *skb,
3673 struct tcp_options_received *opt_rx, int estab,
3674 struct tcp_fastopen_cookie *foc)
3675{
3676 const unsigned char *ptr;
3677 const struct tcphdr *th = tcp_hdr(skb);
3678 int length = (th->doff * 4) - sizeof(struct tcphdr);
3679
3680 ptr = (const unsigned char *)(th + 1);
3681 opt_rx->saw_tstamp = 0;
3682
3683 while (length > 0) {
3684 int opcode = *ptr++;
3685 int opsize;
3686
3687 switch (opcode) {
3688 case TCPOPT_EOL:
3689 return;
3690 case TCPOPT_NOP:
3691 length--;
3692 continue;
3693 default:
3694 opsize = *ptr++;
3695 if (opsize < 2)
3696 return;
3697 if (opsize > length)
3698 return;
3699 switch (opcode) {
3700 case TCPOPT_MSS:
3701 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3702 u16 in_mss = get_unaligned_be16(ptr);
3703 if (in_mss) {
3704 if (opt_rx->user_mss &&
3705 opt_rx->user_mss < in_mss)
3706 in_mss = opt_rx->user_mss;
3707 opt_rx->mss_clamp = in_mss;
3708 }
3709 }
3710 break;
3711 case TCPOPT_WINDOW:
3712 if (opsize == TCPOLEN_WINDOW && th->syn &&
3713 !estab && sysctl_tcp_window_scaling) {
3714 __u8 snd_wscale = *(__u8 *)ptr;
3715 opt_rx->wscale_ok = 1;
3716 if (snd_wscale > 14) {
3717 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3718 __func__,
3719 snd_wscale);
3720 snd_wscale = 14;
3721 }
3722 opt_rx->snd_wscale = snd_wscale;
3723 }
3724 break;
3725 case TCPOPT_TIMESTAMP:
3726 if ((opsize == TCPOLEN_TIMESTAMP) &&
3727 ((estab && opt_rx->tstamp_ok) ||
3728 (!estab && sysctl_tcp_timestamps))) {
3729 opt_rx->saw_tstamp = 1;
3730 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3731 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3732 }
3733 break;
3734 case TCPOPT_SACK_PERM:
3735 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3736 !estab && sysctl_tcp_sack) {
3737 opt_rx->sack_ok = TCP_SACK_SEEN;
3738 tcp_sack_reset(opt_rx);
3739 }
3740 break;
3741
3742 case TCPOPT_SACK:
3743 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3744 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3745 opt_rx->sack_ok) {
3746 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3747 }
3748 break;
3749#ifdef CONFIG_TCP_MD5SIG
3750 case TCPOPT_MD5SIG:
3751
3752
3753
3754
3755 break;
3756#endif
3757 case TCPOPT_FASTOPEN:
3758 tcp_parse_fastopen_option(
3759 opsize - TCPOLEN_FASTOPEN_BASE,
3760 ptr, th->syn, foc, false);
3761 break;
3762
3763 case TCPOPT_EXP:
3764
3765
3766
3767 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
3768 get_unaligned_be16(ptr) ==
3769 TCPOPT_FASTOPEN_MAGIC)
3770 tcp_parse_fastopen_option(opsize -
3771 TCPOLEN_EXP_FASTOPEN_BASE,
3772 ptr + 2, th->syn, foc, true);
3773 break;
3774
3775 }
3776 ptr += opsize-2;
3777 length -= opsize;
3778 }
3779 }
3780}
3781EXPORT_SYMBOL(tcp_parse_options);
3782
3783static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3784{
3785 const __be32 *ptr = (const __be32 *)(th + 1);
3786
3787 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3788 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3789 tp->rx_opt.saw_tstamp = 1;
3790 ++ptr;
3791 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3792 ++ptr;
3793 if (*ptr)
3794 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3795 else
3796 tp->rx_opt.rcv_tsecr = 0;
3797 return true;
3798 }
3799 return false;
3800}
3801
3802
3803
3804
3805static bool tcp_fast_parse_options(const struct sk_buff *skb,
3806 const struct tcphdr *th, struct tcp_sock *tp)
3807{
3808
3809
3810
3811 if (th->doff == (sizeof(*th) / 4)) {
3812 tp->rx_opt.saw_tstamp = 0;
3813 return false;
3814 } else if (tp->rx_opt.tstamp_ok &&
3815 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3816 if (tcp_parse_aligned_timestamp(tp, th))
3817 return true;
3818 }
3819
3820 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3821 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3822 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3823
3824 return true;
3825}
3826
3827#ifdef CONFIG_TCP_MD5SIG
3828
3829
3830
3831const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3832{
3833 int length = (th->doff << 2) - sizeof(*th);
3834 const u8 *ptr = (const u8 *)(th + 1);
3835
3836
3837 while (length >= TCPOLEN_MD5SIG) {
3838 int opcode = *ptr++;
3839 int opsize;
3840
3841		switch (opcode) {
3842 case TCPOPT_EOL:
3843 return NULL;
3844 case TCPOPT_NOP:
3845 length--;
3846 continue;
3847 default:
3848 opsize = *ptr++;
3849 if (opsize < 2 || opsize > length)
3850 return NULL;
3851 if (opcode == TCPOPT_MD5SIG)
3852 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3853 }
3854 ptr += opsize - 2;
3855 length -= opsize;
3856 }
3857 return NULL;
3858}
3859EXPORT_SYMBOL(tcp_parse_md5sig_option);
3860#endif
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
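/* A segment carrying an old timestamp is not necessarily a PAWS
 * violation: a delayed, reordered pure duplicate ACK is harmless.
 * Recognize that case here so tcp_paws_discard() does not drop it.
 */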
3885static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3886{
3887 const struct tcp_sock *tp = tcp_sk(sk);
3888 const struct tcphdr *th = tcp_hdr(skb);
3889 u32 seq = TCP_SKB_CB(skb)->seq;
3890 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3891
3892	return (/* 1. Pure ACK with correct sequence number ... */
3893		(th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3894
3895		/* 2. ... and it is a pure duplicate (ack == snd_una) ... */
3896		ack == tp->snd_una &&
3897
3898		/* 3. ... and it does not update the send window ... */
3899		!tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3900
3901		/* 4. ... and its timestamp sits within the replay window (~RTO). */
3902		(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3903}
3904
3905static inline bool tcp_paws_discard(const struct sock *sk,
3906 const struct sk_buff *skb)
3907{
3908 const struct tcp_sock *tp = tcp_sk(sk);
3909
3910 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3911 !tcp_disordered_ack(sk, skb);
3912}
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
3928{
3929 return !before(end_seq, tp->rcv_wup) &&
3930 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3931}
3932
3933
3934void tcp_reset(struct sock *sk)
3935{
3936
3937 switch (sk->sk_state) {
3938 case TCP_SYN_SENT:
3939 sk->sk_err = ECONNREFUSED;
3940 break;
3941 case TCP_CLOSE_WAIT:
3942 sk->sk_err = EPIPE;
3943 break;
3944 case TCP_CLOSE:
3945 return;
3946 default:
3947 sk->sk_err = ECONNRESET;
3948 }
3949
3950 smp_wmb();
3951
3952 tcp_done(sk);
3953
3954 if (!sock_flag(sk, SOCK_DEAD))
3955 sk->sk_error_report(sk);
3956}
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
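/* Process an incoming FIN: schedule an ACK, shut down the receive
 * side, and move the connection through CLOSE_WAIT / CLOSING /
 * TIME_WAIT according to the current state.  Any out-of-order data
 * still queued is now useless and is purged.
 */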
3972static void tcp_fin(struct sock *sk)
3973{
3974 struct tcp_sock *tp = tcp_sk(sk);
3975
3976 inet_csk_schedule_ack(sk);
3977
3978 sk->sk_shutdown |= RCV_SHUTDOWN;
3979 sock_set_flag(sk, SOCK_DONE);
3980
3981 switch (sk->sk_state) {
3982 case TCP_SYN_RECV:
3983 case TCP_ESTABLISHED:
3984
3985 tcp_set_state(sk, TCP_CLOSE_WAIT);
3986 inet_csk(sk)->icsk_ack.pingpong = 1;
3987 break;
3988
3989 case TCP_CLOSE_WAIT:
3990 case TCP_CLOSING:
3991
3992
3993
3994 break;
3995 case TCP_LAST_ACK:
3996
3997 break;
3998
3999 case TCP_FIN_WAIT1:
4000
4001
4002
4003
4004 tcp_send_ack(sk);
4005 tcp_set_state(sk, TCP_CLOSING);
4006 break;
4007 case TCP_FIN_WAIT2:
4008
4009 tcp_send_ack(sk);
4010 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4011 break;
4012 default:
4013
4014
4015
4016 pr_err("%s: Impossible, sk->sk_state=%d\n",
4017 __func__, sk->sk_state);
4018 break;
4019 }
4020
4021
4022
4023
4024 skb_rbtree_purge(&tp->out_of_order_queue);
4025 if (tcp_is_sack(tp))
4026 tcp_sack_reset(&tp->rx_opt);
4027 sk_mem_reclaim(sk);
4028
4029 if (!sock_flag(sk, SOCK_DEAD)) {
4030 sk->sk_state_change(sk);
4031
4032
4033 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4034 sk->sk_state == TCP_CLOSE)
4035 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4036 else
4037 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4038 }
4039}
4040
4041static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4042 u32 end_seq)
4043{
4044 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4045 if (before(seq, sp->start_seq))
4046 sp->start_seq = seq;
4047 if (after(end_seq, sp->end_seq))
4048 sp->end_seq = end_seq;
4049 return true;
4050 }
4051 return false;
4052}
4053
4054static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4055{
4056 struct tcp_sock *tp = tcp_sk(sk);
4057
4058 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4059 int mib_idx;
4060
4061 if (before(seq, tp->rcv_nxt))
4062 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4063 else
4064 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4065
4066 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4067
4068 tp->rx_opt.dsack = 1;
4069 tp->duplicate_sack[0].start_seq = seq;
4070 tp->duplicate_sack[0].end_seq = end_seq;
4071 }
4072}
4073
4074static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4075{
4076 struct tcp_sock *tp = tcp_sk(sk);
4077
4078 if (!tp->rx_opt.dsack)
4079 tcp_dsack_set(sk, seq, end_seq);
4080 else
4081 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4082}
4083
4084static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4085{
4086 struct tcp_sock *tp = tcp_sk(sk);
4087
4088 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4089 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4090 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4091 tcp_enter_quickack_mode(sk);
4092
4093 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4094 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4095
4096 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4097 end_seq = tp->rcv_nxt;
4098 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4099 }
4100 }
4101
4102 tcp_send_ack(sk);
4103}
4104
4105
4106
4107
4108static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4109{
4110 int this_sack;
4111 struct tcp_sack_block *sp = &tp->selective_acks[0];
4112 struct tcp_sack_block *swalk = sp + 1;
4113
4114
4115
4116
4117 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4118 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4119 int i;
4120
4121
4122
4123
4124 tp->rx_opt.num_sacks--;
4125 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4126 sp[i] = sp[i + 1];
4127 continue;
4128 }
4129 this_sack++, swalk++;
4130 }
4131}
4132
4133static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4134{
4135 struct tcp_sock *tp = tcp_sk(sk);
4136 struct tcp_sack_block *sp = &tp->selective_acks[0];
4137 int cur_sacks = tp->rx_opt.num_sacks;
4138 int this_sack;
4139
4140 if (!cur_sacks)
4141 goto new_sack;
4142
4143 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4144 if (tcp_sack_extend(sp, seq, end_seq)) {
4145
4146 for (; this_sack > 0; this_sack--, sp--)
4147 swap(*sp, *(sp - 1));
4148 if (cur_sacks > 1)
4149 tcp_sack_maybe_coalesce(tp);
4150 return;
4151 }
4152 }
4153
4154
4155
4156
4157
4158
4159
4160 if (this_sack >= TCP_NUM_SACKS) {
4161 this_sack--;
4162 tp->rx_opt.num_sacks--;
4163 sp--;
4164 }
4165 for (; this_sack > 0; this_sack--, sp--)
4166 *sp = *(sp - 1);
4167
4168new_sack:
4169
4170 sp->start_seq = seq;
4171 sp->end_seq = end_seq;
4172 tp->rx_opt.num_sacks++;
4173}
4174
4175
4176
4177static void tcp_sack_remove(struct tcp_sock *tp)
4178{
4179 struct tcp_sack_block *sp = &tp->selective_acks[0];
4180 int num_sacks = tp->rx_opt.num_sacks;
4181 int this_sack;
4182
4183
4184 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4185 tp->rx_opt.num_sacks = 0;
4186 return;
4187 }
4188
4189 for (this_sack = 0; this_sack < num_sacks;) {
4190
4191 if (!before(tp->rcv_nxt, sp->start_seq)) {
4192 int i;
4193
4194
4195 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4196
4197
4198			for (i = this_sack + 1; i < num_sacks; i++)
4199				tp->selective_acks[i - 1] = tp->selective_acks[i];
4200 num_sacks--;
4201 continue;
4202 }
4203 this_sack++;
4204 sp++;
4205 }
4206 tp->rx_opt.num_sacks = num_sacks;
4207}
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222static bool tcp_try_coalesce(struct sock *sk,
4223 struct sk_buff *to,
4224 struct sk_buff *from,
4225 bool *fragstolen)
4226{
4227 int delta;
4228
4229 *fragstolen = false;
4230
4231
4232 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4233 return false;
4234
4235 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4236 return false;
4237
4238 atomic_add(delta, &sk->sk_rmem_alloc);
4239 sk_mem_charge(sk, delta);
4240 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4241 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4242 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4243 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4244 return true;
4245}
4246
4247static bool tcp_ooo_try_coalesce(struct sock *sk,
4248 struct sk_buff *to,
4249 struct sk_buff *from,
4250 bool *fragstolen)
4251{
4252 bool res = tcp_try_coalesce(sk, to, from, fragstolen);
4253
4254
4255 if (res) {
4256 u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
4257 max_t(u16, 1, skb_shinfo(from)->gso_segs);
4258
4259 skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
4260 }
4261 return res;
4262}
4263
4264static void tcp_drop(struct sock *sk, struct sk_buff *skb)
4265{
4266 sk_drops_add(sk, skb);
4267 __kfree_skb(skb);
4268}
4269
4270
4271
4272
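/* Move any now in-sequence segments from the out-of-order rb-tree to
 * the receive queue, generating D-SACK information for overlapping
 * data and processing a queued FIN once it becomes deliverable.
 */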
4273static void tcp_ofo_queue(struct sock *sk)
4274{
4275 struct tcp_sock *tp = tcp_sk(sk);
4276 __u32 dsack_high = tp->rcv_nxt;
4277 bool fin, fragstolen, eaten;
4278 struct sk_buff *skb, *tail;
4279 struct rb_node *p;
4280
4281 p = rb_first(&tp->out_of_order_queue);
4282 while (p) {
4283 skb = rb_to_skb(p);
4284 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4285 break;
4286
4287 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4288 __u32 dsack = dsack_high;
4289 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4290 dsack_high = TCP_SKB_CB(skb)->end_seq;
4291 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4292 }
4293 p = rb_next(p);
4294 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4295
4296 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4297 SOCK_DEBUG(sk, "ofo packet was already received\n");
4298 tcp_drop(sk, skb);
4299 continue;
4300 }
4301 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4302 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4303 TCP_SKB_CB(skb)->end_seq);
4304
4305 tail = skb_peek_tail(&sk->sk_receive_queue);
4306 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4307 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4308 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4309 if (!eaten)
4310 __skb_queue_tail(&sk->sk_receive_queue, skb);
4311 else
4312 kfree_skb_partial(skb, fragstolen);
4313
4314 if (unlikely(fin)) {
4315 tcp_fin(sk);
4316
4317
4318
4319 break;
4320 }
4321 }
4322}
4323
4324static bool tcp_prune_ofo_queue(struct sock *sk);
4325static int tcp_prune_queue(struct sock *sk);
4326
4327static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4328 unsigned int size)
4329{
4330 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4331 !sk_rmem_schedule(sk, skb, size)) {
4332
4333 if (tcp_prune_queue(sk) < 0)
4334 return -1;
4335
4336 while (!sk_rmem_schedule(sk, skb, size)) {
4337 if (!tcp_prune_ofo_queue(sk))
4338 return -1;
4339 }
4340 }
4341 return 0;
4342}
4343
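/* Queue an out-of-order segment.  Segments are kept in an rb-tree
 * ordered by sequence number; fully overlapped duplicates are dropped
 * with a D-SACK, adjacent ones are coalesced, and the SACK blocks we
 * advertise are updated to cover the new data.
 */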
4344static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4345{
4346 struct tcp_sock *tp = tcp_sk(sk);
4347 struct rb_node **p, *parent;
4348 struct sk_buff *skb1;
4349 u32 seq, end_seq;
4350 bool fragstolen;
4351
4352 TCP_ECN_check_ce(tp, skb);
4353
4354 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4355 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
4356 tcp_drop(sk, skb);
4357 return;
4358 }
4359
4360
4361 tp->pred_flags = 0;
4362 inet_csk_schedule_ack(sk);
4363
4364 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4365 seq = TCP_SKB_CB(skb)->seq;
4366 end_seq = TCP_SKB_CB(skb)->end_seq;
4367 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4368 tp->rcv_nxt, seq, end_seq);
4369
4370 p = &tp->out_of_order_queue.rb_node;
4371 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4372
4373 if (tcp_is_sack(tp)) {
4374 tp->rx_opt.num_sacks = 1;
4375 tp->selective_acks[0].start_seq = seq;
4376 tp->selective_acks[0].end_seq = end_seq;
4377 }
4378 rb_link_node(&skb->rbnode, NULL, p);
4379 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4380 tp->ooo_last_skb = skb;
4381 goto end;
4382 }
4383
4384
4385
4386
4387 if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
4388coalesce_done:
4389 tcp_grow_window(sk, skb);
4390 kfree_skb_partial(skb, fragstolen);
4391 skb = NULL;
4392 goto add_sack;
4393 }
4394
4395
4396 parent = NULL;
4397 while (*p) {
4398 parent = *p;
4399 skb1 = rb_to_skb(parent);
4400 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4401 p = &parent->rb_left;
4402 continue;
4403 }
4404 if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4405 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4406
4407 NET_INC_STATS_BH(sock_net(sk),
4408 LINUX_MIB_TCPOFOMERGE);
4409 tcp_drop(sk, skb);
4410 skb = NULL;
4411 tcp_dsack_set(sk, seq, end_seq);
4412 goto add_sack;
4413 }
4414 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4415
4416 tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
4417 } else {
4418
4419
4420
4421 rb_replace_node(&skb1->rbnode, &skb->rbnode,
4422 &tp->out_of_order_queue);
4423 tcp_dsack_extend(sk,
4424 TCP_SKB_CB(skb1)->seq,
4425 TCP_SKB_CB(skb1)->end_seq);
4426 NET_INC_STATS_BH(sock_net(sk),
4427 LINUX_MIB_TCPOFOMERGE);
4428 tcp_drop(sk, skb1);
4429 goto merge_right;
4430 }
4431 } else if (tcp_ooo_try_coalesce(sk, skb1, skb, &fragstolen)) {
4432 goto coalesce_done;
4433 }
4434 p = &parent->rb_right;
4435 }
4436
4437
4438 rb_link_node(&skb->rbnode, parent, p);
4439 rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
4440
4441merge_right:
4442
4443 while ((skb1 = skb_rb_next(skb)) != NULL) {
4444 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4445 break;
4446 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4447 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4448 end_seq);
4449 break;
4450 }
4451 rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
4452 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4453 TCP_SKB_CB(skb1)->end_seq);
4454 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4455 tcp_drop(sk, skb1);
4456 }
4457
4458 if (!skb1)
4459 tp->ooo_last_skb = skb;
4460
4461add_sack:
4462 if (tcp_is_sack(tp))
4463 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4464end:
4465 if (skb) {
4466 tcp_grow_window(sk, skb);
4467 skb_set_owner_r(skb, sk);
4468 }
4469}
4470
4471static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4472 bool *fragstolen)
4473{
4474 int eaten;
4475 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4476
4477 __skb_pull(skb, hdrlen);
4478 eaten = (tail &&
4479 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4480 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4481 if (!eaten) {
4482 __skb_queue_tail(&sk->sk_receive_queue, skb);
4483 skb_set_owner_r(skb, sk);
4484 }
4485 return eaten;
4486}
4487
4488int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4489{
4490 struct sk_buff *skb;
4491 bool fragstolen;
4492
4493 if (size == 0)
4494 return 0;
4495
4496 skb = alloc_skb(size, sk->sk_allocation);
4497 if (!skb)
4498 goto err;
4499
4500 if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4501 goto err_free;
4502
4503 if (memcpy_from_msg(skb_put(skb, size), msg, size))
4504 goto err_free;
4505
4506 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4507 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4508 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4509
4510 if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
4511 WARN_ON_ONCE(fragstolen);
4512 __kfree_skb(skb);
4513 }
4514 return size;
4515
4516err_free:
4517 kfree_skb(skb);
4518err:
4519 return -ENOMEM;
4520}
4521
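/* Queue an in-sequence data segment: copy straight to the user when a
 * receiver is blocked in recvmsg, otherwise append to
 * sk_receive_queue, then try to drain the out-of-order queue.  Old
 * duplicates and out-of-window segments are answered with a
 * (D)SACK-carrying ACK and dropped.
 */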
4522static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4523{
4524 struct tcp_sock *tp = tcp_sk(sk);
4525 bool fragstolen = false;
4526 int eaten = -1;
4527
4528 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
4529 __kfree_skb(skb);
4530 return;
4531 }
4532 skb_dst_drop(skb);
4533 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4534
4535 TCP_ECN_accept_cwr(tp, skb);
4536
4537 tp->rx_opt.dsack = 0;
4538
4539
4540
4541
4542
4543 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4544 if (tcp_receive_window(tp) == 0)
4545 goto out_of_window;
4546
4547
4548 if (tp->ucopy.task == current &&
4549 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4550 sock_owned_by_user(sk) && !tp->urg_data) {
4551 int chunk = min_t(unsigned int, skb->len,
4552 tp->ucopy.len);
4553
4554 __set_current_state(TASK_RUNNING);
4555
4556 local_bh_enable();
4557 if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
4558 tp->ucopy.len -= chunk;
4559 tp->copied_seq += chunk;
4560 eaten = (chunk == skb->len);
4561 tcp_rcv_space_adjust(sk);
4562 }
4563 local_bh_disable();
4564 }
4565
4566 if (eaten <= 0) {
4567queue_and_out:
4568 if (eaten < 0) {
4569 if (skb_queue_len(&sk->sk_receive_queue) == 0)
4570 sk_forced_mem_schedule(sk, skb->truesize);
4571 else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4572 goto drop;
4573 }
4574 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4575 }
4576 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4577 if (skb->len)
4578 tcp_event_data_recv(sk, skb);
4579 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4580 tcp_fin(sk);
4581
4582 if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
4583 tcp_ofo_queue(sk);
4584
4585
4586
4587
4588 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4589 inet_csk(sk)->icsk_ack.pingpong = 0;
4590 }
4591
4592 if (tp->rx_opt.num_sacks)
4593 tcp_sack_remove(tp);
4594
4595 tcp_fast_path_check(sk);
4596
4597 if (eaten > 0)
4598 kfree_skb_partial(skb, fragstolen);
4599 if (!sock_flag(sk, SOCK_DEAD))
4600 sk->sk_data_ready(sk, 0);
4601 return;
4602 }
4603
4604 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4605
4606 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4607 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4608
4609out_of_window:
4610 tcp_enter_quickack_mode(sk);
4611 inet_csk_schedule_ack(sk);
4612drop:
4613 tcp_drop(sk, skb);
4614 return;
4615 }
4616
4617
4618 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4619 goto out_of_window;
4620
4621 tcp_enter_quickack_mode(sk);
4622
4623 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4624
4625 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4626 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4627 TCP_SKB_CB(skb)->end_seq);
4628
4629 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4630
4631
4632
4633
4634 if (!tcp_receive_window(tp))
4635 goto out_of_window;
4636 goto queue_and_out;
4637 }
4638
4639 tcp_data_queue_ofo(sk, skb);
4640}
4641
4642static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
4643{
4644 if (list)
4645 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
4646
4647 return skb_rb_next(skb);
4648}
4649
4650static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4651 struct sk_buff_head *list,
4652 struct rb_root *root)
4653{
4654 struct sk_buff *next = tcp_skb_next(skb, list);
4655
4656 if (list)
4657 __skb_unlink(skb, list);
4658 else
4659 rb_erase(&skb->rbnode, root);
4660
4661 __kfree_skb(skb);
4662 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4663
4664 return next;
4665}
4666
4667
4668static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
4669{
4670 struct rb_node **p = &root->rb_node;
4671 struct rb_node *parent = NULL;
4672 struct sk_buff *skb1;
4673
4674 while (*p) {
4675 parent = *p;
4676 skb1 = rb_to_skb(parent);
4677 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
4678 p = &parent->rb_left;
4679 else
4680 p = &parent->rb_right;
4681 }
4682 rb_link_node(&skb->rbnode, parent, p);
4683 rb_insert_color(&skb->rbnode, root);
4684}
4685
4686
4687
4688
4689
4690
4691
4692
4693
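/* Collapse the contiguous segments head..tail covering [start, end)
 * of the given queue (the receive queue or the out-of-order rb-tree)
 * into compact, freshly allocated skbs to reduce memory overhead
 * under pressure.  SYN/FIN segments are never touched.
 */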
4694static void
4695tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
4696 struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
4697{
4698 struct sk_buff *skb = head, *n;
4699 struct sk_buff_head tmp;
4700 bool end_of_skbs;
4701
4702
4703
4704
4705restart:
4706 for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
4707 n = tcp_skb_next(skb, list);
4708
4709
4710 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4711 skb = tcp_collapse_one(sk, skb, list, root);
4712 if (!skb)
4713 break;
4714 goto restart;
4715 }
4716
4717
4718
4719
4720
4721
4722 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4723 (tcp_win_from_space(skb->truesize) > skb->len ||
4724 before(TCP_SKB_CB(skb)->seq, start))) {
4725 end_of_skbs = false;
4726 break;
4727 }
4728
4729 if (n && n != tail &&
4730 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
4731 end_of_skbs = false;
4732 break;
4733 }
4734
4735
4736 start = TCP_SKB_CB(skb)->end_seq;
4737 }
4738 if (end_of_skbs ||
4739 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4740 return;
4741
4742 __skb_queue_head_init(&tmp);
4743
4744 while (before(start, end)) {
4745 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4746 struct sk_buff *nskb;
4747
4748 nskb = alloc_skb(copy, GFP_ATOMIC);
4749 if (!nskb)
4750 break;
4751
4752 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4753 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4754 if (list)
4755 __skb_queue_before(list, skb, nskb);
4756 else
4757 __skb_queue_tail(&tmp, nskb);
4758 skb_set_owner_r(nskb, sk);
4759
4760
4761 while (copy > 0) {
4762 int offset = start - TCP_SKB_CB(skb)->seq;
4763 int size = TCP_SKB_CB(skb)->end_seq - start;
4764
4765 BUG_ON(offset < 0);
4766 if (size > 0) {
4767 size = min(copy, size);
4768 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4769 BUG();
4770 TCP_SKB_CB(nskb)->end_seq += size;
4771 copy -= size;
4772 start += size;
4773 }
4774 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4775 skb = tcp_collapse_one(sk, skb, list, root);
4776 if (!skb ||
4777 skb == tail ||
4778 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4779 goto end;
4780 }
4781 }
4782 }
4783end:
4784 skb_queue_walk_safe(&tmp, skb, n)
4785 tcp_rbtree_insert(root, skb);
4786}
4787
4788
4789
4790
4791static void tcp_collapse_ofo_queue(struct sock *sk)
4792{
4793 struct tcp_sock *tp = tcp_sk(sk);
4794 u32 range_truesize, sum_tiny = 0;
4795 struct sk_buff *skb, *head;
4796 u32 start, end;
4797
4798 skb = skb_rb_first(&tp->out_of_order_queue);
4799new_range:
4800 if (!skb) {
4801 tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
4802 return;
4803 }
4804 start = TCP_SKB_CB(skb)->seq;
4805 end = TCP_SKB_CB(skb)->end_seq;
4806 range_truesize = skb->truesize;
4807
4808 for (head = skb;;) {
4809 skb = skb_rb_next(skb);
4810
4811
4812
4813
4814 if (!skb ||
4815 after(TCP_SKB_CB(skb)->seq, end) ||
4816 before(TCP_SKB_CB(skb)->end_seq, start)) {
4817
4818 if (range_truesize != head->truesize ||
4819 end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
4820 tcp_collapse(sk, NULL, &tp->out_of_order_queue,
4821 head, skb, start, end);
4822 } else {
4823 sum_tiny += range_truesize;
4824 if (sum_tiny > sk->sk_rcvbuf >> 3)
4825 return;
4826 }
4827 goto new_range;
4828 }
4829
4830 range_truesize += skb->truesize;
4831 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
4832 start = TCP_SKB_CB(skb)->seq;
4833 if (after(TCP_SKB_CB(skb)->end_seq, end))
4834 end = TCP_SKB_CB(skb)->end_seq;
4835 }
4836}
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
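/* Purge the out-of-order queue, starting from the newest (rightmost)
 * segments, until the socket fits back under its rcvbuf and memory
 * pressure limits.  Returns true if anything was dropped.  SACK state
 * is reset because the dropped data will have to be retransmitted.
 */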
4849static bool tcp_prune_ofo_queue(struct sock *sk)
4850{
4851 struct tcp_sock *tp = tcp_sk(sk);
4852 struct rb_node *node, *prev;
4853 int goal;
4854
4855 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4856 return false;
4857
4858 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4859 goal = sk->sk_rcvbuf >> 3;
4860 node = &tp->ooo_last_skb->rbnode;
4861 do {
4862 prev = rb_prev(node);
4863 rb_erase(node, &tp->out_of_order_queue);
4864 goal -= rb_to_skb(node)->truesize;
4865 tcp_drop(sk, rb_to_skb(node));
4866 if (!prev || goal <= 0) {
4867 sk_mem_reclaim(sk);
4868 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
4869 !tcp_under_memory_pressure(sk))
4870 break;
4871 goal = sk->sk_rcvbuf >> 3;
4872 }
4873 node = prev;
4874 } while (node);
4875 tp->ooo_last_skb = rb_to_skb(prev);
4876
4877
4878
4879
4880
4881
4882 if (tp->rx_opt.sack_ok)
4883 tcp_sack_reset(&tp->rx_opt);
4884 return true;
4885}
4886
4887
4888
4889
4890
4891
4892
4893
4894static int tcp_prune_queue(struct sock *sk)
4895{
4896 struct tcp_sock *tp = tcp_sk(sk);
4897
4898 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4899
4900 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4901
4902 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4903 tcp_clamp_window(sk);
4904 else if (tcp_under_memory_pressure(sk))
4905 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4906
4907 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4908 return 0;
4909
4910 tcp_collapse_ofo_queue(sk);
4911 if (!skb_queue_empty(&sk->sk_receive_queue))
4912 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
4913 skb_peek(&sk->sk_receive_queue),
4914 NULL,
4915 tp->copied_seq, tp->rcv_nxt);
4916 sk_mem_reclaim(sk);
4917
4918 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4919 return 0;
4920
4921
4922
4923
4924 tcp_prune_ofo_queue(sk);
4925
4926 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4927 return 0;
4928
4929
4930
4931
4932
4933 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4934
4935
4936 tp->pred_flags = 0;
4937 return -1;
4938}
4939
4940static bool tcp_should_expand_sndbuf(const struct sock *sk)
4941{
4942 const struct tcp_sock *tp = tcp_sk(sk);
4943
4944
4945
4946
4947 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4948 return false;
4949
4950
4951 if (tcp_under_memory_pressure(sk))
4952 return false;
4953
4954
4955 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4956 return false;
4957
4958
4959 if (tp->packets_out >= tp->snd_cwnd)
4960 return false;
4961
4962 return true;
4963}
4964
4965
4966
4967
4968
4969
4970
4971static void tcp_new_space(struct sock *sk)
4972{
4973 struct tcp_sock *tp = tcp_sk(sk);
4974
4975 if (tcp_should_expand_sndbuf(sk)) {
4976 tcp_sndbuf_expand(sk);
4977 tp->snd_cwnd_stamp = tcp_time_stamp;
4978 }
4979
4980 sk->sk_write_space(sk);
4981}
4982
4983static void tcp_check_space(struct sock *sk)
4984{
4985 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4986 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4987 if (sk->sk_socket &&
4988 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4989 tcp_new_space(sk);
4990 }
4991}
4992
4993static inline void tcp_data_snd_check(struct sock *sk)
4994{
4995 tcp_push_pending_frames(sk);
4996 tcp_check_space(sk);
4997}
4998
4999
5000
5001
5002static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5003{
5004 struct tcp_sock *tp = tcp_sk(sk);
5005
5006
5007 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5008
5009
5010
5011 __tcp_select_window(sk) >= tp->rcv_wnd) ||
5012
5013 tcp_in_quickack_mode(sk) ||
5014
5015 (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
5016
5017 tcp_send_ack(sk);
5018 } else {
5019
5020 tcp_send_delayed_ack(sk);
5021 }
5022}
5023
5024static inline void tcp_ack_snd_check(struct sock *sk)
5025{
5026 if (!inet_csk_ack_scheduled(sk)) {
5027
5028 return;
5029 }
5030 __tcp_ack_snd_check(sk, 1);
5031}
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5044{
5045 struct tcp_sock *tp = tcp_sk(sk);
5046 u32 ptr = ntohs(th->urg_ptr);
5047
5048 if (ptr && !sysctl_tcp_stdurg)
5049 ptr--;
5050 ptr += ntohl(th->seq);
5051
5052
5053 if (after(tp->copied_seq, ptr))
5054 return;
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066 if (before(ptr, tp->rcv_nxt))
5067 return;
5068
5069
5070 if (tp->urg_data && !after(ptr, tp->urg_seq))
5071 return;
5072
5073
5074 sk_send_sigurg(sk);
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
5092 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
5093 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
5094 tp->copied_seq++;
5095 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
5096 __skb_unlink(skb, &sk->sk_receive_queue);
5097 __kfree_skb(skb);
5098 }
5099 }
5100
5101 tp->urg_data = TCP_URG_NOTYET;
5102 tp->urg_seq = ptr;
5103
5104
5105 tp->pred_flags = 0;
5106}
5107
5108
5109static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5110{
5111 struct tcp_sock *tp = tcp_sk(sk);
5112
5113
5114 if (th->urg)
5115 tcp_check_urg(sk, th);
5116
5117
5118 if (tp->urg_data == TCP_URG_NOTYET) {
5119 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5120 th->syn;
5121
5122
5123 if (ptr < skb->len) {
5124 u8 tmp;
5125 if (skb_copy_bits(skb, ptr, &tmp, 1))
5126 BUG();
5127 tp->urg_data = TCP_URG_VALID | tmp;
5128 if (!sock_flag(sk, SOCK_DEAD))
5129 sk->sk_data_ready(sk, 0);
5130 }
5131 }
5132}
5133
5134static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5135{
5136 struct tcp_sock *tp = tcp_sk(sk);
5137 int chunk = skb->len - hlen;
5138 int err;
5139
5140 local_bh_enable();
5141 if (skb_csum_unnecessary(skb))
5142 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
5143 else
5144 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
5145 tp->ucopy.iov, chunk);
5146
5147 if (!err) {
5148 tp->ucopy.len -= chunk;
5149 tp->copied_seq += chunk;
5150 tcp_rcv_space_adjust(sk);
5151 }
5152
5153 local_bh_disable();
5154 return err;
5155}
5156
5157static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5158 struct sk_buff *skb)
5159{
5160 __sum16 result;
5161
5162 if (sock_owned_by_user(sk)) {
5163 local_bh_enable();
5164 result = __tcp_checksum_complete(skb);
5165 local_bh_disable();
5166 } else {
5167 result = __tcp_checksum_complete(skb);
5168 }
5169 return result;
5170}
5171
5172static inline bool tcp_checksum_complete_user(struct sock *sk,
5173 struct sk_buff *skb)
5174{
5175 return !skb_csum_unnecessary(skb) &&
5176 __tcp_checksum_complete_user(sk, skb);
5177}
5178
/* Does PAWS and seqno based validation of an incoming segment, flags will
 * play significant role here.
 */
5182static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5183 const struct tcphdr *th, int syn_inerr)
5184{
5185 struct tcp_sock *tp = tcp_sk(sk);
5186
5187
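	/* RFC1323: H1. Apply PAWS check first. */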
5188 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5189 tcp_paws_discard(sk, skb)) {
5190 if (!th->rst) {
5191 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5192 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5193 LINUX_MIB_TCPACKSKIPPEDPAWS,
5194 &tp->last_oow_ack_time))
5195 tcp_send_dupack(sk, skb);
5196 goto discard;
5197 }
5198
5199 }
5200
5201
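	/* Step 1: check sequence number */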
5202 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		/* RFC793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST
		 * bit is set, if so drop the segment and return)".
		 */
5209 if (!th->rst) {
5210 if (th->syn)
5211 goto syn_challenge;
5212 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5213 LINUX_MIB_TCPACKSKIPPEDSEQ,
5214 &tp->last_oow_ack_time))
5215 tcp_send_dupack(sk, skb);
5216 }
5217 goto discard;
5218 }
5219
5220
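	/* Step 2: check RST bit */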
5221 if (th->rst) {
		/* RFC 5961 3.2 :
		 * If sequence number exactly matches RCV.NXT, then
		 *     RESET the connection
		 * else
		 *     Send a challenge ACK
		 */
5228 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
5229 tcp_reset(sk);
5230 else
5231 tcp_send_challenge_ack(sk, skb);
5232 goto discard;
5233 }
5234
	/* step 3: check security and precedence [ignored] */

	/* step 4: Check for a SYN
	 * RFC 5961 4.2 : Send a challenge ack
	 */
5240 if (th->syn) {
5241syn_challenge:
5242 if (syn_inerr)
5243 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5244 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5245 tcp_send_challenge_ack(sk, skb);
5246 goto discard;
5247 }
5248
5249 return true;
5250
5251discard:
5252 tcp_drop(sk, skb);
5253 return false;
5254}
5255
/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path. The fast path is
 *	disabled when:
 *	- A zero window was announced from us - zero window probing
 *	  is only handled properly on the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left.
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags).
 *	- Data is sent in both directions. The fast path only supports pure
 *	  senders or pure receivers (this means either the sequence number
 *	  or the ack value must stay constant).
 *	- An unexpected TCP option is present.
 *
 *	When these conditions are not satisfied it drops into a standard
 *	receive procedure patterned after RFC793 to handle all cases.
 *	The first three cases are guaranteed by proper pred_flags setting,
 *	the rest is checked inline. Fast processing is turned on in
 *	tcp_data_queue when everything is OK.
 */
5279void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5280 const struct tcphdr *th, unsigned int len)
5281{
5282 struct tcp_sock *tp = tcp_sk(sk);
5283
5284 if (unlikely(sk->sk_rx_dst == NULL))
5285 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5286
	/*
	 *	Header prediction.
	 *	The code loosely follows the one in the famous
	 *	"30 instruction TCP receive" Van Jacobson mail.
	 *
	 *	Van's trick is to deposit buffers into the socket queue
	 *	on a device interrupt, to call the tcp receive function
	 *	in process context and checksum and copy the buffer to
	 *	user space there.
	 *
	 *	Our current scheme is not silly either but we take the
	 *	extra cost of the net_bh soft interrupt processing.
	 *	We do checksum and copy also but from device to kernel.
	 */
5301 tp->rx_opt.saw_tstamp = 0;
5302
	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header prediction is to be made.
	 *	'S' will always be tp->tcp_header_len >> 2
	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
	 *	turn it off (when there are holes in the receive
	 *	space for instance).
	 *	The PSH flag is ignored.
	 */
5312 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5313 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5314 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5315 int tcp_header_len = tp->tcp_header_len;
5316
5317
5318
5319
5320
5321
5322
5323 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5324
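			/* No aligned timestamp option? Slow path! */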
5325 if (!tcp_parse_aligned_timestamp(tp, th))
5326 goto slow_path;
5327
5328
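			/* If PAWS failed, check it more carefully in slow path. */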
5329 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5330 goto slow_path;
5331
			/* DO NOT update ts_recent here, if checksum fails
			 * and timestamp was corrupted part, it will result
			 * in a hung connection since we will drop all
			 * future packets due to the PAWS test.
			 */
5337 }
5338
5339 if (len <= tcp_header_len) {
5340
5341 if (len == tcp_header_len) {
5342
5343
5344
5345
5346 if (tcp_header_len ==
5347 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5348 tp->rcv_nxt == tp->rcv_wup)
5349 tcp_store_ts_recent(tp);
5350
5351
5352
5353
5354 tcp_ack(sk, skb, 0);
5355 __kfree_skb(skb);
5356 tcp_data_snd_check(sk);
5357 return;
5358 } else {
5359 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5360 goto discard;
5361 }
5362 } else {
5363 int eaten = 0;
5364 bool fragstolen = false;
5365
5366 if (tp->ucopy.task == current &&
5367 tp->copied_seq == tp->rcv_nxt &&
5368 len - tcp_header_len <= tp->ucopy.len &&
5369 sock_owned_by_user(sk)) {
5370 __set_current_state(TASK_RUNNING);
5371
5372 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
5373
5374
5375
5376
5377 if (tcp_header_len ==
5378 (sizeof(struct tcphdr) +
5379 TCPOLEN_TSTAMP_ALIGNED) &&
5380 tp->rcv_nxt == tp->rcv_wup)
5381 tcp_store_ts_recent(tp);
5382
5383 tcp_rcv_rtt_measure_ts(sk, skb);
5384
5385 __skb_pull(skb, tcp_header_len);
5386 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
5387 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5388 eaten = 1;
5389 }
5390 }
5391 if (!eaten) {
5392 if (tcp_checksum_complete_user(sk, skb))
5393 goto csum_error;
5394
5395 if ((int)skb->truesize > sk->sk_forward_alloc)
5396 goto step5;
5397
5398
5399
5400
5401
5402 if (tcp_header_len ==
5403 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5404 tp->rcv_nxt == tp->rcv_wup)
5405 tcp_store_ts_recent(tp);
5406
5407 tcp_rcv_rtt_measure_ts(sk, skb);
5408
5409 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5410
5411
5412 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5413 &fragstolen);
5414 }
5415
5416 tcp_event_data_recv(sk, skb);
5417
5418 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5419
5420 tcp_ack(sk, skb, FLAG_DATA);
5421 tcp_data_snd_check(sk);
5422 if (!inet_csk_ack_scheduled(sk))
5423 goto no_ack;
5424 }
5425
5426 __tcp_ack_snd_check(sk, 0);
5427no_ack:
5428 if (eaten)
5429 kfree_skb_partial(skb, fragstolen);
5430 sk->sk_data_ready(sk, 0);
5431 return;
5432 }
5433 }
5434
5435slow_path:
5436 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5437 goto csum_error;
5438
5439 if (!th->ack && !th->rst && !th->syn)
5440 goto discard;
5441
	/*
	 *	Standard slow path.
	 */
5446 if (!tcp_validate_incoming(sk, skb, th, 1))
5447 return;
5448
5449step5:
5450 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5451 goto discard;
5452
5453 tcp_rcv_rtt_measure_ts(sk, skb);
5454
5455
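	/* Process urgent data. */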
5456 tcp_urg(sk, skb, th);
5457
5458
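	/* step 7: process the segment text */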
5459 tcp_data_queue(sk, skb);
5460
5461 tcp_data_snd_check(sk);
5462 tcp_ack_snd_check(sk);
5463 return;
5464
5465csum_error:
5466 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
5467 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5468
5469discard:
5470 tcp_drop(sk, skb);
5471}
5472EXPORT_SYMBOL(tcp_rcv_established);
5473
5474void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5475{
5476 struct tcp_sock *tp = tcp_sk(sk);
5477 struct inet_connection_sock *icsk = inet_csk(sk);
5478
5479 tcp_set_state(sk, TCP_ESTABLISHED);
5480
5481 if (skb != NULL) {
5482 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5483 security_inet_conn_established(sk, skb);
5484 }
5485
5486
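	/* Make sure socket is routed, for correct metrics. */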
5487 icsk->icsk_af_ops->rebuild_header(sk);
5488
5489 tcp_init_metrics(sk);
5490
5491 tcp_init_congestion_control(sk);
5492
	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
5496 tp->lsndtime = tcp_time_stamp;
5497
5498 tcp_init_buffer_space(sk);
5499
5500 if (sock_flag(sk, SOCK_KEEPOPEN))
5501 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5502
5503 if (!tp->rx_opt.snd_wscale)
5504 __tcp_fast_path_on(tp, tp->snd_wnd);
5505 else
5506 tp->pred_flags = 0;
5507
5508 if (!sock_flag(sk, SOCK_DEAD)) {
5509 sk->sk_state_change(sk);
5510 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5511 }
5512}
5513
5514static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5515 struct tcp_fastopen_cookie *cookie)
5516{
5517 struct tcp_sock *tp = tcp_sk(sk);
5518 struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
5519 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5520 bool syn_drop = false;
5521
5522 if (mss == tp->rx_opt.user_mss) {
5523 struct tcp_options_received opt;
5524
5525
5526 tcp_clear_options(&opt);
5527 opt.user_mss = opt.mss_clamp = 0;
5528 tcp_parse_options(synack, &opt, 0, NULL);
5529 mss = opt.mss_clamp;
5530 }
5531
5532 if (!tp->syn_fastopen) {
5533
5534 cookie->len = -1;
5535 } else if (tp->total_retrans) {
		/* The SYN timed out and the SYN-ACK neither carries a cookie
		 * nor acknowledges data. Presumably the remote received only
		 * the retransmitted (regular) SYNs: either the original
		 * SYN-data or the corresponding SYN-ACK was dropped.
		 */
5541 syn_drop = (cookie->len < 0 && data);
5542 } else if (cookie->len < 0 && !tp->syn_data) {
5543
5544
5545
5546
5547 try_exp = tp->syn_fastopen_exp ? 2 : 1;
5548 }
5549
5550 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
5551
5552 if (data) {
5553 tcp_for_write_queue_from(data, sk) {
5554 if (data == tcp_send_head(sk) ||
5555 __tcp_retransmit_skb(sk, data))
5556 break;
5557 }
5558 tcp_rearm_rto(sk);
5559 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
5560 return true;
5561 }
5562 tp->syn_data_acked = tp->syn_data;
5563 if (tp->syn_data_acked)
5564 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
5565 return false;
5566}
5567
5568static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5569 const struct tcphdr *th, unsigned int len)
5570{
5571 struct inet_connection_sock *icsk = inet_csk(sk);
5572 struct tcp_sock *tp = tcp_sk(sk);
5573 struct tcp_fastopen_cookie foc = { .len = -1 };
5574 int saved_clamp = tp->rx_opt.mss_clamp;
5575
5576 tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
5577 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
5578 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5579
5580 if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
5589 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
5590 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
5591 goto reset_and_undo;
5592
5593 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5594 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5595 tcp_time_stamp)) {
5596 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
5597 goto reset_and_undo;
5598 }
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608 if (th->rst) {
5609 tcp_reset(sk);
5610 goto discard;
5611 }
5612
5613
5614
5615
5616
5617
5618
5619
5620 if (!th->syn)
5621 goto discard_and_undo;
5622
5623
5624
5625
5626
5627
5628
5629
5630 TCP_ECN_rcv_synack(tp, th);
5631
5632 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5633 tcp_ack(sk, skb, FLAG_SLOWPATH);
5634
5635
5636
5637
5638 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5639 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5640
5641
5642
5643
5644 tp->snd_wnd = ntohs(th->window);
5645
5646 if (!tp->rx_opt.wscale_ok) {
5647 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5648 tp->window_clamp = min(tp->window_clamp, 65535U);
5649 }
5650
5651 if (tp->rx_opt.saw_tstamp) {
5652 tp->rx_opt.tstamp_ok = 1;
5653 tp->tcp_header_len =
5654 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5655 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5656 tcp_store_ts_recent(tp);
5657 } else {
5658 tp->tcp_header_len = sizeof(struct tcphdr);
5659 }
5660
5661 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5662 tcp_enable_fack(tp);
5663
5664 tcp_mtup_init(sk);
5665 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5666 tcp_initialize_rcv_mss(sk);
5667
		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized.
		 */
5671 tp->copied_seq = tp->rcv_nxt;
5672
5673 smp_mb();
5674
5675 tcp_finish_connect(sk, skb);
5676
5677 if ((tp->syn_fastopen || tp->syn_data) &&
5678 tcp_rcv_fastopen_synack(sk, skb, &foc))
5679 return -1;
5680
5681 if (sk->sk_write_pending ||
5682 icsk->icsk_accept_queue.rskq_defer_accept ||
5683 icsk->icsk_ack.pingpong) {
5684
5685
5686
5687
5688
5689
5690
5691 inet_csk_schedule_ack(sk);
5692 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5693 tcp_enter_quickack_mode(sk);
5694 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5695 TCP_DELACK_MAX, TCP_RTO_MAX);
5696
5697discard:
5698 tcp_drop(sk, skb);
5699 return 0;
5700 } else {
5701 tcp_send_ack(sk);
5702 }
5703 return -1;
5704 }
5705
5706
5707
5708 if (th->rst) {
5709
5710
5711
5712
5713
5714
5715 goto discard_and_undo;
5716 }
5717
5718
5719 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5720 tcp_paws_reject(&tp->rx_opt, 0))
5721 goto discard_and_undo;
5722
5723 if (th->syn) {
5724
5725
5726
5727
5728 tcp_set_state(sk, TCP_SYN_RECV);
5729
5730 if (tp->rx_opt.saw_tstamp) {
5731 tp->rx_opt.tstamp_ok = 1;
5732 tcp_store_ts_recent(tp);
5733 tp->tcp_header_len =
5734 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5735 } else {
5736 tp->tcp_header_len = sizeof(struct tcphdr);
5737 }
5738
5739 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5740 tp->copied_seq = tp->rcv_nxt;
5741 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5742
5743
5744
5745
5746 tp->snd_wnd = ntohs(th->window);
5747 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5748 tp->max_window = tp->snd_wnd;
5749
5750 TCP_ECN_rcv_syn(tp, th);
5751
5752 tcp_mtup_init(sk);
5753 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5754 tcp_initialize_rcv_mss(sk);
5755
5756 tcp_send_synack(sk);
5757#if 0
5758
5759
5760
5761
5762
5763
5764
5765
5766
5767
5768
5769 return -1;
5770#else
5771 goto discard;
5772#endif
5773 }
5774
5775
5776
5777
5778discard_and_undo:
5779 tcp_clear_options(&tp->rx_opt);
5780 tp->rx_opt.mss_clamp = saved_clamp;
5781 goto discard;
5782
5783reset_and_undo:
5784 tcp_clear_options(&tp->rx_opt);
5785 tp->rx_opt.mss_clamp = saved_clamp;
5786 return 1;
5787}
5788
/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
5796int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5797 const struct tcphdr *th, unsigned int len)
5798{
5799 struct tcp_sock *tp = tcp_sk(sk);
5800 struct inet_connection_sock *icsk = inet_csk(sk);
5801 struct request_sock *req;
5802 int queued = 0;
5803 bool acceptable;
5804 u32 synack_stamp;
5805
5806 tp->rx_opt.saw_tstamp = 0;
5807
5808 switch (sk->sk_state) {
5809 case TCP_CLOSE:
5810 goto discard;
5811
5812 case TCP_LISTEN:
5813 if (th->ack)
5814 return 1;
5815
5816 if (th->rst)
5817 goto discard;
5818
5819 if (th->syn) {
5820 if (th->fin)
5821 goto discard;
5822 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5823 return 1;
			/* Now we have several options: in theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Queueing such data would leave us open to an easy
			 * denial of service attack that SYN cookies cannot
			 * defend against, so we drop the data in the
			 * interest of security over speed.
			 */
5842 kfree_skb(skb);
5843 return 0;
5844 }
5845 goto discard;
5846
5847 case TCP_SYN_SENT:
5848 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5849 if (queued >= 0)
5850 return queued;
5851
5852
5853 tcp_urg(sk, skb, th);
5854 __kfree_skb(skb);
5855 tcp_data_snd_check(sk);
5856 return 0;
5857 }
5858
5859 req = tp->fastopen_rsk;
5860 if (req != NULL) {
5861 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
5862 sk->sk_state != TCP_FIN_WAIT1);
5863
5864 if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
5865 goto discard;
5866 }
5867
5868 if (!th->ack && !th->rst && !th->syn)
5869 goto discard;
5870
5871 if (!tcp_validate_incoming(sk, skb, th, 0))
5872 return 0;
5873
5874
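	/* step 5: check the ACK field */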
5875 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
5876 FLAG_UPDATE_TS_RECENT) > 0;
5877
5878 switch (sk->sk_state) {
5879 case TCP_SYN_RECV:
5880 if (!acceptable)
5881 return 1;
5882
5883
5884
5885
5886 if (req) {
5887 synack_stamp = tcp_rsk(req)->snt_synack;
5888 tp->total_retrans = req->num_retrans;
5889 reqsk_fastopen_remove(sk, req, false);
5890 } else {
5891 synack_stamp = tp->lsndtime;
5892
5893 icsk->icsk_af_ops->rebuild_header(sk);
5894 tcp_init_congestion_control(sk);
5895
5896 tcp_mtup_init(sk);
5897 tp->copied_seq = tp->rcv_nxt;
5898 tcp_init_buffer_space(sk);
5899 }
5900 smp_mb();
5901 tcp_set_state(sk, TCP_ESTABLISHED);
5902 sk->sk_state_change(sk);
5903
5904
5905
5906
5907
5908 if (sk->sk_socket)
5909 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5910
5911 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5912 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
5913 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5914 tcp_synack_rtt_meas(sk, synack_stamp);
5915
5916 if (tp->rx_opt.tstamp_ok)
5917 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5918
5919 if (req) {
5920
5921
5922
5923
5924
5925
5926
5927
5928 tcp_rearm_rto(sk);
5929 } else
5930 tcp_init_metrics(sk);
5931
5932 tcp_update_pacing_rate(sk);
5933
5934
5935 tp->lsndtime = tcp_time_stamp;
5936
5937 tcp_initialize_rcv_mss(sk);
5938 tcp_fast_path_on(tp);
5939 break;
5940
5941 case TCP_FIN_WAIT1: {
5942 int tmo;
5943
5944
5945
5946
5947
5948
5949 if (req != NULL) {
5950
5951
5952
5953
5954
5955
5956 if (!acceptable)
5957 return 1;
5958
5959 reqsk_fastopen_remove(sk, req, false);
5960 tcp_rearm_rto(sk);
5961 }
5962 if (tp->snd_una != tp->write_seq)
5963 break;
5964
5965 tcp_set_state(sk, TCP_FIN_WAIT2);
5966 sk->sk_shutdown |= SEND_SHUTDOWN;
5967
5968 sk_dst_confirm(sk);
5969
5970 if (!sock_flag(sk, SOCK_DEAD)) {
5971
5972 sk->sk_state_change(sk);
5973 break;
5974 }
5975
5976 if (tp->linger2 < 0 ||
5977 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5978 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5979 tcp_done(sk);
5980 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5981 return 1;
5982 }
5983
5984 tmo = tcp_fin_time(sk);
5985 if (tmo > TCP_TIMEWAIT_LEN) {
5986 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5987 } else if (th->fin || sock_owned_by_user(sk)) {
5988
5989
5990
5991
5992
5993
5994 inet_csk_reset_keepalive_timer(sk, tmo);
5995 } else {
5996 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5997 goto discard;
5998 }
5999 break;
6000 }
6001
6002 case TCP_CLOSING:
6003 if (tp->snd_una == tp->write_seq) {
6004 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
6005 goto discard;
6006 }
6007 break;
6008
6009 case TCP_LAST_ACK:
6010 if (tp->snd_una == tp->write_seq) {
6011 tcp_update_metrics(sk);
6012 tcp_done(sk);
6013 goto discard;
6014 }
6015 break;
6016 }
6017
6018
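	/* step 6: check the URG bit */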
6019 tcp_urg(sk, skb, th);
6020
6021
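	/* step 7: process the segment text */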
6022 switch (sk->sk_state) {
6023 case TCP_CLOSE_WAIT:
6024 case TCP_CLOSING:
6025 case TCP_LAST_ACK:
6026 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
6027 break;
6028 case TCP_FIN_WAIT1:
6029 case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
6034 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6035 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6036 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6037 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6038 tcp_reset(sk);
6039 return 1;
6040 }
6041 }
6042
6043 case TCP_ESTABLISHED:
6044 tcp_data_queue(sk, skb);
6045 queued = 1;
6046 break;
6047 }
6048
6049
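	/* tcp_data_queue() above could have moved the socket to TIME-WAIT */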
6050 if (sk->sk_state != TCP_CLOSE) {
6051 tcp_data_snd_check(sk);
6052 tcp_ack_snd_check(sk);
6053 }
6054
6055 if (!queued) {
6056discard:
6057 tcp_drop(sk, skb);
6058 }
6059 return 0;
6060}
6061EXPORT_SYMBOL(tcp_rcv_state_process);
6062
6063static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
6064{
6065 struct inet_request_sock *ireq = inet_rsk(req);
6066
6067 if (family == AF_INET)
6068 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
6069 &ireq->ir_rmt_addr, port);
6070#if IS_ENABLED(CONFIG_IPV6)
6071 else if (family == AF_INET6)
6072 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"),
6073 &ireq->ir_v6_rmt_addr, port);
6074#endif
6075}
6076
/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
 *
 * If we receive a SYN packet with these bits set, it means a
 * network is playing bad games with TOS bits. In order to
 * avoid possible false congestion notifications, we disable
 * TCP ECN negotiation.
 *
 * Exception: the congestion control module may require ECN (e.g. DCTCP,
 * which asserts ECT on all packets including SYN); in that case, or when
 * the destination's dst metric explicitly enables ECN, negotiation is
 * still allowed.
 */
6089static void tcp_ecn_create_request(struct request_sock *req,
6090 const struct sk_buff *skb,
6091 const struct sock *listen_sk,
6092 const struct dst_entry *dst)
6093{
6094 const struct tcphdr *th = tcp_hdr(skb);
6095 const struct net *net = sock_net(listen_sk);
6096 bool th_ecn = th->ece && th->cwr;
6097 bool ect, ecn_ok;
6098 u32 ecn_ok_dst;
6099
6100 if (!th_ecn)
6101 return;
6102
6103 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6104 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6105 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
6106
6107 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6108 (ecn_ok_dst & DST_FEATURE_ECN_CA))
6109 inet_rsk(req)->ecn_ok = 1;
6110}
6111
6112int tcp_conn_request(struct request_sock_ops *rsk_ops,
6113 const struct tcp_request_sock_ops *af_ops,
6114 struct sock *sk, struct sk_buff *skb)
6115{
6116 struct tcp_options_received tmp_opt;
6117 struct request_sock *req;
6118 struct tcp_sock *tp = tcp_sk(sk);
6119 struct dst_entry *dst = NULL;
6120 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6121 bool want_cookie = false, fastopen;
6122 struct flowi fl;
6123 struct tcp_fastopen_cookie foc = { .len = -1 };
6124 int err;
6125
	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
6131 if ((sysctl_tcp_syncookies == 2 ||
6132 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6133 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
6134 if (!want_cookie)
6135 goto drop;
6136 }
6137
	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
6144 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
6145 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6146 goto drop;
6147 }
6148
6149 req = inet_reqsk_alloc(rsk_ops);
6150 if (!req)
6151 goto drop;
6152
6153 inet_rsk(req)->ireq_family = sk->sk_family;
6154
6155 tcp_rsk(req)->af_specific = af_ops;
6156
6157 tcp_clear_options(&tmp_opt);
6158 tmp_opt.mss_clamp = af_ops->mss_clamp;
6159 tmp_opt.user_mss = tp->rx_opt.user_mss;
6160 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
6161
6162 if (want_cookie && !tmp_opt.saw_tstamp)
6163 tcp_clear_options(&tmp_opt);
6164
6165 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6166 tcp_openreq_init(req, &tmp_opt, skb);
6167
6168 af_ops->init_req(req, sk, skb);
6169
6170 if (security_inet_conn_request(sk, skb, req))
6171 goto drop_and_free;
6172
6173 if (!want_cookie && !isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
6183 if (tcp_death_row.sysctl_tw_recycle) {
6184 bool strict;
6185
6186 dst = af_ops->route_req(sk, &fl, req, &strict);
6187
6188 if (dst && strict &&
6189 !tcp_peer_is_proven(req, dst, true,
6190 tmp_opt.saw_tstamp)) {
6191 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
6192 goto drop_and_release;
6193 }
6194 }
6195
6196 else if (!sysctl_tcp_syncookies &&
6197 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6198 (sysctl_max_syn_backlog >> 2)) &&
6199 !tcp_peer_is_proven(req, dst, false,
6200 tmp_opt.saw_tstamp)) {
6201
6202
6203
6204
6205
6206
6207
6208 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6209 rsk_ops->family);
6210 goto drop_and_release;
6211 }
6212
6213 isn = af_ops->init_seq(skb);
6214 }
6215 if (!dst) {
6216 dst = af_ops->route_req(sk, &fl, req, NULL);
6217 if (!dst)
6218 goto drop_and_free;
6219 }
6220
6221 tcp_ecn_create_request(req, skb, sk, dst);
6222
6223 if (want_cookie) {
6224 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6225 req->cookie_ts = tmp_opt.tstamp_ok;
6226 if (!tmp_opt.tstamp_ok)
6227 inet_rsk(req)->ecn_ok = 0;
6228 }
6229
6230 tcp_rsk(req)->snt_isn = isn;
6231 tcp_openreq_init_rwin(req, sk, dst);
6232 fastopen = !want_cookie &&
6233 tcp_try_fastopen(sk, skb, req, &foc, dst);
6234 err = af_ops->send_synack(sk, dst, &fl, req,
6235 skb_get_queue_mapping(skb), &foc);
6236 if (!fastopen) {
6237 if (err || want_cookie)
6238 goto drop_and_free;
6239
6240 tcp_rsk(req)->listener = NULL;
6241 af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
6242 }
6243
6244 return 0;
6245
6246drop_and_release:
6247 dst_release(dst);
6248drop_and_free:
6249 reqsk_free(req);
6250drop:
6251 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
6252 return 0;
6253}
6254EXPORT_SYMBOL(tcp_conn_request);
6255