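/*
 * TCP input engine: receive-side MSS and RTT estimation, receive/send
 * buffer autotuning, ECN handling, SACK scoreboard tagging, and the
 * ACK-driven congestion-control state machine (Open, Disorder, CWR,
 * Recovery, Loss).
 */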
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>

int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
int sysctl_tcp_max_reordering __read_mostly = 300;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

int sysctl_tcp_challenge_ack_limit = 100;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;

int sysctl_tcp_thin_dupack __read_mostly;

int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;
int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;

#define FLAG_DATA 0x01
#define FLAG_WIN_UPDATE 0x02
#define FLAG_DATA_ACKED 0x04
#define FLAG_RETRANS_DATA_ACKED 0x08
#define FLAG_SYN_ACKED 0x10
#define FLAG_DATA_SACKED 0x20
#define FLAG_ECE 0x40
#define FLAG_SLOWPATH 0x100
#define FLAG_ORIG_SACK_ACKED 0x200
#define FLAG_SND_UNA_ADVANCED 0x400
#define FLAG_DSACKING_ACK 0x800
#define FLAG_SACK_RENEGING 0x2000
#define FLAG_UPDATE_TS_RECENT 0x4000

#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
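
/* Adapt the MSS value used for delayed-ACK decisions to the real world:
 * track the largest "true" segment size seen from the peer.
 */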
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if the peer sends
	 * full-sized frames, so prefer the GSO segment size when present.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = len;
	} else {
		/* Otherwise look at the whole frame including TCP options:
		 * a segment without FIN/SYN/URG/PSH that carries at least
		 * TCP_MIN_MSS of payload is treated as full sized.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Subtract the invariant header length; it is safe
			 * to underestimate rcv_mss here.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}
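
/* Quick-ACK bookkeeping: grant a burst of immediate ACKs sized to about
 * half the receive window before falling back to delayed ACKs.
 */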
static void tcp_incr_quickack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

static void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	tcp_incr_quickack(sk);
	icsk->icsk_ack.pingpong = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if the "quick" count is not exhausted
 * and the session is not interactive.
 */
static inline bool tcp_in_quickack_mode(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}
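
/* ECN helpers: fold the ECE/CWR header bits and the IP ECN codepoint of
 * incoming segments into tp->ecn_flags.
 */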
static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr)
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}

static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode((struct sock *)tp);
		break;
	case INET_ECN_CE:
		if (tcp_ca_needs_ecn((struct sock *)tp))
			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);

		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
			tcp_enter_quickack_mode((struct sock *)tp);
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		}
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	default:
		if (tcp_ca_needs_ecn((struct sock *)tp))
			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	}
}

static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		__tcp_ecn_check_ce(tp, skb);
}

static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return true;
	return false;
}
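
/* Size sk_sndbuf so that at least 2 * snd_cwnd worth of full-sized skbs
 * (payload, headers and skb overhead) fit, bounded by sysctl_tcp_wmem[2].
 */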
static void tcp_sndbuf_expand(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	int sndmem, per_mss;
	u32 nr_segs;

	/* Worst-case per-segment cost: full MSS plus TCP/IP headers and
	 * skb/shared-info overhead, rounded up as the allocator would.
	 */
	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
		  MAX_TCP_HEADER +
		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	per_mss = roundup_pow_of_two(per_mss) +
		  SKB_DATA_ALIGN(sizeof(struct sk_buff));

	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);

	/* Double it to leave headroom for retransmissions during
	 * fast recovery.
	 */
	sndmem = 2 * nr_segs * per_mss;

	if (sk->sk_sndbuf < sndmem)
		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}
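
/* Receive window growth: rcv_ssthresh is raised only while incoming skbs
 * keep a reasonable payload-to-truesize ratio, so the advertised window
 * does not outgrow the memory actually available for the receive queue.
 */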
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int truesize = tcp_win_from_space(skb->truesize) >> 1;
	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->rcv_ssthresh < tp->window_clamp &&
	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
	    !sk_under_memory_pressure(sk)) {
		int incr;

		if (tcp_win_from_space(skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			incr = max_t(int, incr, 2 * skb->len);
			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
					       tp->window_clamp);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

static void tcp_fixup_rcvbuf(struct sock *sk)
{
	u32 mss = tcp_sk(sk)->advmss;
	int rcvmem;

	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
		 tcp_default_init_rwnd(mss);

	if (sysctl_tcp_moderate_rcvbuf)
		rcvmem <<= 2;

	if (sk->sk_rcvbuf < rcvmem)
		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
}
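
/* Set up the receive/send buffers and the window clamp when the
 * connection enters established state.
 */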
void tcp_init_buffer_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
		tcp_fixup_rcvbuf(sk);
	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_sndbuf_expand(sk);

	tp->rcvq_space.space = tp->rcv_wnd;
	tp->rcvq_space.time = tcp_time_stamp;
	tp->rcvq_space.seq = tp->copied_seq;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> sysctl_tcp_app_win),
					       4 * tp->advmss);
	}

	/* Reserve slack space for the application. */
	if (sysctl_tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* Adjust sk_rcvbuf and rcv_ssthresh after the socket ran into its
 * receive-memory limits.
 */
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !sk_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
				    sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}
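
/* Initialize RCV_MSS, our guess about the MSS used by the peer; it is
 * used for delayed-ACK decisions before real segments have been seen,
 * and underestimating is safer than overestimating.
 */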
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);
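
/* Receiver-side RTT estimator used for receive buffer autotuning: fold a
 * sample into rcv_rtt_est.rtt with a 1/8-gain EWMA (win_dep == 0,
 * timestamp samples) or track only the minimum over a window (win_dep != 0).
 */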
489static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
490{
491 u32 new_sample = tp->rcv_rtt_est.rtt;
492 long m = sample;
493
494 if (m == 0)
495 m = 1;
496
497 if (new_sample != 0) {
498
499
500
501
502
503
504
505
506
507
508 if (!win_dep) {
509 m -= (new_sample >> 3);
510 new_sample += m;
511 } else {
512 m <<= 3;
513 if (m < new_sample)
514 new_sample = m;
515 }
516 } else {
517
518 new_sample = m << 3;
519 }
520
521 if (tp->rcv_rtt_est.rtt != new_sample)
522 tp->rcv_rtt_est.rtt = new_sample;
523}
524
525static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
526{
527 if (tp->rcv_rtt_est.time == 0)
528 goto new_measure;
529 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
530 return;
531 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
532
533new_measure:
534 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
535 tp->rcv_rtt_est.time = tcp_time_stamp;
536}
537
538static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
539 const struct sk_buff *skb)
540{
541 struct tcp_sock *tp = tcp_sk(sk);
542 if (tp->rx_opt.rcv_tsecr &&
543 (TCP_SKB_CB(skb)->end_seq -
544 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
545 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
546}
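
/* Called from the receive data-copy path roughly once per RTT to grow
 * sk_rcvbuf and the window clamp according to how much data the
 * application consumed during the last RTT.
 */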
552void tcp_rcv_space_adjust(struct sock *sk)
553{
554 struct tcp_sock *tp = tcp_sk(sk);
555 int time;
556 int copied;
557
558 time = tcp_time_stamp - tp->rcvq_space.time;
559 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
560 return;
561
562
563 copied = tp->copied_seq - tp->rcvq_space.seq;
564 if (copied <= tp->rcvq_space.space)
565 goto new_measure;
566
567
568
569
570
571
572
573
574
575
576 if (sysctl_tcp_moderate_rcvbuf &&
577 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
578 int rcvwin, rcvmem, rcvbuf;
579
580
581
582
583 rcvwin = (copied << 1) + 16 * tp->advmss;
584
585
586
587
588
589
590 if (copied >=
591 tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
592 if (copied >=
593 tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
594 rcvwin <<= 1;
595 else
596 rcvwin += (rcvwin >> 1);
597 }
598
599 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
600 while (tcp_win_from_space(rcvmem) < tp->advmss)
601 rcvmem += 128;
602
603 rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
604 if (rcvbuf > sk->sk_rcvbuf) {
605 sk->sk_rcvbuf = rcvbuf;
606
607
608 tp->window_clamp = rcvwin;
609 }
610 }
611 tp->rcvq_space.space = copied;
612
613new_measure:
614 tp->rcvq_space.seq = tp->copied_seq;
615 tp->rcvq_space.time = tcp_time_stamp;
616}
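
/* Per-segment receive bookkeeping: schedule an ACK, update the receive
 * MSS and RTT estimates, adapt the delayed-ACK timeout (ato), check for
 * ECN CE marks and possibly grow the advertised window.
 */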
628static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
629{
630 struct tcp_sock *tp = tcp_sk(sk);
631 struct inet_connection_sock *icsk = inet_csk(sk);
632 u32 now;
633
634 inet_csk_schedule_ack(sk);
635
636 tcp_measure_rcv_mss(sk, skb);
637
638 tcp_rcv_rtt_measure(tp);
639
640 now = tcp_time_stamp;
641
642 if (!icsk->icsk_ack.ato) {
643
644
645
646 tcp_incr_quickack(sk);
647 icsk->icsk_ack.ato = TCP_ATO_MIN;
648 } else {
649 int m = now - icsk->icsk_ack.lrcvtime;
650
651 if (m <= TCP_ATO_MIN / 2) {
652
653 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
654 } else if (m < icsk->icsk_ack.ato) {
655 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
656 if (icsk->icsk_ack.ato > icsk->icsk_rto)
657 icsk->icsk_ack.ato = icsk->icsk_rto;
658 } else if (m > icsk->icsk_rto) {
659
660
661
662 tcp_incr_quickack(sk);
663 sk_mem_reclaim(sk);
664 }
665 }
666 icsk->icsk_ack.lrcvtime = now;
667
668 tcp_ecn_check_ce(tp, skb);
669
670 if (skb->len >= 128)
671 tcp_grow_window(sk, skb);
672}
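
/* Jacobson/Karels RTT estimator (RFC 6298): srtt_us is kept scaled by 8
 * and updated with gain 1/8, mdev_us tracks mean deviation with gain 1/4,
 * and rttvar_us/mdev_max_us are decayed once per round trip.
 */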
683static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
684{
685 struct tcp_sock *tp = tcp_sk(sk);
686 long m = mrtt_us;
687 u32 srtt = tp->srtt_us;
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705 if (srtt != 0) {
706 m -= (srtt >> 3);
707 srtt += m;
708 if (m < 0) {
709 m = -m;
710 m -= (tp->mdev_us >> 2);
711
712
713
714
715
716
717
718
719 if (m > 0)
720 m >>= 3;
721 } else {
722 m -= (tp->mdev_us >> 2);
723 }
724 tp->mdev_us += m;
725 if (tp->mdev_us > tp->mdev_max_us) {
726 tp->mdev_max_us = tp->mdev_us;
727 if (tp->mdev_max_us > tp->rttvar_us)
728 tp->rttvar_us = tp->mdev_max_us;
729 }
730 if (after(tp->snd_una, tp->rtt_seq)) {
731 if (tp->mdev_max_us < tp->rttvar_us)
732 tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
733 tp->rtt_seq = tp->snd_nxt;
734 tp->mdev_max_us = tcp_rto_min_us(sk);
735 }
736 } else {
737
738 srtt = m << 3;
739 tp->mdev_us = m << 1;
740 tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
741 tp->mdev_max_us = tp->rttvar_us;
742 tp->rtt_seq = tp->snd_nxt;
743 }
744 tp->srtt_us = max(1U, srtt);
745}
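
/* Set sk_pacing_rate to roughly twice the current sending rate,
 * 2 * mss * max(cwnd, packets_out) / srtt, capped at sk_max_pacing_rate.
 */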
753static void tcp_update_pacing_rate(struct sock *sk)
754{
755 const struct tcp_sock *tp = tcp_sk(sk);
756 u64 rate;
757
758
759 rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
760
761 rate *= max(tp->snd_cwnd, tp->packets_out);
762
763 if (likely(tp->srtt_us))
764 do_div(rate, tp->srtt_us);
765
766
767
768
769
770 ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
771 sk->sk_max_pacing_rate);
772}
773
774
775
776
777static void tcp_set_rto(struct sock *sk)
778{
779 const struct tcp_sock *tp = tcp_sk(sk);
780
781
782
783
784
785
786
787
788
789
790 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
791
792
793
794
795
796
797
798
799
800
801 tcp_bound_rto(sk);
802}
803
804__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
805{
806 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
807
808 if (!cwnd)
809 cwnd = TCP_INIT_CWND;
810 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
811}
812
813
814
815
816
817void tcp_disable_fack(struct tcp_sock *tp)
818{
819
820 if (tcp_is_fack(tp))
821 tp->lost_skb_hint = NULL;
822 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
823}
824
825
826static void tcp_dsack_seen(struct tcp_sock *tp)
827{
828 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
829}
830
831static void tcp_update_reordering(struct sock *sk, const int metric,
832 const int ts)
833{
834 struct tcp_sock *tp = tcp_sk(sk);
835 if (metric > tp->reordering) {
836 int mib_idx;
837
838 tp->reordering = min(sysctl_tcp_max_reordering, metric);
839
840
841 if (ts)
842 mib_idx = LINUX_MIB_TCPTSREORDER;
843 else if (tcp_is_reno(tp))
844 mib_idx = LINUX_MIB_TCPRENOREORDER;
845 else if (tcp_is_fack(tp))
846 mib_idx = LINUX_MIB_TCPFACKREORDER;
847 else
848 mib_idx = LINUX_MIB_TCPSACKREORDER;
849
850 NET_INC_STATS_BH(sock_net(sk), mib_idx);
851#if FASTRETRANS_DEBUG > 1
852 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
853 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
854 tp->reordering,
855 tp->fackets_out,
856 tp->sacked_out,
857 tp->undo_marker ? tp->undo_retrans : 0);
858#endif
859 tcp_disable_fack(tp);
860 }
861
862 if (metric > 0)
863 tcp_disable_early_retrans(tp);
864}
865
866
867static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
868{
869 if (!tp->retransmit_skb_hint ||
870 before(TCP_SKB_CB(skb)->seq,
871 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
872 tp->retransmit_skb_hint = skb;
873
874 if (!tp->lost_out ||
875 after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
876 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
877}
878
879static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
880{
881 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
882 tcp_verify_retransmit_hint(tp, skb);
883
884 tp->lost_out += tcp_skb_pcount(skb);
885 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
886 }
887}
888
889static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
890 struct sk_buff *skb)
891{
892 tcp_verify_retransmit_hint(tp, skb);
893
894 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
895 tp->lost_out += tcp_skb_pcount(skb);
896 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
897 }
898}
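
/* Validate an incoming SACK block: reject blocks beyond snd_nxt or with
 * reversed endpoints, accept blocks entirely above snd_una, and apply the
 * stricter D-SACK checks against undo_marker.
 */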
994static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
995 u32 start_seq, u32 end_seq)
996{
997
998 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
999 return false;
1000
1001
1002 if (!before(start_seq, tp->snd_nxt))
1003 return false;
1004
1005
1006
1007
1008 if (after(start_seq, tp->snd_una))
1009 return true;
1010
1011 if (!is_dsack || !tp->undo_marker)
1012 return false;
1013
1014
1015 if (after(end_seq, tp->snd_una))
1016 return false;
1017
1018 if (!before(start_seq, tp->undo_marker))
1019 return true;
1020
1021
1022 if (!after(end_seq, tp->undo_marker))
1023 return false;
1024
1025
1026
1027
1028 return !before(start_seq, end_seq - tp->max_window);
1029}
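
/* FACK-style detection of lost retransmissions: if data sent after a
 * retransmitted segment has already been SACKed, assume the retransmission
 * itself was lost and mark it lost again.
 */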
1040static void tcp_mark_lost_retrans(struct sock *sk)
1041{
1042 const struct inet_connection_sock *icsk = inet_csk(sk);
1043 struct tcp_sock *tp = tcp_sk(sk);
1044 struct sk_buff *skb;
1045 int cnt = 0;
1046 u32 new_low_seq = tp->snd_nxt;
1047 u32 received_upto = tcp_highest_sack_seq(tp);
1048
1049 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1050 !after(received_upto, tp->lost_retrans_low) ||
1051 icsk->icsk_ca_state != TCP_CA_Recovery)
1052 return;
1053
1054 tcp_for_write_queue(skb, sk) {
1055 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
1056
1057 if (skb == tcp_send_head(sk))
1058 break;
1059 if (cnt == tp->retrans_out)
1060 break;
1061 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1062 continue;
1063
1064 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1065 continue;
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078 if (after(received_upto, ack_seq)) {
1079 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1080 tp->retrans_out -= tcp_skb_pcount(skb);
1081
1082 tcp_skb_mark_lost_uncond_verify(tp, skb);
1083 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1084 } else {
1085 if (before(ack_seq, new_low_seq))
1086 new_low_seq = ack_seq;
1087 cnt += tcp_skb_pcount(skb);
1088 }
1089 }
1090
1091 if (tp->retrans_out)
1092 tp->lost_retrans_low = new_low_seq;
1093}
1094
1095static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1096 struct tcp_sack_block_wire *sp, int num_sacks,
1097 u32 prior_snd_una)
1098{
1099 struct tcp_sock *tp = tcp_sk(sk);
1100 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1101 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1102 bool dup_sack = false;
1103
1104 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1105 dup_sack = true;
1106 tcp_dsack_seen(tp);
1107 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1108 } else if (num_sacks > 1) {
1109 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1110 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
1111
1112 if (!after(end_seq_0, end_seq_1) &&
1113 !before(start_seq_0, start_seq_1)) {
1114 dup_sack = true;
1115 tcp_dsack_seen(tp);
1116 NET_INC_STATS_BH(sock_net(sk),
1117 LINUX_MIB_TCPDSACKOFORECV);
1118 }
1119 }
1120
1121
1122 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1123 !after(end_seq_0, prior_snd_una) &&
1124 after(end_seq_0, tp->undo_marker))
1125 tp->undo_retrans--;
1126
1127 return dup_sack;
1128}
1129
1130struct tcp_sacktag_state {
1131 int reord;
1132 int fack_count;
1133 long rtt_us;
1134 int flag;
1135};
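
/* Check whether skb is fully inside the SACK block [start_seq, end_seq);
 * if a multi-segment (GSO) skb is only partially covered, fragment it at
 * an MSS boundary so that the covered part can be tagged on its own.
 */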
1145static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1146 u32 start_seq, u32 end_seq)
1147{
1148 int err;
1149 bool in_sack;
1150 unsigned int pkt_len;
1151 unsigned int mss;
1152
1153 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1154 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1155
1156 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1157 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
1158 mss = tcp_skb_mss(skb);
1159 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1160
1161 if (!in_sack) {
1162 pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
1163 if (pkt_len < mss)
1164 pkt_len = mss;
1165 } else {
1166 pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
1167 if (pkt_len < mss)
1168 return -EINVAL;
1169 }
1170
1171
1172
1173
1174 if (pkt_len > mss) {
1175 unsigned int new_len = (pkt_len / mss) * mss;
1176 if (!in_sack && new_len < pkt_len) {
1177 new_len += mss;
1178 if (new_len >= skb->len)
1179 return 0;
1180 }
1181 pkt_len = new_len;
1182 }
1183 err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
1184 if (err < 0)
1185 return err;
1186 }
1187
1188 return in_sack;
1189}
1190
1191
1192static u8 tcp_sacktag_one(struct sock *sk,
1193 struct tcp_sacktag_state *state, u8 sacked,
1194 u32 start_seq, u32 end_seq,
1195 int dup_sack, int pcount,
1196 const struct skb_mstamp *xmit_time)
1197{
1198 struct tcp_sock *tp = tcp_sk(sk);
1199 int fack_count = state->fack_count;
1200
1201
1202 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1203 if (tp->undo_marker && tp->undo_retrans > 0 &&
1204 after(end_seq, tp->undo_marker))
1205 tp->undo_retrans--;
1206 if (sacked & TCPCB_SACKED_ACKED)
1207 state->reord = min(fack_count, state->reord);
1208 }
1209
1210
1211 if (!after(end_seq, tp->snd_una))
1212 return sacked;
1213
1214 if (!(sacked & TCPCB_SACKED_ACKED)) {
1215 if (sacked & TCPCB_SACKED_RETRANS) {
1216
1217
1218
1219
1220 if (sacked & TCPCB_LOST) {
1221 sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1222 tp->lost_out -= pcount;
1223 tp->retrans_out -= pcount;
1224 }
1225 } else {
1226 if (!(sacked & TCPCB_RETRANS)) {
1227
1228
1229
1230 if (before(start_seq,
1231 tcp_highest_sack_seq(tp)))
1232 state->reord = min(fack_count,
1233 state->reord);
1234 if (!after(end_seq, tp->high_seq))
1235 state->flag |= FLAG_ORIG_SACK_ACKED;
1236
1237 if (state->rtt_us < 0) {
1238 struct skb_mstamp now;
1239
1240 skb_mstamp_get(&now);
1241 state->rtt_us = skb_mstamp_us_delta(&now,
1242 xmit_time);
1243 }
1244 }
1245
1246 if (sacked & TCPCB_LOST) {
1247 sacked &= ~TCPCB_LOST;
1248 tp->lost_out -= pcount;
1249 }
1250 }
1251
1252 sacked |= TCPCB_SACKED_ACKED;
1253 state->flag |= FLAG_DATA_SACKED;
1254 tp->sacked_out += pcount;
1255
1256 fack_count += pcount;
1257
1258
1259 if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
1260 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1261 tp->lost_cnt_hint += pcount;
1262
1263 if (fack_count > tp->fackets_out)
1264 tp->fackets_out = fack_count;
1265 }
1266
1267
1268
1269
1270
1271 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1272 sacked &= ~TCPCB_SACKED_RETRANS;
1273 tp->retrans_out -= pcount;
1274 }
1275
1276 return sacked;
1277}
1278
1279
1280
1281
1282static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1283 struct tcp_sacktag_state *state,
1284 unsigned int pcount, int shifted, int mss,
1285 bool dup_sack)
1286{
1287 struct tcp_sock *tp = tcp_sk(sk);
1288 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1289 u32 start_seq = TCP_SKB_CB(skb)->seq;
1290 u32 end_seq = start_seq + shifted;
1291
1292 BUG_ON(!pcount);
1293
1294
1295
1296
1297
1298
1299
1300 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1301 start_seq, end_seq, dup_sack, pcount,
1302 &skb->skb_mstamp);
1303
1304 if (skb == tp->lost_skb_hint)
1305 tp->lost_cnt_hint += pcount;
1306
1307 TCP_SKB_CB(prev)->end_seq += shifted;
1308 TCP_SKB_CB(skb)->seq += shifted;
1309
1310 tcp_skb_pcount_add(prev, pcount);
1311 BUG_ON(tcp_skb_pcount(skb) < pcount);
1312 tcp_skb_pcount_add(skb, -pcount);
1313
1314
1315
1316
1317
1318
1319 if (!skb_shinfo(prev)->gso_size) {
1320 skb_shinfo(prev)->gso_size = mss;
1321 skb_shinfo(prev)->gso_type = sk->sk_gso_type;
1322 }
1323
1324
1325 if (tcp_skb_pcount(skb) <= 1) {
1326 skb_shinfo(skb)->gso_size = 0;
1327 skb_shinfo(skb)->gso_type = 0;
1328 }
1329
1330
1331 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
1332
1333 if (skb->len > 0) {
1334 BUG_ON(!tcp_skb_pcount(skb));
1335 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1336 return false;
1337 }
1338
1339
1340
1341 if (skb == tp->retransmit_skb_hint)
1342 tp->retransmit_skb_hint = prev;
1343 if (skb == tp->lost_skb_hint) {
1344 tp->lost_skb_hint = prev;
1345 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1346 }
1347
1348 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1349 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1350 TCP_SKB_CB(prev)->end_seq++;
1351
1352 if (skb == tcp_highest_sack(sk))
1353 tcp_advance_highest_sack(sk, skb);
1354
1355 tcp_unlink_write_queue(skb, sk);
1356 sk_wmem_free_skb(sk, skb);
1357
1358 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
1359
1360 return true;
1361}
1362
1363
1364
1365
1366static int tcp_skb_seglen(const struct sk_buff *skb)
1367{
1368 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1369}
1370
1371
1372static int skb_can_shift(const struct sk_buff *skb)
1373{
1374 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1375}
1376
1377
1378
1379
1380static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1381 struct tcp_sacktag_state *state,
1382 u32 start_seq, u32 end_seq,
1383 bool dup_sack)
1384{
1385 struct tcp_sock *tp = tcp_sk(sk);
1386 struct sk_buff *prev;
1387 int mss;
1388 int pcount = 0;
1389 int len;
1390 int in_sack;
1391
1392 if (!sk_can_gso(sk))
1393 goto fallback;
1394
1395
1396 if (!dup_sack &&
1397 (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
1398 goto fallback;
1399 if (!skb_can_shift(skb))
1400 goto fallback;
1401
1402 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1403 goto fallback;
1404
1405
1406 if (unlikely(skb == tcp_write_queue_head(sk)))
1407 goto fallback;
1408 prev = tcp_write_queue_prev(sk, skb);
1409
1410 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1411 goto fallback;
1412
1413 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1414 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1415
1416 if (in_sack) {
1417 len = skb->len;
1418 pcount = tcp_skb_pcount(skb);
1419 mss = tcp_skb_seglen(skb);
1420
1421
1422
1423
1424 if (mss != tcp_skb_seglen(prev))
1425 goto fallback;
1426 } else {
1427 if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
1428 goto noop;
1429
1430
1431
1432
1433 if (tcp_skb_pcount(skb) <= 1)
1434 goto noop;
1435
1436 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
1437 if (!in_sack) {
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449 goto fallback;
1450 }
1451
1452 len = end_seq - TCP_SKB_CB(skb)->seq;
1453 BUG_ON(len < 0);
1454 BUG_ON(len > skb->len);
1455
1456
1457
1458
1459
1460 mss = tcp_skb_mss(skb);
1461
1462
1463
1464
1465 if (mss != tcp_skb_seglen(prev))
1466 goto fallback;
1467
1468 if (len == mss) {
1469 pcount = 1;
1470 } else if (len < mss) {
1471 goto noop;
1472 } else {
1473 pcount = len / mss;
1474 len = pcount * mss;
1475 }
1476 }
1477
1478
1479 if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
1480 goto fallback;
1481
1482 if (!skb_shift(prev, skb, len))
1483 goto fallback;
1484 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1485 goto out;
1486
1487
1488
1489
1490 if (prev == tcp_write_queue_tail(sk))
1491 goto out;
1492 skb = tcp_write_queue_next(sk, prev);
1493
1494 if (!skb_can_shift(skb) ||
1495 (skb == tcp_send_head(sk)) ||
1496 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1497 (mss != tcp_skb_seglen(skb)))
1498 goto out;
1499
1500 len = skb->len;
1501 if (skb_shift(prev, skb, len)) {
1502 pcount += tcp_skb_pcount(skb);
1503 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1504 }
1505
1506out:
1507 state->fack_count += pcount;
1508 return prev;
1509
1510noop:
1511 return skb;
1512
1513fallback:
1514 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1515 return NULL;
1516}
1517
1518static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1519 struct tcp_sack_block *next_dup,
1520 struct tcp_sacktag_state *state,
1521 u32 start_seq, u32 end_seq,
1522 bool dup_sack_in)
1523{
1524 struct tcp_sock *tp = tcp_sk(sk);
1525 struct sk_buff *tmp;
1526
1527 tcp_for_write_queue_from(skb, sk) {
1528 int in_sack = 0;
1529 bool dup_sack = dup_sack_in;
1530
1531 if (skb == tcp_send_head(sk))
1532 break;
1533
1534
1535 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1536 break;
1537
1538 if (next_dup &&
1539 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1540 in_sack = tcp_match_skb_to_sack(sk, skb,
1541 next_dup->start_seq,
1542 next_dup->end_seq);
1543 if (in_sack > 0)
1544 dup_sack = true;
1545 }
1546
1547
1548
1549
1550
1551 if (in_sack <= 0) {
1552 tmp = tcp_shift_skb_data(sk, skb, state,
1553 start_seq, end_seq, dup_sack);
1554 if (tmp) {
1555 if (tmp != skb) {
1556 skb = tmp;
1557 continue;
1558 }
1559
1560 in_sack = 0;
1561 } else {
1562 in_sack = tcp_match_skb_to_sack(sk, skb,
1563 start_seq,
1564 end_seq);
1565 }
1566 }
1567
1568 if (unlikely(in_sack < 0))
1569 break;
1570
1571 if (in_sack) {
1572 TCP_SKB_CB(skb)->sacked =
1573 tcp_sacktag_one(sk,
1574 state,
1575 TCP_SKB_CB(skb)->sacked,
1576 TCP_SKB_CB(skb)->seq,
1577 TCP_SKB_CB(skb)->end_seq,
1578 dup_sack,
1579 tcp_skb_pcount(skb),
1580 &skb->skb_mstamp);
1581
1582 if (!before(TCP_SKB_CB(skb)->seq,
1583 tcp_highest_sack_seq(tp)))
1584 tcp_advance_highest_sack(sk, skb);
1585 }
1586
1587 state->fack_count += tcp_skb_pcount(skb);
1588 }
1589 return skb;
1590}
1591
1592
1593
1594
1595static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1596 struct tcp_sacktag_state *state,
1597 u32 skip_to_seq)
1598{
1599 tcp_for_write_queue_from(skb, sk) {
1600 if (skb == tcp_send_head(sk))
1601 break;
1602
1603 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1604 break;
1605
1606 state->fack_count += tcp_skb_pcount(skb);
1607 }
1608 return skb;
1609}
1610
1611static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1612 struct sock *sk,
1613 struct tcp_sack_block *next_dup,
1614 struct tcp_sacktag_state *state,
1615 u32 skip_to_seq)
1616{
1617 if (!next_dup)
1618 return skb;
1619
1620 if (before(next_dup->start_seq, skip_to_seq)) {
1621 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1622 skb = tcp_sacktag_walk(skb, sk, NULL, state,
1623 next_dup->start_seq, next_dup->end_seq,
1624 1);
1625 }
1626
1627 return skb;
1628}
1629
1630static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
1631{
1632 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1633}
1634
1635static int
1636tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1637 u32 prior_snd_una, long *sack_rtt_us)
1638{
1639 struct tcp_sock *tp = tcp_sk(sk);
1640 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1641 TCP_SKB_CB(ack_skb)->sacked);
1642 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1643 struct tcp_sack_block sp[TCP_NUM_SACKS];
1644 struct tcp_sack_block *cache;
1645 struct tcp_sacktag_state state;
1646 struct sk_buff *skb;
1647 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1648 int used_sacks;
1649 bool found_dup_sack = false;
1650 int i, j;
1651 int first_sack_index;
1652
1653 state.flag = 0;
1654 state.reord = tp->packets_out;
1655 state.rtt_us = -1L;
1656
1657 if (!tp->sacked_out) {
1658 if (WARN_ON(tp->fackets_out))
1659 tp->fackets_out = 0;
1660 tcp_highest_sack_reset(sk);
1661 }
1662
1663 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1664 num_sacks, prior_snd_una);
1665 if (found_dup_sack)
1666 state.flag |= FLAG_DSACKING_ACK;
1667
1668
1669
1670
1671
1672 if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
1673 return 0;
1674
1675 if (!tp->packets_out)
1676 goto out;
1677
1678 used_sacks = 0;
1679 first_sack_index = 0;
1680 for (i = 0; i < num_sacks; i++) {
1681 bool dup_sack = !i && found_dup_sack;
1682
1683 sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
1684 sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
1685
1686 if (!tcp_is_sackblock_valid(tp, dup_sack,
1687 sp[used_sacks].start_seq,
1688 sp[used_sacks].end_seq)) {
1689 int mib_idx;
1690
1691 if (dup_sack) {
1692 if (!tp->undo_marker)
1693 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1694 else
1695 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1696 } else {
1697
1698 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1699 !after(sp[used_sacks].end_seq, tp->snd_una))
1700 continue;
1701 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1702 }
1703
1704 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1705 if (i == 0)
1706 first_sack_index = -1;
1707 continue;
1708 }
1709
1710
1711 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1712 continue;
1713
1714 used_sacks++;
1715 }
1716
1717
1718 for (i = used_sacks - 1; i > 0; i--) {
1719 for (j = 0; j < i; j++) {
1720 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1721 swap(sp[j], sp[j + 1]);
1722
1723
1724 if (j == first_sack_index)
1725 first_sack_index = j + 1;
1726 }
1727 }
1728 }
1729
1730 skb = tcp_write_queue_head(sk);
1731 state.fack_count = 0;
1732 i = 0;
1733
1734 if (!tp->sacked_out) {
1735
1736 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1737 } else {
1738 cache = tp->recv_sack_cache;
1739
1740 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1741 !cache->end_seq)
1742 cache++;
1743 }
1744
1745 while (i < used_sacks) {
1746 u32 start_seq = sp[i].start_seq;
1747 u32 end_seq = sp[i].end_seq;
1748 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1749 struct tcp_sack_block *next_dup = NULL;
1750
1751 if (found_dup_sack && ((i + 1) == first_sack_index))
1752 next_dup = &sp[i + 1];
1753
1754
1755 while (tcp_sack_cache_ok(tp, cache) &&
1756 !before(start_seq, cache->end_seq))
1757 cache++;
1758
1759
1760 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1761 after(end_seq, cache->start_seq)) {
1762
1763
1764 if (before(start_seq, cache->start_seq)) {
1765 skb = tcp_sacktag_skip(skb, sk, &state,
1766 start_seq);
1767 skb = tcp_sacktag_walk(skb, sk, next_dup,
1768 &state,
1769 start_seq,
1770 cache->start_seq,
1771 dup_sack);
1772 }
1773
1774
1775 if (!after(end_seq, cache->end_seq))
1776 goto advance_sp;
1777
1778 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1779 &state,
1780 cache->end_seq);
1781
1782
1783 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1784
1785 skb = tcp_highest_sack(sk);
1786 if (!skb)
1787 break;
1788 state.fack_count = tp->fackets_out;
1789 cache++;
1790 goto walk;
1791 }
1792
1793 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1794
1795 cache++;
1796 continue;
1797 }
1798
1799 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1800 skb = tcp_highest_sack(sk);
1801 if (!skb)
1802 break;
1803 state.fack_count = tp->fackets_out;
1804 }
1805 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1806
1807walk:
1808 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1809 start_seq, end_seq, dup_sack);
1810
1811advance_sp:
1812 i++;
1813 }
1814
1815
1816 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1817 tp->recv_sack_cache[i].start_seq = 0;
1818 tp->recv_sack_cache[i].end_seq = 0;
1819 }
1820 for (j = 0; j < used_sacks; j++)
1821 tp->recv_sack_cache[i++] = sp[j];
1822
1823 if ((state.reord < tp->fackets_out) &&
1824 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1825 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1826
1827 tcp_mark_lost_retrans(sk);
1828 tcp_verify_left_out(tp);
1829out:
1830
1831#if FASTRETRANS_DEBUG > 0
1832 WARN_ON((int)tp->sacked_out < 0);
1833 WARN_ON((int)tp->lost_out < 0);
1834 WARN_ON((int)tp->retrans_out < 0);
1835 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1836#endif
1837 *sack_rtt_us = state.rtt_us;
1838 return state.flag;
1839}
1840
1841
1842
1843
1844static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1845{
1846 u32 holes;
1847
1848 holes = max(tp->lost_out, 1U);
1849 holes = min(holes, tp->packets_out);
1850
1851 if ((tp->sacked_out + holes) > tp->packets_out) {
1852 tp->sacked_out = tp->packets_out - holes;
1853 return true;
1854 }
1855 return false;
1856}
1857
1858
1859
1860
1861
1862static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1863{
1864 struct tcp_sock *tp = tcp_sk(sk);
1865 if (tcp_limit_reno_sacked(tp))
1866 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1867}
1868
1869
1870
1871static void tcp_add_reno_sack(struct sock *sk)
1872{
1873 struct tcp_sock *tp = tcp_sk(sk);
1874 tp->sacked_out++;
1875 tcp_check_reno_reordering(sk, 0);
1876 tcp_verify_left_out(tp);
1877}
1878
1879
1880
1881static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1882{
1883 struct tcp_sock *tp = tcp_sk(sk);
1884
1885 if (acked > 0) {
1886
1887 if (acked - 1 >= tp->sacked_out)
1888 tp->sacked_out = 0;
1889 else
1890 tp->sacked_out -= acked - 1;
1891 }
1892 tcp_check_reno_reordering(sk, acked);
1893 tcp_verify_left_out(tp);
1894}
1895
1896static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1897{
1898 tp->sacked_out = 0;
1899}
1900
1901void tcp_clear_retrans(struct tcp_sock *tp)
1902{
1903 tp->retrans_out = 0;
1904 tp->lost_out = 0;
1905 tp->undo_marker = 0;
1906 tp->undo_retrans = -1;
1907 tp->fackets_out = 0;
1908 tp->sacked_out = 0;
1909}
1910
1911static inline void tcp_init_undo(struct tcp_sock *tp)
1912{
1913 tp->undo_marker = tp->snd_una;
1914
1915 tp->undo_retrans = tp->retrans_out ? : -1;
1916}
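
/* Enter Loss state after an RTO or SACK reneging: save ssthresh for a
 * possible undo, collapse cwnd to 1, mark un-SACKed segments as lost and
 * decide whether F-RTO should watch for a spurious timeout.
 */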
1922void tcp_enter_loss(struct sock *sk)
1923{
1924 const struct inet_connection_sock *icsk = inet_csk(sk);
1925 struct tcp_sock *tp = tcp_sk(sk);
1926 struct sk_buff *skb;
1927 bool new_recovery = false;
1928 bool is_reneg;
1929
1930
1931 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1932 !after(tp->high_seq, tp->snd_una) ||
1933 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1934 new_recovery = true;
1935 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1936 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1937 tcp_ca_event(sk, CA_EVENT_LOSS);
1938 tcp_init_undo(tp);
1939 }
1940 tp->snd_cwnd = 1;
1941 tp->snd_cwnd_cnt = 0;
1942 tp->snd_cwnd_stamp = tcp_time_stamp;
1943
1944 tp->retrans_out = 0;
1945 tp->lost_out = 0;
1946
1947 if (tcp_is_reno(tp))
1948 tcp_reset_reno_sack(tp);
1949
1950 skb = tcp_write_queue_head(sk);
1951 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
1952 if (is_reneg) {
1953 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1954 tp->sacked_out = 0;
1955 tp->fackets_out = 0;
1956 }
1957 tcp_clear_all_retrans_hints(tp);
1958
1959 tcp_for_write_queue(skb, sk) {
1960 if (skb == tcp_send_head(sk))
1961 break;
1962
1963 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1964 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
1965 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
1966 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1967 tp->lost_out += tcp_skb_pcount(skb);
1968 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
1969 }
1970 }
1971 tcp_verify_left_out(tp);
1972
1973
1974
1975
1976 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
1977 tp->sacked_out >= sysctl_tcp_reordering)
1978 tp->reordering = min_t(unsigned int, tp->reordering,
1979 sysctl_tcp_reordering);
1980 tcp_set_ca_state(sk, TCP_CA_Loss);
1981 tp->high_seq = tp->snd_nxt;
1982 tcp_ecn_queue_cwr(tp);
1983
1984
1985
1986
1987
1988 tp->frto = sysctl_tcp_frto &&
1989 (new_recovery || icsk->icsk_retransmits) &&
1990 !inet_csk(sk)->icsk_mtup.probe_size;
1991}
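
/* The ACK indicates the receiver dropped data it had previously SACKed
 * ("reneging"); instead of retransmitting immediately, arm a short
 * retransmit timer based on the current RTT.
 */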
2003static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2004{
2005 if (flag & FLAG_SACK_RENEGING) {
2006 struct tcp_sock *tp = tcp_sk(sk);
2007 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
2008 msecs_to_jiffies(10));
2009
2010 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2011 delay, TCP_RTO_MAX);
2012 return true;
2013 }
2014 return false;
2015}
2016
2017static inline int tcp_fackets_out(const struct tcp_sock *tp)
2018{
2019 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2020}
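
/* "Duplicate ACK" heuristic used to trigger fast retransmit: FACK counts
 * fackets_out, plain SACK/Reno count sacked_out + 1.
 */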
2037static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2038{
2039 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2040}
2041
2042static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2043{
2044 struct tcp_sock *tp = tcp_sk(sk);
2045 unsigned long delay;
2046
2047
2048
2049
2050
2051 if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
2052 (flag & FLAG_ECE) || !tp->srtt_us)
2053 return false;
2054
2055 delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
2056 msecs_to_jiffies(2));
2057
2058 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2059 return false;
2060
2061 inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
2062 TCP_RTO_MAX);
2063 return true;
2064}
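
/* Decide whether to enter fast retransmit/Recovery: segments already
 * marked lost, dupACK/SACK count above the reordering threshold, a small
 * fully-SACKed stalled flight, thin streams, or (delayed) early retransmit.
 */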
2159static bool tcp_time_to_recover(struct sock *sk, int flag)
2160{
2161 struct tcp_sock *tp = tcp_sk(sk);
2162 __u32 packets_out;
2163
2164
2165 if (tp->lost_out)
2166 return true;
2167
2168
2169 if (tcp_dupack_heuristics(tp) > tp->reordering)
2170 return true;
2171
2172
2173
2174
2175 packets_out = tp->packets_out;
2176 if (packets_out <= tp->reordering &&
2177 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
2178 !tcp_may_send_now(sk)) {
2179
2180
2181
2182 return true;
2183 }
2184
2185
2186
2187
2188
2189
2190 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2191 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2192 tcp_is_sack(tp) && !tcp_send_head(sk))
2193 return true;
2194
2195
2196
2197
2198
2199
2200 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2201 (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
2202 !tcp_may_send_now(sk))
2203 return !tcp_pause_early_retransmit(sk, flag);
2204
2205 return false;
2206}
2207
2208
2209
2210
2211
2212
2213
2214static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2215{
2216 struct tcp_sock *tp = tcp_sk(sk);
2217 struct sk_buff *skb;
2218 int cnt, oldcnt;
2219 int err;
2220 unsigned int mss;
2221
2222 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2223
2224 WARN_ON(packets > tp->packets_out);
2225 if (tp->lost_skb_hint) {
2226 skb = tp->lost_skb_hint;
2227 cnt = tp->lost_cnt_hint;
2228
2229 if (mark_head && skb != tcp_write_queue_head(sk))
2230 return;
2231 } else {
2232 skb = tcp_write_queue_head(sk);
2233 cnt = 0;
2234 }
2235
2236 tcp_for_write_queue_from(skb, sk) {
2237 if (skb == tcp_send_head(sk))
2238 break;
2239
2240
2241 tp->lost_skb_hint = skb;
2242 tp->lost_cnt_hint = cnt;
2243
2244 if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
2245 break;
2246
2247 oldcnt = cnt;
2248 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2249 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2250 cnt += tcp_skb_pcount(skb);
2251
2252 if (cnt > packets) {
2253 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2254 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2255 (oldcnt >= packets))
2256 break;
2257
2258 mss = skb_shinfo(skb)->gso_size;
2259 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
2260 mss, GFP_ATOMIC);
2261 if (err < 0)
2262 break;
2263 cnt = packets;
2264 }
2265
2266 tcp_skb_mark_lost(tp, skb);
2267
2268 if (mark_head)
2269 break;
2270 }
2271 tcp_verify_left_out(tp);
2272}
2273
2274
2275
2276static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2277{
2278 struct tcp_sock *tp = tcp_sk(sk);
2279
2280 if (tcp_is_reno(tp)) {
2281 tcp_mark_head_lost(sk, 1, 1);
2282 } else if (tcp_is_fack(tp)) {
2283 int lost = tp->fackets_out - tp->reordering;
2284 if (lost <= 0)
2285 lost = 1;
2286 tcp_mark_head_lost(sk, lost, 0);
2287 } else {
2288 int sacked_upto = tp->sacked_out - tp->reordering;
2289 if (sacked_upto >= 0)
2290 tcp_mark_head_lost(sk, sacked_upto, 0);
2291 else if (fast_rexmit)
2292 tcp_mark_head_lost(sk, 1, 1);
2293 }
2294}
2295
2296
2297
2298
2299static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2300{
2301 tp->snd_cwnd = min(tp->snd_cwnd,
2302 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2303 tp->snd_cwnd_stamp = tcp_time_stamp;
2304}
2305
2306
2307
2308
2309static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2310{
2311 return !tp->retrans_stamp ||
2312 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2313 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
2314}
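
/* True if any segment is still marked as retransmitted, or the head of
 * the write queue was ever retransmitted.
 */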
2332static bool tcp_any_retrans_done(const struct sock *sk)
2333{
2334 const struct tcp_sock *tp = tcp_sk(sk);
2335 struct sk_buff *skb;
2336
2337 if (tp->retrans_out)
2338 return true;
2339
2340 skb = tcp_write_queue_head(sk);
2341 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2342 return true;
2343
2344 return false;
2345}
2346
2347#if FASTRETRANS_DEBUG > 1
2348static void DBGUNDO(struct sock *sk, const char *msg)
2349{
2350 struct tcp_sock *tp = tcp_sk(sk);
2351 struct inet_sock *inet = inet_sk(sk);
2352
2353 if (sk->sk_family == AF_INET) {
2354 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2355 msg,
2356 &inet->inet_daddr, ntohs(inet->inet_dport),
2357 tp->snd_cwnd, tcp_left_out(tp),
2358 tp->snd_ssthresh, tp->prior_ssthresh,
2359 tp->packets_out);
2360 }
2361#if IS_ENABLED(CONFIG_IPV6)
2362 else if (sk->sk_family == AF_INET6) {
2363 struct ipv6_pinfo *np = inet6_sk(sk);
2364 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2365 msg,
2366 &np->daddr, ntohs(inet->inet_dport),
2367 tp->snd_cwnd, tcp_left_out(tp),
2368 tp->snd_ssthresh, tp->prior_ssthresh,
2369 tp->packets_out);
2370 }
2371#endif
2372}
2373#else
2374#define DBGUNDO(x...) do { } while (0)
2375#endif
2376
2377static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2378{
2379 struct tcp_sock *tp = tcp_sk(sk);
2380
2381 if (unmark_loss) {
2382 struct sk_buff *skb;
2383
2384 tcp_for_write_queue(skb, sk) {
2385 if (skb == tcp_send_head(sk))
2386 break;
2387 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2388 }
2389 tp->lost_out = 0;
2390 tcp_clear_all_retrans_hints(tp);
2391 }
2392
2393 if (tp->prior_ssthresh) {
2394 const struct inet_connection_sock *icsk = inet_csk(sk);
2395
2396 if (icsk->icsk_ca_ops->undo_cwnd)
2397 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2398 else
2399 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2400
2401 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2402 tp->snd_ssthresh = tp->prior_ssthresh;
2403 tcp_ecn_withdraw_cwr(tp);
2404 }
2405 } else {
2406 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2407 }
2408 tp->snd_cwnd_stamp = tcp_time_stamp;
2409 tp->undo_marker = 0;
2410}
2411
2412static inline bool tcp_may_undo(const struct tcp_sock *tp)
2413{
2414 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2415}
2416
2417
2418static bool tcp_try_undo_recovery(struct sock *sk)
2419{
2420 struct tcp_sock *tp = tcp_sk(sk);
2421
2422 if (tcp_may_undo(tp)) {
2423 int mib_idx;
2424
2425
2426
2427
2428 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2429 tcp_undo_cwnd_reduction(sk, false);
2430 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2431 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2432 else
2433 mib_idx = LINUX_MIB_TCPFULLUNDO;
2434
2435 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2436 }
2437 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2438
2439
2440
2441 tcp_moderate_cwnd(tp);
2442 if (!tcp_any_retrans_done(sk))
2443 tp->retrans_stamp = 0;
2444 return true;
2445 }
2446 tcp_set_ca_state(sk, TCP_CA_Open);
2447 return false;
2448}
2449
2450
2451static bool tcp_try_undo_dsack(struct sock *sk)
2452{
2453 struct tcp_sock *tp = tcp_sk(sk);
2454
2455 if (tp->undo_marker && !tp->undo_retrans) {
2456 DBGUNDO(sk, "D-SACK");
2457 tcp_undo_cwnd_reduction(sk, false);
2458 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2459 return true;
2460 }
2461 return false;
2462}
2463
2464
2465static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2466{
2467 struct tcp_sock *tp = tcp_sk(sk);
2468
2469 if (frto_undo || tcp_may_undo(tp)) {
2470 tcp_undo_cwnd_reduction(sk, true);
2471
2472 DBGUNDO(sk, "partial loss");
2473 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2474 if (frto_undo)
2475 NET_INC_STATS_BH(sock_net(sk),
2476 LINUX_MIB_TCPSPURIOUSRTOS);
2477 inet_csk(sk)->icsk_retransmits = 0;
2478 if (frto_undo || tcp_is_sack(tp))
2479 tcp_set_ca_state(sk, TCP_CA_Open);
2480 return true;
2481 }
2482 return false;
2483}
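
/* Begin a Proportional Rate Reduction (PRR) style cwnd reduction:
 * snapshot the prior cwnd, clear the delivered/out counters and take the
 * new ssthresh from the congestion control module.
 */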
2495static void tcp_init_cwnd_reduction(struct sock *sk)
2496{
2497 struct tcp_sock *tp = tcp_sk(sk);
2498
2499 tp->high_seq = tp->snd_nxt;
2500 tp->tlp_high_seq = 0;
2501 tp->snd_cwnd_cnt = 0;
2502 tp->prior_cwnd = tp->snd_cwnd;
2503 tp->prr_delivered = 0;
2504 tp->prr_out = 0;
2505 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2506 tcp_ecn_queue_cwr(tp);
2507}
2508
2509static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
2510 int fast_rexmit)
2511{
2512 struct tcp_sock *tp = tcp_sk(sk);
2513 int sndcnt = 0;
2514 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2515 int newly_acked_sacked = prior_unsacked -
2516 (tp->packets_out - tp->sacked_out);
2517
2518 tp->prr_delivered += newly_acked_sacked;
2519 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2520 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2521 tp->prior_cwnd - 1;
2522 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2523 } else {
2524 sndcnt = min_t(int, delta,
2525 max_t(int, tp->prr_delivered - tp->prr_out,
2526 newly_acked_sacked) + 1);
2527 }
2528
2529 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2530 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2531}
2532
2533static inline void tcp_end_cwnd_reduction(struct sock *sk)
2534{
2535 struct tcp_sock *tp = tcp_sk(sk);
2536
2537
2538 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2539 (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
2540 tp->snd_cwnd = tp->snd_ssthresh;
2541 tp->snd_cwnd_stamp = tcp_time_stamp;
2542 }
2543 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2544}
2545
2546
2547void tcp_enter_cwr(struct sock *sk)
2548{
2549 struct tcp_sock *tp = tcp_sk(sk);
2550
2551 tp->prior_ssthresh = 0;
2552 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2553 tp->undo_marker = 0;
2554 tcp_init_cwnd_reduction(sk);
2555 tcp_set_ca_state(sk, TCP_CA_CWR);
2556 }
2557}
2558
2559static void tcp_try_keep_open(struct sock *sk)
2560{
2561 struct tcp_sock *tp = tcp_sk(sk);
2562 int state = TCP_CA_Open;
2563
2564 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2565 state = TCP_CA_Disorder;
2566
2567 if (inet_csk(sk)->icsk_ca_state != state) {
2568 tcp_set_ca_state(sk, state);
2569 tp->high_seq = tp->snd_nxt;
2570 }
2571}
2572
2573static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2574{
2575 struct tcp_sock *tp = tcp_sk(sk);
2576
2577 tcp_verify_left_out(tp);
2578
2579 if (!tcp_any_retrans_done(sk))
2580 tp->retrans_stamp = 0;
2581
2582 if (flag & FLAG_ECE)
2583 tcp_enter_cwr(sk);
2584
2585 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2586 tcp_try_keep_open(sk);
2587 } else {
2588 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2589 }
2590}
2591
2592static void tcp_mtup_probe_failed(struct sock *sk)
2593{
2594 struct inet_connection_sock *icsk = inet_csk(sk);
2595
2596 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2597 icsk->icsk_mtup.probe_size = 0;
2598}
2599
2600static void tcp_mtup_probe_success(struct sock *sk)
2601{
2602 struct tcp_sock *tp = tcp_sk(sk);
2603 struct inet_connection_sock *icsk = inet_csk(sk);
2604
2605
2606 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2607 tp->snd_cwnd = tp->snd_cwnd *
2608 tcp_mss_to_mtu(sk, tp->mss_cache) /
2609 icsk->icsk_mtup.probe_size;
2610 tp->snd_cwnd_cnt = 0;
2611 tp->snd_cwnd_stamp = tcp_time_stamp;
2612 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2613
2614 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2615 icsk->icsk_mtup.probe_size = 0;
2616 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2617}
2618
2619
2620
2621
2622
2623void tcp_simple_retransmit(struct sock *sk)
2624{
2625 const struct inet_connection_sock *icsk = inet_csk(sk);
2626 struct tcp_sock *tp = tcp_sk(sk);
2627 struct sk_buff *skb;
2628 unsigned int mss = tcp_current_mss(sk);
2629 u32 prior_lost = tp->lost_out;
2630
2631 tcp_for_write_queue(skb, sk) {
2632 if (skb == tcp_send_head(sk))
2633 break;
2634 if (tcp_skb_seglen(skb) > mss &&
2635 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2636 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2637 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2638 tp->retrans_out -= tcp_skb_pcount(skb);
2639 }
2640 tcp_skb_mark_lost_uncond_verify(tp, skb);
2641 }
2642 }
2643
2644 tcp_clear_retrans_hints_partial(tp);
2645
2646 if (prior_lost == tp->lost_out)
2647 return;
2648
2649 if (tcp_is_reno(tp))
2650 tcp_limit_reno_sacked(tp);
2651
2652 tcp_verify_left_out(tp);
2653
2654
2655
2656
2657
2658
2659 if (icsk->icsk_ca_state != TCP_CA_Loss) {
2660 tp->high_seq = tp->snd_nxt;
2661 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2662 tp->prior_ssthresh = 0;
2663 tp->undo_marker = 0;
2664 tcp_set_ca_state(sk, TCP_CA_Loss);
2665 }
2666 tcp_xmit_retransmit_queue(sk);
2667}
2668EXPORT_SYMBOL(tcp_simple_retransmit);
2669
2670static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2671{
2672 struct tcp_sock *tp = tcp_sk(sk);
2673 int mib_idx;
2674
2675 if (tcp_is_reno(tp))
2676 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2677 else
2678 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2679
2680 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2681
2682 tp->prior_ssthresh = 0;
2683 tcp_init_undo(tp);
2684
2685 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2686 if (!ece_ack)
2687 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2688 tcp_init_cwnd_reduction(sk);
2689 }
2690 tcp_set_ca_state(sk, TCP_CA_Recovery);
2691}
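
/* ACK processing while in CA_Loss: handle F-RTO detection and undo of a
 * spurious RTO, then (re)transmit what the scoreboard says is needed.
 */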
2696static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2697{
2698 struct tcp_sock *tp = tcp_sk(sk);
2699 bool recovered = !before(tp->snd_una, tp->high_seq);
2700
2701 if ((flag & FLAG_SND_UNA_ADVANCED) &&
2702 tcp_try_undo_loss(sk, false))
2703 return;
2704
2705 if (tp->frto) {
2706
2707
2708
2709 if ((flag & FLAG_ORIG_SACK_ACKED) &&
2710 tcp_try_undo_loss(sk, true))
2711 return;
2712
2713 if (after(tp->snd_nxt, tp->high_seq)) {
2714 if (flag & FLAG_DATA_SACKED || is_dupack)
2715 tp->frto = 0;
2716 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2717 tp->high_seq = tp->snd_nxt;
2718 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
2719 TCP_NAGLE_OFF);
2720 if (after(tp->snd_nxt, tp->high_seq))
2721 return;
2722 tp->frto = 0;
2723 }
2724 }
2725
2726 if (recovered) {
2727
2728 tcp_try_undo_recovery(sk);
2729 return;
2730 }
2731 if (tcp_is_reno(tp)) {
2732
2733
2734
2735 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2736 tcp_add_reno_sack(sk);
2737 else if (flag & FLAG_SND_UNA_ADVANCED)
2738 tcp_reset_reno_sack(tp);
2739 }
2740 tcp_xmit_retransmit_queue(sk);
2741}
2742
2743
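/* A partial ACK arrived during fast recovery: if the (s)acked data was
 * merely delayed rather than lost, account for the extra reordering
 * and undo the cwnd reduction.
 */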
2744static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2745 const int prior_unsacked)
2746{
2747 struct tcp_sock *tp = tcp_sk(sk);
2748
2749 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2750
2751
2752
2753 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2754
2755
2756
2757
2758
2759
2760 if (tp->retrans_out) {
2761 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2762 return true;
2763 }
2764
2765 if (!tcp_any_retrans_done(sk))
2766 tp->retrans_stamp = 0;
2767
2768 DBGUNDO(sk, "partial recovery");
2769 tcp_undo_cwnd_reduction(sk, true);
2770 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2771 tcp_try_keep_open(sk);
2772 return true;
2773 }
2774 return false;
2775}
2776
2777
2787
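/* Main fast-retransmit/recovery state machine.  Called on "dubious"
 * ACKs; decides whether to enter or leave Recovery, CWR or Loss,
 * which segments to mark lost, and when to (re)transmit.
 */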
2788static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2789 const int prior_unsacked,
2790 bool is_dupack, int flag)
2791{
2792 struct inet_connection_sock *icsk = inet_csk(sk);
2793 struct tcp_sock *tp = tcp_sk(sk);
2794 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2795 (tcp_fackets_out(tp) > tp->reordering));
2796 int fast_rexmit = 0;
2797
2798 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2799 tp->sacked_out = 0;
2800 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2801 tp->fackets_out = 0;
2802
2803
2804
2805 if (flag & FLAG_ECE)
2806 tp->prior_ssthresh = 0;
2807
2808
2809 if (tcp_check_sack_reneging(sk, flag))
2810 return;
2811
2812
2813 tcp_verify_left_out(tp);
2814
2815
2816
2817 if (icsk->icsk_ca_state == TCP_CA_Open) {
2818 WARN_ON(tp->retrans_out != 0);
2819 tp->retrans_stamp = 0;
2820 } else if (!before(tp->snd_una, tp->high_seq)) {
2821 switch (icsk->icsk_ca_state) {
2822 case TCP_CA_CWR:
2823
2824
2825 if (tp->snd_una != tp->high_seq) {
2826 tcp_end_cwnd_reduction(sk);
2827 tcp_set_ca_state(sk, TCP_CA_Open);
2828 }
2829 break;
2830
2831 case TCP_CA_Recovery:
2832 if (tcp_is_reno(tp))
2833 tcp_reset_reno_sack(tp);
2834 if (tcp_try_undo_recovery(sk))
2835 return;
2836 tcp_end_cwnd_reduction(sk);
2837 break;
2838 }
2839 }
2840
2841
2842 switch (icsk->icsk_ca_state) {
2843 case TCP_CA_Recovery:
2844 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2845 if (tcp_is_reno(tp) && is_dupack)
2846 tcp_add_reno_sack(sk);
2847 } else {
2848 if (tcp_try_undo_partial(sk, acked, prior_unsacked))
2849 return;
2850
2851 do_lost = tcp_is_reno(tp) ||
2852 tcp_fackets_out(tp) > tp->reordering;
2853 }
2854 if (tcp_try_undo_dsack(sk)) {
2855 tcp_try_keep_open(sk);
2856 return;
2857 }
2858 break;
2859 case TCP_CA_Loss:
2860 tcp_process_loss(sk, flag, is_dupack);
2861 if (icsk->icsk_ca_state != TCP_CA_Open)
2862 return;
2863
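		/* The loss state was exited (e.g. undone); fall through to
		 * the Open/Disorder processing below.
		 */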
2864 default:
2865 if (tcp_is_reno(tp)) {
2866 if (flag & FLAG_SND_UNA_ADVANCED)
2867 tcp_reset_reno_sack(tp);
2868 if (is_dupack)
2869 tcp_add_reno_sack(sk);
2870 }
2871
2872 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
2873 tcp_try_undo_dsack(sk);
2874
2875 if (!tcp_time_to_recover(sk, flag)) {
2876 tcp_try_to_open(sk, flag, prior_unsacked);
2877 return;
2878 }
2879
2880
2881 if (icsk->icsk_ca_state < TCP_CA_CWR &&
2882 icsk->icsk_mtup.probe_size &&
2883 tp->snd_una == tp->mtu_probe.probe_seq_start) {
2884 tcp_mtup_probe_failed(sk);
2885
2886 tp->snd_cwnd++;
2887 tcp_simple_retransmit(sk);
2888 return;
2889 }
2890
2891
2892 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2893 fast_rexmit = 1;
2894 }
2895
2896 if (do_lost)
2897 tcp_update_scoreboard(sk, fast_rexmit);
2898 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
2899 tcp_xmit_retransmit_queue(sk);
2900}
2901
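/* Feed a fresh RTT sample into the estimator and recompute the RTO.
 * Retransmitted data is never used for timing (Karn's rule); if no
 * direct sample is available we fall back to SACK timing and, as a
 * last resort, to the echoed timestamp.
 */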
2902static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2903 long seq_rtt_us, long sack_rtt_us)
2904{
2905 const struct tcp_sock *tp = tcp_sk(sk);
2906
2907
2908
2909
2910
2911
2912 if (flag & FLAG_RETRANS_DATA_ACKED)
2913 seq_rtt_us = -1L;
2914
2915 if (seq_rtt_us < 0)
2916 seq_rtt_us = sack_rtt_us;
2917
2918
2919
2920
2921
2922
2923
2924 if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
2925 flag & FLAG_ACKED)
2926 seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
2927
2928 if (seq_rtt_us < 0)
2929 return false;
2930
2931 tcp_rtt_estimator(sk, seq_rtt_us);
2932 tcp_set_rto(sk);
2933
2934
2935 inet_csk(sk)->icsk_backoff = 0;
2936 return true;
2937}
2938
2939
2940static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
2941{
2942 struct tcp_sock *tp = tcp_sk(sk);
2943 long seq_rtt_us = -1L;
2944
2945 if (synack_stamp && !tp->total_retrans)
2946 seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
2947
2948
2949
2950
2951 if (!tp->srtt_us)
2952 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
2953}
2954
2955static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
2956{
2957 const struct inet_connection_sock *icsk = inet_csk(sk);
2958
2959 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
2960 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2961}
2962
2963
2964
2965
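/* Restart the retransmission timer, taking a pending early-retransmit
 * or loss-probe timer into account.
 */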
2966void tcp_rearm_rto(struct sock *sk)
2967{
2968 const struct inet_connection_sock *icsk = inet_csk(sk);
2969 struct tcp_sock *tp = tcp_sk(sk);
2970
2971
2972
2973
2974 if (tp->fastopen_rsk)
2975 return;
2976
2977 if (!tp->packets_out) {
2978 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2979 } else {
2980 u32 rto = inet_csk(sk)->icsk_rto;
2981
2982 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2983 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2984 struct sk_buff *skb = tcp_write_queue_head(sk);
2985 const u32 rto_time_stamp =
2986 tcp_skb_timestamp(skb) + rto;
2987 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
2988
2989
2990
2991 if (delta > 0)
2992 rto = delta;
2993 }
2994 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
2995 TCP_RTO_MAX);
2996 }
2997}
2998
2999
3000
3001
3002void tcp_resume_early_retransmit(struct sock *sk)
3003{
3004 struct tcp_sock *tp = tcp_sk(sk);
3005
3006 tcp_rearm_rto(sk);
3007
3008
3009 if (!tp->do_early_retrans)
3010 return;
3011
3012 tcp_enter_recovery(sk, false);
3013 tcp_update_scoreboard(sk, 1);
3014 tcp_xmit_retransmit_queue(sk);
3015}
3016
3017
3018static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
3019{
3020 struct tcp_sock *tp = tcp_sk(sk);
3021 u32 packets_acked;
3022
3023 BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
3024
3025 packets_acked = tcp_skb_pcount(skb);
3026 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
3027 return 0;
3028 packets_acked -= tcp_skb_pcount(skb);
3029
3030 if (packets_acked) {
3031 BUG_ON(tcp_skb_pcount(skb) == 0);
3032 BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
3033 }
3034
3035 return packets_acked;
3036}
3037
3038static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3039 u32 prior_snd_una)
3040{
3041 const struct skb_shared_info *shinfo;
3042
3043
3044 if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
3045 return;
3046
3047 shinfo = skb_shinfo(skb);
3048 if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
3049 between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
3050 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3051}
3052
3053
3054
3055
3056
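/* Remove acknowledged frames from the retransmission queue, collect
 * RTT samples and reordering hints, and return FLAG_* bits describing
 * what this ACK covered.
 */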
3057static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3058 u32 prior_snd_una, long sack_rtt_us)
3059{
3060 const struct inet_connection_sock *icsk = inet_csk(sk);
3061 struct skb_mstamp first_ackt, last_ackt, now;
3062 struct tcp_sock *tp = tcp_sk(sk);
3063 u32 prior_sacked = tp->sacked_out;
3064 u32 reord = tp->packets_out;
3065 bool fully_acked = true;
3066 long ca_seq_rtt_us = -1L;
3067 long seq_rtt_us = -1L;
3068 struct sk_buff *skb;
3069 u32 pkts_acked = 0;
3070 bool rtt_update;
3071 int flag = 0;
3072
3073 first_ackt.v64 = 0;
3074
3075 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3076 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3077 u8 sacked = scb->sacked;
3078 u32 acked_pcount;
3079
3080 tcp_ack_tstamp(sk, skb, prior_snd_una);
3081
3082
3083 if (after(scb->end_seq, tp->snd_una)) {
3084 if (tcp_skb_pcount(skb) == 1 ||
3085 !after(tp->snd_una, scb->seq))
3086 break;
3087
3088 acked_pcount = tcp_tso_acked(sk, skb);
3089 if (!acked_pcount)
3090 break;
3091
3092 fully_acked = false;
3093 } else {
3094
3095 prefetchw(skb->next);
3096 acked_pcount = tcp_skb_pcount(skb);
3097 }
3098
3099 if (unlikely(sacked & TCPCB_RETRANS)) {
3100 if (sacked & TCPCB_SACKED_RETRANS)
3101 tp->retrans_out -= acked_pcount;
3102 flag |= FLAG_RETRANS_DATA_ACKED;
3103 } else if (!(sacked & TCPCB_SACKED_ACKED)) {
3104 last_ackt = skb->skb_mstamp;
3105 WARN_ON_ONCE(last_ackt.v64 == 0);
3106 if (!first_ackt.v64)
3107 first_ackt = last_ackt;
3108
3109 reord = min(pkts_acked, reord);
3110 if (!after(scb->end_seq, tp->high_seq))
3111 flag |= FLAG_ORIG_SACK_ACKED;
3112 }
3113
3114 if (sacked & TCPCB_SACKED_ACKED)
3115 tp->sacked_out -= acked_pcount;
3116 if (sacked & TCPCB_LOST)
3117 tp->lost_out -= acked_pcount;
3118
3119 tp->packets_out -= acked_pcount;
3120 pkts_acked += acked_pcount;
3121
3122
3123
3124
3125
3126
3127
3128
3129 if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
3130 flag |= FLAG_DATA_ACKED;
3131 } else {
3132 flag |= FLAG_SYN_ACKED;
3133 tp->retrans_stamp = 0;
3134 }
3135
3136 if (!fully_acked)
3137 break;
3138
3139 tcp_unlink_write_queue(skb, sk);
3140 sk_wmem_free_skb(sk, skb);
3141 if (unlikely(skb == tp->retransmit_skb_hint))
3142 tp->retransmit_skb_hint = NULL;
3143 if (unlikely(skb == tp->lost_skb_hint))
3144 tp->lost_skb_hint = NULL;
3145 }
3146
3147 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3148 tp->snd_up = tp->snd_una;
3149
3150 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3151 flag |= FLAG_SACK_RENEGING;
3152
3153 skb_mstamp_get(&now);
3154 if (likely(first_ackt.v64)) {
3155 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
3156 ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
3157 }
3158
3159 rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
3160
3161 if (flag & FLAG_ACKED) {
3162 const struct tcp_congestion_ops *ca_ops
3163 = inet_csk(sk)->icsk_ca_ops;
3164
3165 tcp_rearm_rto(sk);
3166 if (unlikely(icsk->icsk_mtup.probe_size &&
3167 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3168 tcp_mtup_probe_success(sk);
3169 }
3170
3171 if (tcp_is_reno(tp)) {
3172 tcp_remove_reno_sacks(sk, pkts_acked);
3173 } else {
3174 int delta;
3175
3176
3177 if (reord < prior_fackets)
3178 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3179
3180 delta = tcp_is_fack(tp) ? pkts_acked :
3181 prior_sacked - tp->sacked_out;
3182 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3183 }
3184
3185 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3186
3187 if (ca_ops->pkts_acked) {
3188 long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
3189 ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
3190 }
3191
3192 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3193 sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
3194
3195
3196
3197
3198 tcp_rearm_rto(sk);
3199 }
3200
3201#if FASTRETRANS_DEBUG > 0
3202 WARN_ON((int)tp->sacked_out < 0);
3203 WARN_ON((int)tp->lost_out < 0);
3204 WARN_ON((int)tp->retrans_out < 0);
3205 if (!tp->packets_out && tcp_is_sack(tp)) {
3206 icsk = inet_csk(sk);
3207 if (tp->lost_out) {
3208 pr_debug("Leak l=%u %d\n",
3209 tp->lost_out, icsk->icsk_ca_state);
3210 tp->lost_out = 0;
3211 }
3212 if (tp->sacked_out) {
3213 pr_debug("Leak s=%u %d\n",
3214 tp->sacked_out, icsk->icsk_ca_state);
3215 tp->sacked_out = 0;
3216 }
3217 if (tp->retrans_out) {
3218 pr_debug("Leak r=%u %d\n",
3219 tp->retrans_out, icsk->icsk_ca_state);
3220 tp->retrans_out = 0;
3221 }
3222 }
3223#endif
3224 return flag;
3225}
3226
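/* A zero-window probe was answered: stop probing if a usable window
 * has opened, otherwise back the probe timer off further.
 */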
3227static void tcp_ack_probe(struct sock *sk)
3228{
3229 const struct tcp_sock *tp = tcp_sk(sk);
3230 struct inet_connection_sock *icsk = inet_csk(sk);
3231
3232
3233
3234 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3235 icsk->icsk_backoff = 0;
3236 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3237
3238
3239
3240 } else {
3241 unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
3242
3243 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3244 when, TCP_RTO_MAX);
3245 }
3246}
3247
3248static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3249{
3250 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3251 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3252}
3253
3254
3255static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3256{
3257 if (tcp_in_cwnd_reduction(sk))
3258 return false;
3259
3260
3261
3262
3263
3264
3265
3266 if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
3267 return flag & FLAG_FORWARD_PROGRESS;
3268
3269 return flag & FLAG_DATA_ACKED;
3270}
3271
3272
3273
3274
3275static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3276 const u32 ack, const u32 ack_seq,
3277 const u32 nwin)
3278{
3279 return after(ack, tp->snd_una) ||
3280 after(ack_seq, tp->snd_wl1) ||
3281 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3282}
3283
3284
3285static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3286{
3287 u32 delta = ack - tp->snd_una;
3288
3289 u64_stats_update_begin(&tp->syncp);
3290 tp->bytes_acked += delta;
3291 u64_stats_update_end(&tp->syncp);
3292 tp->snd_una = ack;
3293}
3294
3295
3296static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3297{
3298 u32 delta = seq - tp->rcv_nxt;
3299
3300 u64_stats_update_begin(&tp->syncp);
3301 tp->bytes_received += delta;
3302 u64_stats_update_end(&tp->syncp);
3303 tp->rcv_nxt = seq;
3304}
3305
3306
3307
3308
3309
3310
3311static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3312 u32 ack_seq)
3313{
3314 struct tcp_sock *tp = tcp_sk(sk);
3315 int flag = 0;
3316 u32 nwin = ntohs(tcp_hdr(skb)->window);
3317
3318 if (likely(!tcp_hdr(skb)->syn))
3319 nwin <<= tp->rx_opt.snd_wscale;
3320
3321 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3322 flag |= FLAG_WIN_UPDATE;
3323 tcp_update_wl(tp, ack_seq);
3324
3325 if (tp->snd_wnd != nwin) {
3326 tp->snd_wnd = nwin;
3327
3328
3329
3330
3331 tp->pred_flags = 0;
3332 tcp_fast_path_check(sk);
3333
3334 if (nwin > tp->max_window) {
3335 tp->max_window = nwin;
3336 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3337 }
3338 }
3339 }
3340
3341 tcp_snd_una_update(tp, ack);
3342
3343 return flag;
3344}
3345
3346
3347
3348
3349
3350
3351
3352
3353bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3354 int mib_idx, u32 *last_oow_ack_time)
3355{
3356
3357 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
3358 !tcp_hdr(skb)->syn)
3359 goto not_rate_limited;
3360
3361 if (*last_oow_ack_time) {
3362 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
3363
3364 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
3365 NET_INC_STATS_BH(net, mib_idx);
3366 return true;
3367 }
3368 }
3369
3370 *last_oow_ack_time = tcp_time_stamp;
3371
3372not_rate_limited:
3373 return false;
3374}
3375
3376
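/* RFC 5961 7.2: answer a suspicious segment with a challenge ACK,
 * rate limited both per socket and globally.
 */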
3377static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3378{
3379
3380 static u32 challenge_timestamp;
3381 static unsigned int challenge_count;
3382 struct tcp_sock *tp = tcp_sk(sk);
3383 u32 now;
3384
3385
3386 if (tcp_oow_rate_limited(sock_net(sk), skb,
3387 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3388 &tp->last_oow_ack_time))
3389 return;
3390
3391
3392 now = jiffies / HZ;
3393 if (now != challenge_timestamp) {
3394 challenge_timestamp = now;
3395 challenge_count = 0;
3396 }
3397 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3398 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3399 tcp_send_ack(sk);
3400 }
3401}
3402
3403static void tcp_store_ts_recent(struct tcp_sock *tp)
3404{
3405 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3406 tp->rx_opt.ts_recent_stamp = get_seconds();
3407}
3408
3409static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3410{
3411 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3412
3413
3414
3415
3416
3417
3418
3419 if (tcp_paws_check(&tp->rx_opt, 0))
3420 tcp_store_ts_recent(tp);
3421 }
3422}
3423
3424
3425
3426
3427
3428
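/* Handle the ACK that resolves a tail loss probe (TLP).  A D-SACK for
 * the probe means the original was delivered; an ACK beyond the probed
 * sequence means the probe repaired a real loss, so reduce cwnd once.
 */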
3429static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3430{
3431 struct tcp_sock *tp = tcp_sk(sk);
3432
3433 if (before(ack, tp->tlp_high_seq))
3434 return;
3435
3436 if (flag & FLAG_DSACKING_ACK) {
3437
3438 tp->tlp_high_seq = 0;
3439 } else if (after(ack, tp->tlp_high_seq)) {
3440
3441
3442
3443 tcp_init_cwnd_reduction(sk);
3444 tcp_set_ca_state(sk, TCP_CA_CWR);
3445 tcp_end_cwnd_reduction(sk);
3446 tcp_try_keep_open(sk);
3447 NET_INC_STATS_BH(sock_net(sk),
3448 LINUX_MIB_TCPLOSSPROBERECOVERY);
3449 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3450 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3451
3452 tp->tlp_high_seq = 0;
3453 }
3454}
3455
3456static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3457{
3458 const struct inet_connection_sock *icsk = inet_csk(sk);
3459
3460 if (icsk->icsk_ca_ops->in_ack_event)
3461 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3462}
3463
3464
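/* This routine deals with incoming ACKs: it updates the send window
 * and SND.UNA, cleans the retransmission queue, and drives congestion
 * control and the fast-retransmit state machine.
 */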
3465static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3466{
3467 struct inet_connection_sock *icsk = inet_csk(sk);
3468 struct tcp_sock *tp = tcp_sk(sk);
3469 u32 prior_snd_una = tp->snd_una;
3470 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3471 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3472 bool is_dupack = false;
3473 u32 prior_fackets;
3474 int prior_packets = tp->packets_out;
3475 const int prior_unsacked = tp->packets_out - tp->sacked_out;
3476 int acked = 0;
3477 long sack_rtt_us = -1L;
3478
3479
3480 prefetchw(sk->sk_write_queue.next);
3481
3482
3483
3484
3485 if (before(ack, prior_snd_una)) {
3486
3487 if (before(ack, prior_snd_una - tp->max_window)) {
3488 tcp_send_challenge_ack(sk, skb);
3489 return -1;
3490 }
3491 goto old_ack;
3492 }
3493
3494
3495
3496
3497 if (after(ack, tp->snd_nxt))
3498 goto invalid_ack;
3499
3500 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3501 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3502 tcp_rearm_rto(sk);
3503
3504 if (after(ack, prior_snd_una)) {
3505 flag |= FLAG_SND_UNA_ADVANCED;
3506 icsk->icsk_retransmits = 0;
3507 }
3508
3509 prior_fackets = tp->fackets_out;
3510
3511
3512
3513
3514 if (flag & FLAG_UPDATE_TS_RECENT)
3515 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3516
3517 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3518
3519
3520
3521
3522 tcp_update_wl(tp, ack_seq);
3523 tcp_snd_una_update(tp, ack);
3524 flag |= FLAG_WIN_UPDATE;
3525
3526 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3527
3528 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3529 } else {
3530 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3531
3532 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3533 flag |= FLAG_DATA;
3534 else
3535 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3536
3537 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3538
3539 if (TCP_SKB_CB(skb)->sacked)
3540 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3541 &sack_rtt_us);
3542
3543 if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
3544 flag |= FLAG_ECE;
3545 ack_ev_flags |= CA_ACK_ECE;
3546 }
3547
3548 if (flag & FLAG_WIN_UPDATE)
3549 ack_ev_flags |= CA_ACK_WIN_UPDATE;
3550
3551 tcp_in_ack_event(sk, ack_ev_flags);
3552 }
3553
3554
3555
3556
3557 sk->sk_err_soft = 0;
3558 icsk->icsk_probes_out = 0;
3559 tp->rcv_tstamp = tcp_time_stamp;
3560 if (!prior_packets)
3561 goto no_queue;
3562
3563
3564 acked = tp->packets_out;
3565 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3566 sack_rtt_us);
3567 acked -= tp->packets_out;
3568
3569
3570 if (tcp_may_raise_cwnd(sk, flag))
3571 tcp_cong_avoid(sk, ack, acked);
3572
3573 if (tcp_ack_is_dubious(sk, flag)) {
3574 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3575 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3576 is_dupack, flag);
3577 }
3578 if (tp->tlp_high_seq)
3579 tcp_process_tlp_ack(sk, ack, flag);
3580
3581 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3582 struct dst_entry *dst = __sk_dst_get(sk);
3583 if (dst)
3584 dst_confirm(dst);
3585 }
3586
3587 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3588 tcp_schedule_loss_probe(sk);
3589 tcp_update_pacing_rate(sk);
3590 return 1;
3591
3592no_queue:
3593
3594 if (flag & FLAG_DSACKING_ACK)
3595 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3596 is_dupack, flag);
3597
3598
3599
3600
3601 if (tcp_send_head(sk))
3602 tcp_ack_probe(sk);
3603
3604 if (tp->tlp_high_seq)
3605 tcp_process_tlp_ack(sk, ack, flag);
3606 return 1;
3607
3608invalid_ack:
3609 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3610 return -1;
3611
3612old_ack:
3613
3614
3615
3616 if (TCP_SKB_CB(skb)->sacked) {
3617 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3618 &sack_rtt_us);
3619 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3620 is_dupack, flag);
3621 }
3622
3623 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3624 return 0;
3625}
3626
3627static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3628 bool syn, struct tcp_fastopen_cookie *foc,
3629 bool exp_opt)
3630{
3631
3632 if (!foc || !syn || len < 0 || (len & 1))
3633 return;
3634
3635 if (len >= TCP_FASTOPEN_COOKIE_MIN &&
3636 len <= TCP_FASTOPEN_COOKIE_MAX)
3637 memcpy(foc->val, cookie, len);
3638 else if (len != 0)
3639 len = -1;
3640 foc->len = len;
3641 foc->exp = exp_opt;
3642}
3643
3644
3645
3646
3647
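/* Parse the TCP option list of a segment into opt_rx.  Needed for
 * SYN/SYN-ACK processing, and for established-flow segments whenever
 * the aligned-timestamp fast path below does not apply.
 */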
3648void tcp_parse_options(const struct sk_buff *skb,
3649 struct tcp_options_received *opt_rx, int estab,
3650 struct tcp_fastopen_cookie *foc)
3651{
3652 const unsigned char *ptr;
3653 const struct tcphdr *th = tcp_hdr(skb);
3654 int length = (th->doff * 4) - sizeof(struct tcphdr);
3655
3656 ptr = (const unsigned char *)(th + 1);
3657 opt_rx->saw_tstamp = 0;
3658
3659 while (length > 0) {
3660 int opcode = *ptr++;
3661 int opsize;
3662
3663 switch (opcode) {
3664 case TCPOPT_EOL:
3665 return;
3666 case TCPOPT_NOP:
3667 length--;
3668 continue;
3669 default:
3670 opsize = *ptr++;
3671 if (opsize < 2)
3672 return;
3673 if (opsize > length)
3674 return;
3675 switch (opcode) {
3676 case TCPOPT_MSS:
3677 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3678 u16 in_mss = get_unaligned_be16(ptr);
3679 if (in_mss) {
3680 if (opt_rx->user_mss &&
3681 opt_rx->user_mss < in_mss)
3682 in_mss = opt_rx->user_mss;
3683 opt_rx->mss_clamp = in_mss;
3684 }
3685 }
3686 break;
3687 case TCPOPT_WINDOW:
3688 if (opsize == TCPOLEN_WINDOW && th->syn &&
3689 !estab && sysctl_tcp_window_scaling) {
3690 __u8 snd_wscale = *(__u8 *)ptr;
3691 opt_rx->wscale_ok = 1;
3692 if (snd_wscale > 14) {
3693 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3694 __func__,
3695 snd_wscale);
3696 snd_wscale = 14;
3697 }
3698 opt_rx->snd_wscale = snd_wscale;
3699 }
3700 break;
3701 case TCPOPT_TIMESTAMP:
3702 if ((opsize == TCPOLEN_TIMESTAMP) &&
3703 ((estab && opt_rx->tstamp_ok) ||
3704 (!estab && sysctl_tcp_timestamps))) {
3705 opt_rx->saw_tstamp = 1;
3706 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3707 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
3708 }
3709 break;
3710 case TCPOPT_SACK_PERM:
3711 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3712 !estab && sysctl_tcp_sack) {
3713 opt_rx->sack_ok = TCP_SACK_SEEN;
3714 tcp_sack_reset(opt_rx);
3715 }
3716 break;
3717
3718 case TCPOPT_SACK:
3719 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3720 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3721 opt_rx->sack_ok) {
3722 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3723 }
3724 break;
3725#ifdef CONFIG_TCP_MD5SIG
3726 case TCPOPT_MD5SIG:
3727
3728
3729
3730
3731 break;
3732#endif
3733 case TCPOPT_FASTOPEN:
3734 tcp_parse_fastopen_option(
3735 opsize - TCPOLEN_FASTOPEN_BASE,
3736 ptr, th->syn, foc, false);
3737 break;
3738
3739 case TCPOPT_EXP:
3740
3741
3742
3743 if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
3744 get_unaligned_be16(ptr) ==
3745 TCPOPT_FASTOPEN_MAGIC)
3746 tcp_parse_fastopen_option(opsize -
3747 TCPOLEN_EXP_FASTOPEN_BASE,
3748 ptr + 2, th->syn, foc, true);
3749 break;
3750
3751 }
3752 ptr += opsize-2;
3753 length -= opsize;
3754 }
3755 }
3756}
3757EXPORT_SYMBOL(tcp_parse_options);
3758
3759static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3760{
3761 const __be32 *ptr = (const __be32 *)(th + 1);
3762
3763 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3764 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
3765 tp->rx_opt.saw_tstamp = 1;
3766 ++ptr;
3767 tp->rx_opt.rcv_tsval = ntohl(*ptr);
3768 ++ptr;
3769 if (*ptr)
3770 tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
3771 else
3772 tp->rx_opt.rcv_tsecr = 0;
3773 return true;
3774 }
3775 return false;
3776}
3777
3778
3779
3780
3781static bool tcp_fast_parse_options(const struct sk_buff *skb,
3782 const struct tcphdr *th, struct tcp_sock *tp)
3783{
3784
3785
3786
3787 if (th->doff == (sizeof(*th) / 4)) {
3788 tp->rx_opt.saw_tstamp = 0;
3789 return false;
3790 } else if (tp->rx_opt.tstamp_ok &&
3791 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3792 if (tcp_parse_aligned_timestamp(tp, th))
3793 return true;
3794 }
3795
3796 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3797 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
3798 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3799
3800 return true;
3801}
3802
3803#ifdef CONFIG_TCP_MD5SIG
3804
3805
3806
3807const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3808{
3809 int length = (th->doff << 2) - sizeof(*th);
3810 const u8 *ptr = (const u8 *)(th + 1);
3811
3812
3813 if (length < TCPOLEN_MD5SIG)
3814 return NULL;
3815
3816 while (length > 0) {
3817 int opcode = *ptr++;
3818 int opsize;
3819
3820 switch (opcode) {
3821 case TCPOPT_EOL:
3822 return NULL;
3823 case TCPOPT_NOP:
3824 length--;
3825 continue;
3826 default:
3827 opsize = *ptr++;
3828 if (opsize < 2 || opsize > length)
3829 return NULL;
3830 if (opcode == TCPOPT_MD5SIG)
3831 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3832 }
3833 ptr += opsize - 2;
3834 length -= opsize;
3835 }
3836 return NULL;
3837}
3838EXPORT_SYMBOL(tcp_parse_md5sig_option);
3839#endif
3840
3841
3863
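/* Heuristic used by PAWS: a pure, in-window ACK that does not update
 * the window and whose timestamp is only slightly old is treated as a
 * delayed/reordered segment rather than a timestamp violation.
 */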
3864static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3865{
3866 const struct tcp_sock *tp = tcp_sk(sk);
3867 const struct tcphdr *th = tcp_hdr(skb);
3868 u32 seq = TCP_SKB_CB(skb)->seq;
3869 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3870
3871 return (
3872 (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
3873
3874
3875 ack == tp->snd_una &&
3876
3877
3878 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3879
3880
3881 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3882}
3883
3884static inline bool tcp_paws_discard(const struct sock *sk,
3885 const struct sk_buff *skb)
3886{
3887 const struct tcp_sock *tp = tcp_sk(sk);
3888
3889 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3890 !tcp_disordered_ack(sk, skb);
3891}
3892
3905
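/* Validate a segment's sequence range: it must not lie entirely before
 * RCV.WUP nor start beyond the right edge of the receive window.
 */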
3906static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
3907{
3908 return !before(end_seq, tp->rcv_wup) &&
3909 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3910}
3911
3912
3913void tcp_reset(struct sock *sk)
3914{
3915
3916 switch (sk->sk_state) {
3917 case TCP_SYN_SENT:
3918 sk->sk_err = ECONNREFUSED;
3919 break;
3920 case TCP_CLOSE_WAIT:
3921 sk->sk_err = EPIPE;
3922 break;
3923 case TCP_CLOSE:
3924 return;
3925 default:
3926 sk->sk_err = ECONNRESET;
3927 }
3928
3929 smp_wmb();
3930
3931 if (!sock_flag(sk, SOCK_DEAD))
3932 sk->sk_error_report(sk);
3933
3934 tcp_done(sk);
3935}
3936
3937
3950
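/* Process the incoming FIN: schedule an ACK, walk the connection
 * through the appropriate close state, purge the out-of-order queue
 * and wake up anyone waiting on the socket.
 */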
3951static void tcp_fin(struct sock *sk)
3952{
3953 struct tcp_sock *tp = tcp_sk(sk);
3954 const struct dst_entry *dst;
3955
3956 inet_csk_schedule_ack(sk);
3957
3958 sk->sk_shutdown |= RCV_SHUTDOWN;
3959 sock_set_flag(sk, SOCK_DONE);
3960
3961 switch (sk->sk_state) {
3962 case TCP_SYN_RECV:
3963 case TCP_ESTABLISHED:
3964
3965 tcp_set_state(sk, TCP_CLOSE_WAIT);
3966 dst = __sk_dst_get(sk);
3967 if (!dst || !dst_metric(dst, RTAX_QUICKACK))
3968 inet_csk(sk)->icsk_ack.pingpong = 1;
3969 break;
3970
3971 case TCP_CLOSE_WAIT:
3972 case TCP_CLOSING:
3973
3974
3975
3976 break;
3977 case TCP_LAST_ACK:
3978
3979 break;
3980
3981 case TCP_FIN_WAIT1:
3982
3983
3984
3985
3986 tcp_send_ack(sk);
3987 tcp_set_state(sk, TCP_CLOSING);
3988 break;
3989 case TCP_FIN_WAIT2:
3990
3991 tcp_send_ack(sk);
3992 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3993 break;
3994 default:
3995
3996
3997
3998 pr_err("%s: Impossible, sk->sk_state=%d\n",
3999 __func__, sk->sk_state);
4000 break;
4001 }
4002
4003
4004
4005
4006 __skb_queue_purge(&tp->out_of_order_queue);
4007 if (tcp_is_sack(tp))
4008 tcp_sack_reset(&tp->rx_opt);
4009 sk_mem_reclaim(sk);
4010
4011 if (!sock_flag(sk, SOCK_DEAD)) {
4012 sk->sk_state_change(sk);
4013
4014
4015 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4016 sk->sk_state == TCP_CLOSE)
4017 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4018 else
4019 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4020 }
4021}
4022
4023static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4024 u32 end_seq)
4025{
4026 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
4027 if (before(seq, sp->start_seq))
4028 sp->start_seq = seq;
4029 if (after(end_seq, sp->end_seq))
4030 sp->end_seq = end_seq;
4031 return true;
4032 }
4033 return false;
4034}
4035
4036static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4037{
4038 struct tcp_sock *tp = tcp_sk(sk);
4039
4040 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4041 int mib_idx;
4042
4043 if (before(seq, tp->rcv_nxt))
4044 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
4045 else
4046 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4047
4048 NET_INC_STATS_BH(sock_net(sk), mib_idx);
4049
4050 tp->rx_opt.dsack = 1;
4051 tp->duplicate_sack[0].start_seq = seq;
4052 tp->duplicate_sack[0].end_seq = end_seq;
4053 }
4054}
4055
4056static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4057{
4058 struct tcp_sock *tp = tcp_sk(sk);
4059
4060 if (!tp->rx_opt.dsack)
4061 tcp_dsack_set(sk, seq, end_seq);
4062 else
4063 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4064}
4065
4066static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4067{
4068 struct tcp_sock *tp = tcp_sk(sk);
4069
4070 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4071 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4072 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4073 tcp_enter_quickack_mode(sk);
4074
4075 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4076 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4077
4078 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4079 end_seq = tp->rcv_nxt;
4080 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4081 }
4082 }
4083
4084 tcp_send_ack(sk);
4085}
4086
4087
4088
4089
4090static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4091{
4092 int this_sack;
4093 struct tcp_sack_block *sp = &tp->selective_acks[0];
4094 struct tcp_sack_block *swalk = sp + 1;
4095
4096
4097
4098
4099 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4100 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
4101 int i;
4102
4103
4104
4105
4106 tp->rx_opt.num_sacks--;
4107 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4108 sp[i] = sp[i + 1];
4109 continue;
4110 }
4111 this_sack++, swalk++;
4112 }
4113}
4114
4115static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4116{
4117 struct tcp_sock *tp = tcp_sk(sk);
4118 struct tcp_sack_block *sp = &tp->selective_acks[0];
4119 int cur_sacks = tp->rx_opt.num_sacks;
4120 int this_sack;
4121
4122 if (!cur_sacks)
4123 goto new_sack;
4124
4125 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4126 if (tcp_sack_extend(sp, seq, end_seq)) {
4127
4128 for (; this_sack > 0; this_sack--, sp--)
4129 swap(*sp, *(sp - 1));
4130 if (cur_sacks > 1)
4131 tcp_sack_maybe_coalesce(tp);
4132 return;
4133 }
4134 }
4135
4136
4137
4138
4139
4140
4141
4142 if (this_sack >= TCP_NUM_SACKS) {
4143 this_sack--;
4144 tp->rx_opt.num_sacks--;
4145 sp--;
4146 }
4147 for (; this_sack > 0; this_sack--, sp--)
4148 *sp = *(sp - 1);
4149
4150new_sack:
4151
4152 sp->start_seq = seq;
4153 sp->end_seq = end_seq;
4154 tp->rx_opt.num_sacks++;
4155}
4156
4157
4158
4159static void tcp_sack_remove(struct tcp_sock *tp)
4160{
4161 struct tcp_sack_block *sp = &tp->selective_acks[0];
4162 int num_sacks = tp->rx_opt.num_sacks;
4163 int this_sack;
4164
4165
4166 if (skb_queue_empty(&tp->out_of_order_queue)) {
4167 tp->rx_opt.num_sacks = 0;
4168 return;
4169 }
4170
4171 for (this_sack = 0; this_sack < num_sacks;) {
4172
4173 if (!before(tp->rcv_nxt, sp->start_seq)) {
4174 int i;
4175
4176
4177 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4178
4179
4180 for (i = this_sack+1; i < num_sacks; i++)
4181 tp->selective_acks[i-1] = tp->selective_acks[i];
4182 num_sacks--;
4183 continue;
4184 }
4185 this_sack++;
4186 sp++;
4187 }
4188 tp->rx_opt.num_sacks = num_sacks;
4189}
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
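/* Before queueing skb "from" after "to", try to merge them to reduce
 * overall memory use and queue length.  Returns true if the caller
 * should free "from" instead of queueing it.
 */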
4204static bool tcp_try_coalesce(struct sock *sk,
4205 struct sk_buff *to,
4206 struct sk_buff *from,
4207 bool *fragstolen)
4208{
4209 int delta;
4210
4211 *fragstolen = false;
4212
4213
4214 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4215 return false;
4216
4217 if (!skb_try_coalesce(to, from, fragstolen, &delta))
4218 return false;
4219
4220 atomic_add(delta, &sk->sk_rmem_alloc);
4221 sk_mem_charge(sk, delta);
4222 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4223 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4224 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4225 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4226 return true;
4227}
4228
4229
4230
4231
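/* Move now-in-sequence segments from the out-of-order queue onto the
 * receive queue, emitting D-SACK information for any duplicate data.
 */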
4232static void tcp_ofo_queue(struct sock *sk)
4233{
4234 struct tcp_sock *tp = tcp_sk(sk);
4235 __u32 dsack_high = tp->rcv_nxt;
4236 struct sk_buff *skb, *tail;
4237 bool fragstolen, eaten;
4238
4239 while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
4240 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4241 break;
4242
4243 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4244 __u32 dsack = dsack_high;
4245 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4246 dsack_high = TCP_SKB_CB(skb)->end_seq;
4247 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4248 }
4249
4250 __skb_unlink(skb, &tp->out_of_order_queue);
4251 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4252 SOCK_DEBUG(sk, "ofo packet was already received\n");
4253 __kfree_skb(skb);
4254 continue;
4255 }
4256 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4257 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4258 TCP_SKB_CB(skb)->end_seq);
4259
4260 tail = skb_peek_tail(&sk->sk_receive_queue);
4261 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4262 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4263 if (!eaten)
4264 __skb_queue_tail(&sk->sk_receive_queue, skb);
4265 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4266 tcp_fin(sk);
4267 if (eaten)
4268 kfree_skb_partial(skb, fragstolen);
4269 }
4270}
4271
4272static bool tcp_prune_ofo_queue(struct sock *sk);
4273static int tcp_prune_queue(struct sock *sk);
4274
4275static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4276 unsigned int size)
4277{
4278 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4279 !sk_rmem_schedule(sk, skb, size)) {
4280
4281 if (tcp_prune_queue(sk) < 0)
4282 return -1;
4283
4284 if (!sk_rmem_schedule(sk, skb, size)) {
4285 if (!tcp_prune_ofo_queue(sk))
4286 return -1;
4287
4288 if (!sk_rmem_schedule(sk, skb, size))
4289 return -1;
4290 }
4291 }
4292 return 0;
4293}
4294
4295static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4296{
4297 struct tcp_sock *tp = tcp_sk(sk);
4298 struct sk_buff *skb1;
4299 u32 seq, end_seq;
4300
4301 tcp_ecn_check_ce(tp, skb);
4302
4303 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4304 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
4305 __kfree_skb(skb);
4306 return;
4307 }
4308
4309
4310 tp->pred_flags = 0;
4311 inet_csk_schedule_ack(sk);
4312
4313 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4314 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4315 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4316
4317 skb1 = skb_peek_tail(&tp->out_of_order_queue);
4318 if (!skb1) {
4319
4320 if (tcp_is_sack(tp)) {
4321 tp->rx_opt.num_sacks = 1;
4322 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4323 tp->selective_acks[0].end_seq =
4324 TCP_SKB_CB(skb)->end_seq;
4325 }
4326 __skb_queue_head(&tp->out_of_order_queue, skb);
4327 goto end;
4328 }
4329
4330 seq = TCP_SKB_CB(skb)->seq;
4331 end_seq = TCP_SKB_CB(skb)->end_seq;
4332
4333 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4334 bool fragstolen;
4335
4336 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4337 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4338 } else {
4339 tcp_grow_window(sk, skb);
4340 kfree_skb_partial(skb, fragstolen);
4341 skb = NULL;
4342 }
4343
4344 if (!tp->rx_opt.num_sacks ||
4345 tp->selective_acks[0].end_seq != seq)
4346 goto add_sack;
4347
4348
4349 tp->selective_acks[0].end_seq = end_seq;
4350 goto end;
4351 }
4352
4353
4354 while (1) {
4355 if (!after(TCP_SKB_CB(skb1)->seq, seq))
4356 break;
4357 if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
4358 skb1 = NULL;
4359 break;
4360 }
4361 skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
4362 }
4363
4364
4365 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4366 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4367
4368 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4369 __kfree_skb(skb);
4370 skb = NULL;
4371 tcp_dsack_set(sk, seq, end_seq);
4372 goto add_sack;
4373 }
4374 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4375
4376 tcp_dsack_set(sk, seq,
4377 TCP_SKB_CB(skb1)->end_seq);
4378 } else {
4379 if (skb_queue_is_first(&tp->out_of_order_queue,
4380 skb1))
4381 skb1 = NULL;
4382 else
4383 skb1 = skb_queue_prev(
4384 &tp->out_of_order_queue,
4385 skb1);
4386 }
4387 }
4388 if (!skb1)
4389 __skb_queue_head(&tp->out_of_order_queue, skb);
4390 else
4391 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4392
4393
4394 while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
4395 skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
4396
4397 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4398 break;
4399 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4400 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4401 end_seq);
4402 break;
4403 }
4404 __skb_unlink(skb1, &tp->out_of_order_queue);
4405 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4406 TCP_SKB_CB(skb1)->end_seq);
4407 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4408 __kfree_skb(skb1);
4409 }
4410
4411add_sack:
4412 if (tcp_is_sack(tp))
4413 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4414end:
4415 if (skb) {
4416 tcp_grow_window(sk, skb);
4417 skb_set_owner_r(skb, sk);
4418 }
4419}
4420
4421static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4422 bool *fragstolen)
4423{
4424 int eaten;
4425 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4426
4427 __skb_pull(skb, hdrlen);
4428 eaten = (tail &&
4429 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4430 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4431 if (!eaten) {
4432 __skb_queue_tail(&sk->sk_receive_queue, skb);
4433 skb_set_owner_r(skb, sk);
4434 }
4435 return eaten;
4436}
4437
4438int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4439{
4440 struct sk_buff *skb;
4441 bool fragstolen;
4442
4443 if (size == 0)
4444 return 0;
4445
4446 skb = alloc_skb(size, sk->sk_allocation);
4447 if (!skb)
4448 goto err;
4449
4450 if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4451 goto err_free;
4452
4453 if (memcpy_from_msg(skb_put(skb, size), msg, size))
4454 goto err_free;
4455
4456 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4457 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4458 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4459
4460 if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
4461 WARN_ON_ONCE(fragstolen);
4462 __kfree_skb(skb);
4463 }
4464 return size;
4465
4466err_free:
4467 kfree_skb(skb);
4468err:
4469 return -ENOMEM;
4470}
4471
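/* Queue an incoming data segment: the in-order case goes straight to
 * the receive queue (or to a waiting reader), duplicates generate a
 * D-SACK, and out-of-order data is handed to tcp_data_queue_ofo().
 */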
4472static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4473{
4474 struct tcp_sock *tp = tcp_sk(sk);
4475 int eaten = -1;
4476 bool fragstolen = false;
4477
4478 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4479 goto drop;
4480
4481 skb_dst_drop(skb);
4482 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
4483
4484 tcp_ecn_accept_cwr(tp, skb);
4485
4486 tp->rx_opt.dsack = 0;
4487
4488
4489
4490
4491
4492 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4493 if (tcp_receive_window(tp) == 0)
4494 goto out_of_window;
4495
4496
4497 if (tp->ucopy.task == current &&
4498 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
4499 sock_owned_by_user(sk) && !tp->urg_data) {
4500 int chunk = min_t(unsigned int, skb->len,
4501 tp->ucopy.len);
4502
4503 __set_current_state(TASK_RUNNING);
4504
4505 local_bh_enable();
4506 if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
4507 tp->ucopy.len -= chunk;
4508 tp->copied_seq += chunk;
4509 eaten = (chunk == skb->len);
4510 tcp_rcv_space_adjust(sk);
4511 }
4512 local_bh_disable();
4513 }
4514
4515 if (eaten <= 0) {
4516queue_and_out:
4517 if (eaten < 0 &&
4518 tcp_try_rmem_schedule(sk, skb, skb->truesize))
4519 goto drop;
4520
4521 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4522 }
4523 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4524 if (skb->len)
4525 tcp_event_data_recv(sk, skb);
4526 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4527 tcp_fin(sk);
4528
4529 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4530 tcp_ofo_queue(sk);
4531
4532
4533
4534
4535 if (skb_queue_empty(&tp->out_of_order_queue))
4536 inet_csk(sk)->icsk_ack.pingpong = 0;
4537 }
4538
4539 if (tp->rx_opt.num_sacks)
4540 tcp_sack_remove(tp);
4541
4542 tcp_fast_path_check(sk);
4543
4544 if (eaten > 0)
4545 kfree_skb_partial(skb, fragstolen);
4546 if (!sock_flag(sk, SOCK_DEAD))
4547 sk->sk_data_ready(sk);
4548 return;
4549 }
4550
4551 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4552
4553 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4554 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4555
4556out_of_window:
4557 tcp_enter_quickack_mode(sk);
4558 inet_csk_schedule_ack(sk);
4559drop:
4560 __kfree_skb(skb);
4561 return;
4562 }
4563
4564
4565 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
4566 goto out_of_window;
4567
4568 tcp_enter_quickack_mode(sk);
4569
4570 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4571
4572 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4573 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4574 TCP_SKB_CB(skb)->end_seq);
4575
4576 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4577
4578
4579
4580
4581 if (!tcp_receive_window(tp))
4582 goto out_of_window;
4583 goto queue_and_out;
4584 }
4585
4586 tcp_data_queue_ofo(sk, skb);
4587}
4588
4589static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4590 struct sk_buff_head *list)
4591{
4592 struct sk_buff *next = NULL;
4593
4594 if (!skb_queue_is_last(list, skb))
4595 next = skb_queue_next(list, skb);
4596
4597 __skb_unlink(skb, list);
4598 __kfree_skb(skb);
4599 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4600
4601 return next;
4602}
4603
4604
4605
4606
4607
4608
4609
4610
4611
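/* Collapse contiguous segments covering [start, end) in the given
 * queue into freshly allocated, tightly packed skbs so that less
 * memory is pinned per byte of queued data.  SYN/FIN segments are
 * never collapsed.
 */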
4612static void
4613tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4614 struct sk_buff *head, struct sk_buff *tail,
4615 u32 start, u32 end)
4616{
4617 struct sk_buff *skb, *n;
4618 bool end_of_skbs;
4619
4620
4621
4622 skb = head;
4623restart:
4624 end_of_skbs = true;
4625 skb_queue_walk_from_safe(list, skb, n) {
4626 if (skb == tail)
4627 break;
4628
4629 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4630 skb = tcp_collapse_one(sk, skb, list);
4631 if (!skb)
4632 break;
4633 goto restart;
4634 }
4635
4636
4637
4638
4639
4640
4641 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4642 (tcp_win_from_space(skb->truesize) > skb->len ||
4643 before(TCP_SKB_CB(skb)->seq, start))) {
4644 end_of_skbs = false;
4645 break;
4646 }
4647
4648 if (!skb_queue_is_last(list, skb)) {
4649 struct sk_buff *next = skb_queue_next(list, skb);
4650 if (next != tail &&
4651 TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
4652 end_of_skbs = false;
4653 break;
4654 }
4655 }
4656
4657
4658 start = TCP_SKB_CB(skb)->end_seq;
4659 }
4660 if (end_of_skbs ||
4661 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4662 return;
4663
4664 while (before(start, end)) {
4665 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4666 struct sk_buff *nskb;
4667
4668 nskb = alloc_skb(copy, GFP_ATOMIC);
4669 if (!nskb)
4670 return;
4671
4672 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4673 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4674 __skb_queue_before(list, skb, nskb);
4675 skb_set_owner_r(nskb, sk);
4676
4677
4678 while (copy > 0) {
4679 int offset = start - TCP_SKB_CB(skb)->seq;
4680 int size = TCP_SKB_CB(skb)->end_seq - start;
4681
4682 BUG_ON(offset < 0);
4683 if (size > 0) {
4684 size = min(copy, size);
4685 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
4686 BUG();
4687 TCP_SKB_CB(nskb)->end_seq += size;
4688 copy -= size;
4689 start += size;
4690 }
4691 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4692 skb = tcp_collapse_one(sk, skb, list);
4693 if (!skb ||
4694 skb == tail ||
4695 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4696 return;
4697 }
4698 }
4699 }
4700}
4701
4702
4703
4704
4705static void tcp_collapse_ofo_queue(struct sock *sk)
4706{
4707 struct tcp_sock *tp = tcp_sk(sk);
4708 struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
4709 struct sk_buff *head;
4710 u32 start, end;
4711
4712 if (!skb)
4713 return;
4714
4715 start = TCP_SKB_CB(skb)->seq;
4716 end = TCP_SKB_CB(skb)->end_seq;
4717 head = skb;
4718
4719 for (;;) {
4720 struct sk_buff *next = NULL;
4721
4722 if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
4723 next = skb_queue_next(&tp->out_of_order_queue, skb);
4724 skb = next;
4725
4726
4727
4728 if (!skb ||
4729 after(TCP_SKB_CB(skb)->seq, end) ||
4730 before(TCP_SKB_CB(skb)->end_seq, start)) {
4731 tcp_collapse(sk, &tp->out_of_order_queue,
4732 head, skb, start, end);
4733 head = skb;
4734 if (!skb)
4735 break;
4736
4737 start = TCP_SKB_CB(skb)->seq;
4738 end = TCP_SKB_CB(skb)->end_seq;
4739 } else {
4740 if (before(TCP_SKB_CB(skb)->seq, start))
4741 start = TCP_SKB_CB(skb)->seq;
4742 if (after(TCP_SKB_CB(skb)->end_seq, end))
4743 end = TCP_SKB_CB(skb)->end_seq;
4744 }
4745 }
4746}
4747
4748
4749
4750
4751
4752static bool tcp_prune_ofo_queue(struct sock *sk)
4753{
4754 struct tcp_sock *tp = tcp_sk(sk);
4755 bool res = false;
4756
4757 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4758 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4759 __skb_queue_purge(&tp->out_of_order_queue);
4760
4761
4762
4763
4764
4765
4766 if (tp->rx_opt.sack_ok)
4767 tcp_sack_reset(&tp->rx_opt);
4768 sk_mem_reclaim(sk);
4769 res = true;
4770 }
4771 return res;
4772}
4773
4774
4775
4776
4777
4778
4779
4780
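/* Reduce allocated receive memory when the socket has run over its
 * limits: clamp the window, collapse the queues and, as a last
 * resort, drop the whole out-of-order queue.  Returns 0 on success,
 * -1 if the socket is still over budget.
 */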
4781static int tcp_prune_queue(struct sock *sk)
4782{
4783 struct tcp_sock *tp = tcp_sk(sk);
4784
4785 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4786
4787 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4788
4789 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4790 tcp_clamp_window(sk);
4791 else if (sk_under_memory_pressure(sk))
4792 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4793
4794 tcp_collapse_ofo_queue(sk);
4795 if (!skb_queue_empty(&sk->sk_receive_queue))
4796 tcp_collapse(sk, &sk->sk_receive_queue,
4797 skb_peek(&sk->sk_receive_queue),
4798 NULL,
4799 tp->copied_seq, tp->rcv_nxt);
4800 sk_mem_reclaim(sk);
4801
4802 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4803 return 0;
4804
4805
4806
4807
4808 tcp_prune_ofo_queue(sk);
4809
4810 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4811 return 0;
4812
4813
4814
4815
4816
4817 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4818
4819
4820 tp->pred_flags = 0;
4821 return -1;
4822}
4823
4824static bool tcp_should_expand_sndbuf(const struct sock *sk)
4825{
4826 const struct tcp_sock *tp = tcp_sk(sk);
4827
4828
4829
4830
4831 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
4832 return false;
4833
4834
4835 if (sk_under_memory_pressure(sk))
4836 return false;
4837
4838
4839 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4840 return false;
4841
4842
4843 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
4844 return false;
4845
4846 return true;
4847}
4848
4849
4850
4851
4852
4853
4854
4855static void tcp_new_space(struct sock *sk)
4856{
4857 struct tcp_sock *tp = tcp_sk(sk);
4858
4859 if (tcp_should_expand_sndbuf(sk)) {
4860 tcp_sndbuf_expand(sk);
4861 tp->snd_cwnd_stamp = tcp_time_stamp;
4862 }
4863
4864 sk->sk_write_space(sk);
4865}
4866
4867static void tcp_check_space(struct sock *sk)
4868{
4869 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
4870 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
4871
4872 smp_mb__after_atomic();
4873 if (sk->sk_socket &&
4874 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
4875 tcp_new_space(sk);
4876 }
4877}
4878
4879static inline void tcp_data_snd_check(struct sock *sk)
4880{
4881 tcp_push_pending_frames(sk);
4882 tcp_check_space(sk);
4883}
4884
4885
4886
4887
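/* Decide whether this segment warrants an immediate ACK (enough new
 * data, quick-ack mode, or out-of-order data) or whether the delayed
 * ACK timer can handle it.
 */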
4888static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4889{
4890 struct tcp_sock *tp = tcp_sk(sk);
4891
4892
4893 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4894
4895
4896
4897 __tcp_select_window(sk) >= tp->rcv_wnd) ||
4898
4899 tcp_in_quickack_mode(sk) ||
4900
4901 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4902
4903 tcp_send_ack(sk);
4904 } else {
4905
4906 tcp_send_delayed_ack(sk);
4907 }
4908}
4909
4910static inline void tcp_ack_snd_check(struct sock *sk)
4911{
4912 if (!inet_csk_ack_scheduled(sk)) {
4913
4914 return;
4915 }
4916 __tcp_ack_snd_check(sk, 1);
4917}
4918
4928
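/* Process an incoming urgent pointer (RFC 793 / RFC 1122, modulo the
 * tcp_stdurg sysctl) and remember where the urgent byte will arrive.
 */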
4929static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
4930{
4931 struct tcp_sock *tp = tcp_sk(sk);
4932 u32 ptr = ntohs(th->urg_ptr);
4933
4934 if (ptr && !sysctl_tcp_stdurg)
4935 ptr--;
4936 ptr += ntohl(th->seq);
4937
4938
4939 if (after(tp->copied_seq, ptr))
4940 return;
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952 if (before(ptr, tp->rcv_nxt))
4953 return;
4954
4955
4956 if (tp->urg_data && !after(ptr, tp->urg_seq))
4957 return;
4958
4959
4960 sk_send_sigurg(sk);
4961
4976
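	/* A new urgent pointer arrived while the previous urgent byte sits
	 * exactly at copied_seq; step over that byte (and free its skb if
	 * fully consumed) so it is not re-read as ordinary data.
	 */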
4977 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4978 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4979 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4980 tp->copied_seq++;
4981 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4982 __skb_unlink(skb, &sk->sk_receive_queue);
4983 __kfree_skb(skb);
4984 }
4985 }
4986
4987 tp->urg_data = TCP_URG_NOTYET;
4988 tp->urg_seq = ptr;
4989
4990
4991 tp->pred_flags = 0;
4992}
4993
4994
4995static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
4996{
4997 struct tcp_sock *tp = tcp_sk(sk);
4998
4999
5000 if (th->urg)
5001 tcp_check_urg(sk, th);
5002
5003
5004 if (tp->urg_data == TCP_URG_NOTYET) {
5005 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
5006 th->syn;
5007
5008
5009 if (ptr < skb->len) {
5010 u8 tmp;
5011 if (skb_copy_bits(skb, ptr, &tmp, 1))
5012 BUG();
5013 tp->urg_data = TCP_URG_VALID | tmp;
5014 if (!sock_flag(sk, SOCK_DEAD))
5015 sk->sk_data_ready(sk);
5016 }
5017 }
5018}
5019
5020static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5021{
5022 struct tcp_sock *tp = tcp_sk(sk);
5023 int chunk = skb->len - hlen;
5024 int err;
5025
5026 local_bh_enable();
5027 if (skb_csum_unnecessary(skb))
5028 err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
5029 else
5030 err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
5031
5032 if (!err) {
5033 tp->ucopy.len -= chunk;
5034 tp->copied_seq += chunk;
5035 tcp_rcv_space_adjust(sk);
5036 }
5037
5038 local_bh_disable();
5039 return err;
5040}
5041
5042static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5043 struct sk_buff *skb)
5044{
5045 __sum16 result;
5046
5047 if (sock_owned_by_user(sk)) {
5048 local_bh_enable();
5049 result = __tcp_checksum_complete(skb);
5050 local_bh_disable();
5051 } else {
5052 result = __tcp_checksum_complete(skb);
5053 }
5054 return result;
5055}
5056
5057static inline bool tcp_checksum_complete_user(struct sock *sk,
5058 struct sk_buff *skb)
5059{
5060 return !skb_csum_unnecessary(skb) &&
5061 __tcp_checksum_complete_user(sk, skb);
5062}
5063
5064
5065
5066
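/* Shared validation for incoming segments on established-style paths:
 * PAWS, sequence-range checks, and RFC 5961 handling of RST and SYN
 * (challenge ACKs instead of silently accepting them).
 */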
5067static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5068 const struct tcphdr *th, int syn_inerr)
5069{
5070 struct tcp_sock *tp = tcp_sk(sk);
5071
5072
5073 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5074 tcp_paws_discard(sk, skb)) {
5075 if (!th->rst) {
5076 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5077 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5078 LINUX_MIB_TCPACKSKIPPEDPAWS,
5079 &tp->last_oow_ack_time))
5080 tcp_send_dupack(sk, skb);
5081 goto discard;
5082 }
5083
5084 }
5085
5086
5087 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
5088
5089
5090
5091
5092
5093
5094 if (!th->rst) {
5095 if (th->syn)
5096 goto syn_challenge;
5097 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5098 LINUX_MIB_TCPACKSKIPPEDSEQ,
5099 &tp->last_oow_ack_time))
5100 tcp_send_dupack(sk, skb);
5101 }
5102 goto discard;
5103 }
5104
5105
5106 if (th->rst) {
5107
5108
5109
5110
5111
5112
5113 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
5114 tcp_reset(sk);
5115 else
5116 tcp_send_challenge_ack(sk, skb);
5117 goto discard;
5118 }
5119
5120
5121
5122
5123
5124
5125 if (th->syn) {
5126syn_challenge:
5127 if (syn_inerr)
5128 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5129 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5130 tcp_send_challenge_ack(sk, skb);
5131 goto discard;
5132 }
5133
5134 return true;
5135
5136discard:
5137 __kfree_skb(skb);
5138 return false;
5139}
5140
5163
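/* Receive path for ESTABLISHED sockets.  Header prediction lets the
 * common in-order case skip straight to queueing or copying to the
 * reader; anything unusual falls back to the slow path with full
 * validation below.
 */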
5164void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5165 const struct tcphdr *th, unsigned int len)
5166{
5167 struct tcp_sock *tp = tcp_sk(sk);
5168
5169 if (unlikely(!sk->sk_rx_dst))
5170 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186 tp->rx_opt.saw_tstamp = 0;
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5198 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5199 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5200 int tcp_header_len = tp->tcp_header_len;
5201
5202
5203
5204
5205
5206
5207
5208 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
5209
5210 if (!tcp_parse_aligned_timestamp(tp, th))
5211 goto slow_path;
5212
5213
5214 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
5215 goto slow_path;
5216
5217
5218
5219
5220
5221
5222 }
5223
5224 if (len <= tcp_header_len) {
5225
5226 if (len == tcp_header_len) {
5227
5228
5229
5230
5231 if (tcp_header_len ==
5232 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5233 tp->rcv_nxt == tp->rcv_wup)
5234 tcp_store_ts_recent(tp);
5235
5236
5237
5238
5239 tcp_ack(sk, skb, 0);
5240 __kfree_skb(skb);
5241 tcp_data_snd_check(sk);
5242 return;
5243 } else {
5244 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5245 goto discard;
5246 }
5247 } else {
5248 int eaten = 0;
5249 bool fragstolen = false;
5250
5251 if (tp->ucopy.task == current &&
5252 tp->copied_seq == tp->rcv_nxt &&
5253 len - tcp_header_len <= tp->ucopy.len &&
5254 sock_owned_by_user(sk)) {
5255 __set_current_state(TASK_RUNNING);
5256
5257 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
					/* Predicted packet is in window by definition.
					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
					 * Hence, check seq<=rcv_wup reduces to:
					 */
5262 if (tcp_header_len ==
5263 (sizeof(struct tcphdr) +
5264 TCPOLEN_TSTAMP_ALIGNED) &&
5265 tp->rcv_nxt == tp->rcv_wup)
5266 tcp_store_ts_recent(tp);
5267
5268 tcp_rcv_rtt_measure_ts(sk, skb);
5269
5270 __skb_pull(skb, tcp_header_len);
5271 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
5272 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5273 eaten = 1;
5274 }
5275 }
5276 if (!eaten) {
5277 if (tcp_checksum_complete_user(sk, skb))
5278 goto csum_error;
5279
5280 if ((int)skb->truesize > sk->sk_forward_alloc)
5281 goto step5;
5282
				/* Predicted packet is in window by definition.
				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
				 * Hence, check seq<=rcv_wup reduces to:
				 */
5287 if (tcp_header_len ==
5288 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
5289 tp->rcv_nxt == tp->rcv_wup)
5290 tcp_store_ts_recent(tp);
5291
5292 tcp_rcv_rtt_measure_ts(sk, skb);
5293
5294 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);

				/* Bulk data transfer: receiver */
5297 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5298 &fragstolen);
5299 }
5300
5301 tcp_event_data_recv(sk, skb);
5302
5303 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
5304
5305 tcp_ack(sk, skb, FLAG_DATA);
5306 tcp_data_snd_check(sk);
5307 if (!inet_csk_ack_scheduled(sk))
5308 goto no_ack;
5309 }
5310
5311 __tcp_ack_snd_check(sk, 0);
5312no_ack:
5313 if (eaten)
5314 kfree_skb_partial(skb, fragstolen);
5315 sk->sk_data_ready(sk);
5316 return;
5317 }
5318 }
5319
5320slow_path:
5321 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5322 goto csum_error;
5323
5324 if (!th->ack && !th->rst && !th->syn)
5325 goto discard;
5326
	/*
	 *	Standard slow path.
	 */
5331 if (!tcp_validate_incoming(sk, skb, th, 1))
5332 return;
5333
5334step5:
5335 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5336 goto discard;
5337
5338 tcp_rcv_rtt_measure_ts(sk, skb);
5339
	/* Process urgent data. */
5341 tcp_urg(sk, skb, th);
5342
	/* step 7: process the segment text */
5344 tcp_data_queue(sk, skb);
5345
5346 tcp_data_snd_check(sk);
5347 tcp_ack_snd_check(sk);
5348 return;
5349
5350csum_error:
5351 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
5352 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5353
5354discard:
5355 __kfree_skb(skb);
5356}
5357EXPORT_SYMBOL(tcp_rcv_established);
5358
5359void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5360{
5361 struct tcp_sock *tp = tcp_sk(sk);
5362 struct inet_connection_sock *icsk = inet_csk(sk);
5363
5364 tcp_set_state(sk, TCP_ESTABLISHED);
5365
5366 if (skb) {
5367 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
5368 security_inet_conn_established(sk, skb);
5369 }
5370
	/* Make sure socket is routed, for correct metrics. */
5372 icsk->icsk_af_ops->rebuild_header(sk);
5373
5374 tcp_init_metrics(sk);
5375
5376 tcp_init_congestion_control(sk);
5377
	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
5381 tp->lsndtime = tcp_time_stamp;
5382
5383 tcp_init_buffer_space(sk);
5384
5385 if (sock_flag(sk, SOCK_KEEPOPEN))
5386 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5387
5388 if (!tp->rx_opt.snd_wscale)
5389 __tcp_fast_path_on(tp, tp->snd_wnd);
5390 else
5391 tp->pred_flags = 0;
5392
5393 if (!sock_flag(sk, SOCK_DEAD)) {
5394 sk->sk_state_change(sk);
5395 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5396 }
5397}
5398
5399static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5400 struct tcp_fastopen_cookie *cookie)
5401{
5402 struct tcp_sock *tp = tcp_sk(sk);
5403 struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
5404 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5405 bool syn_drop = false;
5406
5407 if (mss == tp->rx_opt.user_mss) {
5408 struct tcp_options_received opt;
5409
		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
5411 tcp_clear_options(&opt);
5412 opt.user_mss = opt.mss_clamp = 0;
5413 tcp_parse_options(synack, &opt, 0, NULL);
5414 mss = opt.mss_clamp;
5415 }
5416
5417 if (!tp->syn_fastopen) {
		/* Ignore an unsolicited cookie */
5419 cookie->len = -1;
5420 } else if (tp->total_retrans) {
		/* SYN timed out and the SYN-ACK neither has a cookie nor
		 * acknowledges data. Presumably the remote received only
		 * the retransmitted (regular) SYNs: either the original
		 * SYN-data or the corresponding SYN-ACK was dropped.
		 */
5426 syn_drop = (cookie->len < 0 && data);
5427 } else if (cookie->len < 0 && !tp->syn_data) {
		/* We requested a cookie but didn't get it. If we did not use
		 * the (old) exp opt format then try so next time (try_exp=1).
		 * Otherwise we go back to use the RFC7413 opt (try_exp=2).
		 */
5432 try_exp = tp->syn_fastopen_exp ? 2 : 1;
5433 }
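	/* Editorial note (not in the original source): syn_drop and try_exp
	 * computed above are stored in the per-destination Fast Open cache by
	 * tcp_fastopen_cache_set() below, so that later connects to this peer
	 * can adapt, e.g. stop sending data in the SYN or switch between the
	 * experimental and the RFC 7413 option formats.
	 */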
5434
5435 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
5436
5437 if (data) {
5438 tcp_for_write_queue_from(data, sk) {
5439 if (data == tcp_send_head(sk) ||
5440 __tcp_retransmit_skb(sk, data))
5441 break;
5442 }
5443 tcp_rearm_rto(sk);
5444 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
5445 return true;
5446 }
5447 tp->syn_data_acked = tp->syn_data;
5448 if (tp->syn_data_acked)
5449 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
5450 return false;
5451}
5452
5453static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5454 const struct tcphdr *th, unsigned int len)
5455{
5456 struct inet_connection_sock *icsk = inet_csk(sk);
5457 struct tcp_sock *tp = tcp_sk(sk);
5458 struct tcp_fastopen_cookie foc = { .len = -1 };
5459 int saved_clamp = tp->rx_opt.mss_clamp;
5460
5461 tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
5462 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
5463 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5464
5465 if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
5474 if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
5475 after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
5476 goto reset_and_undo;
5477
5478 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5479 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5480 tcp_time_stamp)) {
5481 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
5482 goto reset_and_undo;
5483 }
5484
		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */
5493 if (th->rst) {
5494 tcp_reset(sk);
5495 goto discard;
5496 }
5497
		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 *
		 *    See note below!
		 *                                        --ANK(990513)
		 */
5505 if (!th->syn)
5506 goto discard_and_undo;
5507
		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */
5515 tcp_ecn_rcv_synack(tp, th);
5516
5517 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5518 tcp_ack(sk, skb, FLAG_SLOWPATH);
5519
		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
5523 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5524 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5525
		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
5529 tp->snd_wnd = ntohs(th->window);
5530
5531 if (!tp->rx_opt.wscale_ok) {
5532 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5533 tp->window_clamp = min(tp->window_clamp, 65535U);
5534 }
5535
5536 if (tp->rx_opt.saw_tstamp) {
5537 tp->rx_opt.tstamp_ok = 1;
5538 tp->tcp_header_len =
5539 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5540 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5541 tcp_store_ts_recent(tp);
5542 } else {
5543 tp->tcp_header_len = sizeof(struct tcphdr);
5544 }
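		/* Editorial note (not in the original source): with timestamps
		 * negotiated every segment carries 12 extra option bytes, so
		 * tcp_header_len becomes 20 + 12 = 32 bytes and advmss is
		 * reduced by the same amount in the branch above.
		 */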
5545
5546 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5547 tcp_enable_fack(tp);
5548
5549 tcp_mtup_init(sk);
5550 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5551 tcp_initialize_rcv_mss(sk);
5552
		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized. */
5556 tp->copied_seq = tp->rcv_nxt;
5557
5558 smp_mb();
5559
5560 tcp_finish_connect(sk, skb);
5561
5562 if ((tp->syn_fastopen || tp->syn_data) &&
5563 tcp_rcv_fastopen_synack(sk, skb, &foc))
5564 return -1;
5565
5566 if (sk->sk_write_pending ||
5567 icsk->icsk_accept_queue.rskq_defer_accept ||
5568 icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
5576 inet_csk_schedule_ack(sk);
5577 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5578 tcp_enter_quickack_mode(sk);
5579 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5580 TCP_DELACK_MAX, TCP_RTO_MAX);
5581
5582discard:
5583 __kfree_skb(skb);
5584 return 0;
5585 } else {
5586 tcp_send_ack(sk);
5587 }
5588 return -1;
5589 }
5590
	/* No ACK in the segment */
5592
5593 if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
5600 goto discard_and_undo;
5601 }
5602
	/* PAWS check. */
5604 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5605 tcp_paws_reject(&tp->rx_opt, 0))
5606 goto discard_and_undo;
5607
5608 if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
5613 tcp_set_state(sk, TCP_SYN_RECV);
5614
5615 if (tp->rx_opt.saw_tstamp) {
5616 tp->rx_opt.tstamp_ok = 1;
5617 tcp_store_ts_recent(tp);
5618 tp->tcp_header_len =
5619 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5620 } else {
5621 tp->tcp_header_len = sizeof(struct tcphdr);
5622 }
5623
5624 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5625 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5626
		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
5630 tp->snd_wnd = ntohs(th->window);
5631 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5632 tp->max_window = tp->snd_wnd;
5633
5634 tcp_ecn_rcv_syn(tp, th);
5635
5636 tcp_mtup_init(sk);
5637 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5638 tcp_initialize_rcv_mss(sk);
5639
5640 tcp_send_synack(sk);
5641#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this (except that we must
		 * either change tcp_recvmsg() to prevent it from returning data
		 * before 3WHS completes per Fast Open spec, or employ TCP Fast
		 * Open to deal with this case).
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
		 * is not flawless. So, discard packet for sanity.
		 */
5653 return -1;
5654#else
5655 goto discard;
5656#endif
5657 }
5658
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */
5662discard_and_undo:
5663 tcp_clear_options(&tp->rx_opt);
5664 tp->rx_opt.mss_clamp = saved_clamp;
5665 goto discard;
5666
5667reset_and_undo:
5668 tcp_clear_options(&tp->rx_opt);
5669 tp->rx_opt.mss_clamp = saved_clamp;
5670 return 1;
5671}
5672
/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
5680int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5681 const struct tcphdr *th, unsigned int len)
5682{
5683 struct tcp_sock *tp = tcp_sk(sk);
5684 struct inet_connection_sock *icsk = inet_csk(sk);
5685 struct request_sock *req;
5686 int queued = 0;
5687 bool acceptable;
5688 u32 synack_stamp;
5689
5690 tp->rx_opt.saw_tstamp = 0;
5691
5692 switch (sk->sk_state) {
5693 case TCP_CLOSE:
5694 goto discard;
5695
5696 case TCP_LISTEN:
5697 if (th->ack)
5698 return 1;
5699
5700 if (th->rst)
5701 goto discard;
5702
5703 if (th->syn) {
5704 if (th->fin)
5705 goto discard;
5706 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5707 return 1;
			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed unless
			 * it's still in use.
			 */
5726 kfree_skb(skb);
5727 return 0;
5728 }
5729 goto discard;
5730
5731 case TCP_SYN_SENT:
5732 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5733 if (queued >= 0)
5734 return queued;
5735
		/* Do step6 onward by hand. */
5737 tcp_urg(sk, skb, th);
5738 __kfree_skb(skb);
5739 tcp_data_snd_check(sk);
5740 return 0;
5741 }
5742
5743 req = tp->fastopen_rsk;
5744 if (req) {
5745 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
5746 sk->sk_state != TCP_FIN_WAIT1);
5747
5748 if (!tcp_check_req(sk, skb, req, true))
5749 goto discard;
5750 }
5751
5752 if (!th->ack && !th->rst && !th->syn)
5753 goto discard;
5754
5755 if (!tcp_validate_incoming(sk, skb, th, 0))
5756 return 0;
5757
	/* step 5: check the ACK field */
5759 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
5760 FLAG_UPDATE_TS_RECENT) > 0;
5761
5762 switch (sk->sk_state) {
5763 case TCP_SYN_RECV:
5764 if (!acceptable)
5765 return 1;
5766
		/* Once we leave TCP_SYN_RECV, we no longer need req
		 * so release it.
		 */
5770 if (req) {
5771 synack_stamp = tcp_rsk(req)->snt_synack;
5772 tp->total_retrans = req->num_retrans;
5773 reqsk_fastopen_remove(sk, req, false);
5774 } else {
5775 synack_stamp = tp->lsndtime;
			/* Make sure socket is routed, for correct metrics. */
5777 icsk->icsk_af_ops->rebuild_header(sk);
5778 tcp_init_congestion_control(sk);
5779
5780 tcp_mtup_init(sk);
5781 tp->copied_seq = tp->rcv_nxt;
5782 tcp_init_buffer_space(sk);
5783 }
5784 smp_mb();
5785 tcp_set_state(sk, TCP_ESTABLISHED);
5786 sk->sk_state_change(sk);
5787
		/* Note, that this wakeup is only for marginal crossed SYN case.
		 * Passively open sockets are not waked up, because
		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
		 */
5792 if (sk->sk_socket)
5793 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5794
5795 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5796 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
5797 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5798 tcp_synack_rtt_meas(sk, synack_stamp);
5799
5800 if (tp->rx_opt.tstamp_ok)
5801 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5802
5803 if (req) {
			/* Re-arm the timer because data may have been sent out.
			 * This is similar to the regular data transmission case
			 * when new data has just been ack'ed.
			 *
			 * (TFO) - we could try to be more aggressive and
			 * retransmitting any data sooner based on when they
			 * are sent out.
			 */
5812 tcp_rearm_rto(sk);
5813 } else
5814 tcp_init_metrics(sk);
5815
5816 tcp_update_pacing_rate(sk);
5817
		/* Prevent spurious tcp_cwnd_restart() on first data packet */
5819 tp->lsndtime = tcp_time_stamp;
5820
5821 tcp_initialize_rcv_mss(sk);
5822 tcp_fast_path_on(tp);
5823 break;
5824
5825 case TCP_FIN_WAIT1: {
5826 struct dst_entry *dst;
5827 int tmo;
5828
		/* If we enter the TCP_FIN_WAIT1 state and we are a
		 * Fast Open socket and this is the first acceptable
		 * ACK we have received, this would have acknowledged
		 * our SYNACK so stop the SYNACK timer.
		 */
5834 if (req) {
			/* Return RST if ack_seq is invalid.
			 * Note that RFC793 only says to generate a
			 * DUPACK for it but for TCP Fast Open it seems
			 * better to treat this case like TCP_SYN_RECV
			 * above.
			 */
5841 if (!acceptable)
5842 return 1;
5843
5844 reqsk_fastopen_remove(sk, req, false);
5845 tcp_rearm_rto(sk);
5846 }
5847 if (tp->snd_una != tp->write_seq)
5848 break;
5849
5850 tcp_set_state(sk, TCP_FIN_WAIT2);
5851 sk->sk_shutdown |= SEND_SHUTDOWN;
5852
5853 dst = __sk_dst_get(sk);
5854 if (dst)
5855 dst_confirm(dst);
5856
5857 if (!sock_flag(sk, SOCK_DEAD)) {
			/* Wake up lingering close() */
5859 sk->sk_state_change(sk);
5860 break;
5861 }
5862
5863 if (tp->linger2 < 0 ||
5864 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5865 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5866 tcp_done(sk);
5867 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5868 return 1;
5869 }
5870
5871 tmo = tcp_fin_time(sk);
5872 if (tmo > TCP_TIMEWAIT_LEN) {
5873 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5874 } else if (th->fin || sock_owned_by_user(sk)) {
			/* Bad case. We could lose such FIN otherwise.
			 * It is not a big problem, but it looks confusing
			 * and not so rare event. We still can lose it now,
			 * if it spins in bh_lock_sock(), but it is really
			 * marginal case.
			 */
5881 inet_csk_reset_keepalive_timer(sk, tmo);
5882 } else {
5883 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5884 goto discard;
5885 }
5886 break;
5887 }
5888
5889 case TCP_CLOSING:
5890 if (tp->snd_una == tp->write_seq) {
5891 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
5892 goto discard;
5893 }
5894 break;
5895
5896 case TCP_LAST_ACK:
5897 if (tp->snd_una == tp->write_seq) {
5898 tcp_update_metrics(sk);
5899 tcp_done(sk);
5900 goto discard;
5901 }
5902 break;
5903 }
5904
	/* step 6: check the URG bit */
5906 tcp_urg(sk, skb, th);
5907
	/* step 7: process the segment text */
5909 switch (sk->sk_state) {
5910 case TCP_CLOSE_WAIT:
5911 case TCP_CLOSING:
5912 case TCP_LAST_ACK:
5913 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
5914 break;
5915 case TCP_FIN_WAIT1:
5916 case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
5921 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5922 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5923 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5924 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5925 tcp_reset(sk);
5926 return 1;
5927 }
5928 }
		/* Fall through */
5930 case TCP_ESTABLISHED:
5931 tcp_data_queue(sk, skb);
5932 queued = 1;
5933 break;
5934 }
5935
	/* tcp_data could move socket to TIME-WAIT */
5937 if (sk->sk_state != TCP_CLOSE) {
5938 tcp_data_snd_check(sk);
5939 tcp_ack_snd_check(sk);
5940 }
5941
5942 if (!queued) {
5943discard:
5944 __kfree_skb(skb);
5945 }
5946 return 0;
5947}
5948EXPORT_SYMBOL(tcp_rcv_state_process);
5949
5950static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
5951{
5952 struct inet_request_sock *ireq = inet_rsk(req);
5953
5954 if (family == AF_INET)
5955 net_dbg_ratelimited("drop open request from %pI4/%u\n",
5956 &ireq->ir_rmt_addr, port);
5957#if IS_ENABLED(CONFIG_IPV6)
5958 else if (family == AF_INET6)
5959 net_dbg_ratelimited("drop open request from %pI6/%u\n",
5960 &ireq->ir_v6_rmt_addr, port);
5961#endif
5962}
5963
/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
 *
 * If we receive a SYN packet with these bits set, it means a
 * network is playing bad games with TOS bits. In order to
 * avoid possible false congestion notifications, we disable
 * TCP ECN negotiation.
 *
 * Exception: tcp_ca wants ECN. This is required for DCTCP
 * congestion control: Linux DCTCP asserts ECT on all packets,
 * including SYN, so the negotiation is accepted regardless in
 * that case.
 */
5976static void tcp_ecn_create_request(struct request_sock *req,
5977 const struct sk_buff *skb,
5978 const struct sock *listen_sk,
5979 const struct dst_entry *dst)
5980{
5981 const struct tcphdr *th = tcp_hdr(skb);
5982 const struct net *net = sock_net(listen_sk);
5983 bool th_ecn = th->ece && th->cwr;
5984 bool ect, ecn_ok;
5985
5986 if (!th_ecn)
5987 return;
5988
5989 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
5990 ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
5991
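	/* Editorial note (not in the original source): ECN is granted on the
	 * request only if the SYN itself was not ECT-marked and ECN is
	 * allowed by sysctl or by the route, or unconditionally if the
	 * congestion control module (e.g. DCTCP) requires ECN.
	 */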
5992 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))
5993 inet_rsk(req)->ecn_ok = 1;
5994}
5995
5996static void tcp_openreq_init(struct request_sock *req,
5997 const struct tcp_options_received *rx_opt,
5998 struct sk_buff *skb, const struct sock *sk)
5999{
6000 struct inet_request_sock *ireq = inet_rsk(req);
6001
6002 req->rcv_wnd = 0;
6003 req->cookie_ts = 0;
6004 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
6005 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
6006 tcp_rsk(req)->snt_synack = tcp_time_stamp;
6007 tcp_rsk(req)->last_oow_ack_time = 0;
6008 req->mss = rx_opt->mss_clamp;
6009 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
6010 ireq->tstamp_ok = rx_opt->tstamp_ok;
6011 ireq->sack_ok = rx_opt->sack_ok;
6012 ireq->snd_wscale = rx_opt->snd_wscale;
6013 ireq->wscale_ok = rx_opt->wscale_ok;
6014 ireq->acked = 0;
6015 ireq->ecn_ok = 0;
6016 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6017 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6018 ireq->ir_mark = inet_request_mark(sk, skb);
6019}
6020
6021struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6022 struct sock *sk_listener)
6023{
6024 struct request_sock *req = reqsk_alloc(ops, sk_listener);
6025
6026 if (req) {
6027 struct inet_request_sock *ireq = inet_rsk(req);
6028
6029 kmemcheck_annotate_bitfield(ireq, flags);
6030 ireq->opt = NULL;
6031 atomic64_set(&ireq->ir_cookie, 0);
6032 ireq->ireq_state = TCP_NEW_SYN_RECV;
6033 write_pnet(&ireq->ireq_net, sock_net(sk_listener));
6034 ireq->ireq_family = sk_listener->sk_family;
6035 }
6036
6037 return req;
6038}
6039EXPORT_SYMBOL(inet_reqsk_alloc);
6040
/*
 * Return true if a syncookie should be sent
 */
6044static bool tcp_syn_flood_action(struct sock *sk,
6045 const struct sk_buff *skb,
6046 const char *proto)
6047{
6048 const char *msg = "Dropping request";
6049 bool want_cookie = false;
6050 struct listen_sock *lopt;
6051
6052#ifdef CONFIG_SYN_COOKIES
6053 if (sysctl_tcp_syncookies) {
6054 msg = "Sending cookies";
6055 want_cookie = true;
6056 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6057 } else
6058#endif
6059 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6060
6061 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
6062 if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
6063 lopt->synflood_warned = 1;
6064 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6065 proto, ntohs(tcp_hdr(skb)->dest), msg);
6066 }
6067 return want_cookie;
6068}
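/* Editorial note (not in the original source): sysctl_tcp_syncookies is
 * treated as 0 = never send syncookies, 1 = send them only when the SYN
 * queue overflows (the case handled above), 2 = always use syncookies; that
 * is why the warning above is suppressed for value 2 and tcp_conn_request()
 * below skips the queue-full test when it is set.
 */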
6069
6070int tcp_conn_request(struct request_sock_ops *rsk_ops,
6071 const struct tcp_request_sock_ops *af_ops,
6072 struct sock *sk, struct sk_buff *skb)
6073{
6074 struct tcp_options_received tmp_opt;
6075 struct request_sock *req;
6076 struct tcp_sock *tp = tcp_sk(sk);
6077 struct dst_entry *dst = NULL;
6078 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6079 bool want_cookie = false, fastopen;
6080 struct flowi fl;
6081 struct tcp_fastopen_cookie foc = { .len = -1 };
6082 int err;
6083
	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
6089 if ((sysctl_tcp_syncookies == 2 ||
6090 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6091 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
6092 if (!want_cookie)
6093 goto drop;
6094 }
6095
	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
6102 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
6103 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6104 goto drop;
6105 }
6106
6107 req = inet_reqsk_alloc(rsk_ops, sk);
6108 if (!req)
6109 goto drop;
6110
6111 tcp_rsk(req)->af_specific = af_ops;
6112
6113 tcp_clear_options(&tmp_opt);
6114 tmp_opt.mss_clamp = af_ops->mss_clamp;
6115 tmp_opt.user_mss = tp->rx_opt.user_mss;
6116 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
6117
6118 if (want_cookie && !tmp_opt.saw_tstamp)
6119 tcp_clear_options(&tmp_opt);
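	/* Editorial note (not in the original source): a syncookie-built
	 * connection keeps no request sock around, so negotiated options can
	 * only survive if the client sent timestamps (they are encoded into
	 * the TSval, see cookie_init_timestamp()); otherwise all parsed
	 * options are dropped above for cookie-mode requests.
	 */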
6120
6121 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
6122 tcp_openreq_init(req, &tmp_opt, skb, sk);
6123
	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
6125 inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
6126
6127 af_ops->init_req(req, sk, skb);
6128
6129 if (security_inet_conn_request(sk, skb, req))
6130 goto drop_and_free;
6131
6132 if (!want_cookie && !isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
6142 if (tcp_death_row.sysctl_tw_recycle) {
6143 bool strict;
6144
6145 dst = af_ops->route_req(sk, &fl, req, &strict);
6146
6147 if (dst && strict &&
6148 !tcp_peer_is_proven(req, dst, true,
6149 tmp_opt.saw_tstamp)) {
6150 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
6151 goto drop_and_release;
6152 }
6153 }
		/* Kill the following clause, if you dislike this way. */
6155 else if (!sysctl_tcp_syncookies &&
6156 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6157 (sysctl_max_syn_backlog >> 2)) &&
6158 !tcp_peer_is_proven(req, dst, false,
6159 tmp_opt.saw_tstamp)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
6167 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
6168 rsk_ops->family);
6169 goto drop_and_release;
6170 }
6171
6172 isn = af_ops->init_seq(skb);
6173 }
6174 if (!dst) {
6175 dst = af_ops->route_req(sk, &fl, req, NULL);
6176 if (!dst)
6177 goto drop_and_free;
6178 }
6179
6180 tcp_ecn_create_request(req, skb, sk, dst);
6181
6182 if (want_cookie) {
6183 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6184 req->cookie_ts = tmp_opt.tstamp_ok;
6185 if (!tmp_opt.tstamp_ok)
6186 inet_rsk(req)->ecn_ok = 0;
6187 }
6188
6189 tcp_rsk(req)->snt_isn = isn;
6190 tcp_openreq_init_rwin(req, sk, dst);
6191 fastopen = !want_cookie &&
6192 tcp_try_fastopen(sk, skb, req, &foc, dst);
6193 err = af_ops->send_synack(sk, dst, &fl, req,
6194 skb_get_queue_mapping(skb), &foc);
6195 if (!fastopen) {
6196 if (err || want_cookie)
6197 goto drop_and_free;
6198
6199 tcp_rsk(req)->tfo_listener = false;
6200 af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
6201 }
6202
6203 return 0;
6204
6205drop_and_release:
6206 dst_release(dst);
6207drop_and_free:
6208 reqsk_free(req);
6209drop:
6210 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
6211 return 0;
6212}
6213EXPORT_SYMBOL(tcp_conn_request);
6214