/* Implementation of the Transmission Control Protocol (TCP) output engine. */
#define pr_fmt(fmt) "TCP: " fmt

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>

#include <trace/events/tcp.h>

static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);

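/* Account for new data that has been sent to the network. */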
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int prior_packets = tp->packets_out;

	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

	__skb_unlink(skb, &sk->sk_write_queue);
	tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);

	tp->packets_out += tcp_skb_pcount(skb);
	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
		tcp_rearm_rto(sk);

	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
		      tcp_skb_pcount(skb));
}

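/* Pick SND.NXT if the window has not shrunk; otherwise return the
 * highest sequence number that is still inside the offered window.
 */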
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
	    (tp->rx_opt.wscale_ok &&
	     ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
		return tp->snd_nxt;
	else
		return tcp_wnd_end(tp);
}

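/* Calculate the MSS to advertise in a SYN segment, capped by the
 * route metric when a cached destination entry is available.
 */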
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst) {
		unsigned int metric = dst_metric_advmss(dst);

		if (metric < mss) {
			mss = metric;
			tp->advmss = mss;
		}
	}

	return (__u16)mss;
}

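/* RFC 2861: halve the congestion window for each RTO worth of idle
 * time, never going below the restart window.
 */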
void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
	u32 cwnd = tp->snd_cwnd;

	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

	tp->snd_ssthresh = tcp_current_ssthresh(sk);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->snd_cwnd_used = 0;
}

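/* Congestion state accounting after a packet has been sent. */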
static void tcp_event_data_sent(struct tcp_sock *tp,
				struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 now = tcp_jiffies32;

	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	tp->lsndtime = now;

	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
		icsk->icsk_ack.pingpong = 1;
}

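/* Account for an ACK we sent. */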
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

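/* Default initial receive window: twice TCP_INIT_CWND segments,
 * scaled down (but never below 2) when the MSS exceeds 1460 bytes.
 */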
u32 tcp_default_init_rwnd(u32 mss)
{
	u32 init_rwnd = TCP_INIT_CWND * 2;

	if (mss > 1460)
		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
	return init_rwnd;
}

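/* Determine a window scaling factor and an initial window to offer,
 * based on the receive buffer space that will be available.  Results
 * are stored through the rcv_wnd/window_clamp/rcv_wscale pointers.
 */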
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	if (*window_clamp == 0)
		(*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
	space = min(*window_clamp, space);

	if (space > mss)
		space = rounddown(space, mss);

	if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = space;

	(*rcv_wscale) = 0;
	if (wscale_ok) {
		space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
		space = max_t(u32, space, sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
			space >>= 1;
			(*rcv_wscale)++;
		}
	}

	if (mss > (1 << *rcv_wscale)) {
		if (!init_rcv_wnd)
			init_rcv_wnd = tcp_default_init_rwnd(mss);
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
	}

	(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);

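/* Choose a new window to advertise, update state in tcp_sock for the
 * socket, and return the result with RFC 1323 scaling applied.  The
 * return value can be stuffed directly into th->window for an
 * outgoing frame.
 */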
static u16 tcp_select_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 old_win = tp->rcv_wnd;
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	if (new_win < cur_win) {
		if (new_win == 0)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPWANTZEROWINDOWADV);
		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	if (!tp->rx_opt.rcv_wscale &&
	    sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
		new_win = min(new_win, MAX_TCP_WINDOW);
	else
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

	new_win >>= tp->rx_opt.rcv_wscale;

	if (new_win == 0) {
		tp->pred_flags = 0;
		if (old_win)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPTOZEROWINDOWADV);
	} else if (old_win == 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
	}

	return new_win;
}

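/* Packet ECN state for a SYN-ACK. */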
static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
	if (!(tp->ecn_flags & TCP_ECN_OK))
		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
	else if (tcp_ca_needs_ecn(sk) ||
		 tcp_bpf_ca_needs_ecn(sk))
		INET_ECN_xmit(sk);
}

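/* Packet ECN state for a SYN. */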
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;

	if (!use_ecn) {
		const struct dst_entry *dst = __sk_dst_get(sk);

		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
			use_ecn = true;
	}

	tp->ecn_flags = 0;

	if (use_ecn) {
		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
		tp->ecn_flags = TCP_ECN_OK;
		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
			INET_ECN_xmit(sk);
	}
}

static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
	if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
		TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
}

static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
{
	if (inet_rsk(req)->ecn_ok)
		th->ece = 1;
}

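/* Set up ECN state for a packet on an established socket that is
 * about to be sent.
 */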
static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
			 struct tcphdr *th, int tcp_header_len)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->ecn_flags & TCP_ECN_OK) {
		if (skb->len != tcp_header_len &&
		    !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
			INET_ECN_xmit(sk);
			if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
				th->cwr = 1;
				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
			}
		} else if (!tcp_ca_needs_ecn(sk)) {
			INET_ECN_dontxmit(sk);
		}
		if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
			th->ece = 1;
	}
}

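/* Construct the common control bits of a non-data skb.  If SYN or FIN
 * is present, the end sequence number is auto-incremented.
 */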
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	skb->ip_summed = CHECKSUM_PARTIAL;

	TCP_SKB_CB(skb)->tcp_flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;

	tcp_skb_pcount_set(skb, 1);

	TCP_SKB_CB(skb)->seq = seq;
	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
		seq++;
	TCP_SKB_CB(skb)->end_seq = seq;
}

static inline bool tcp_urg_mode(const struct tcp_sock *tp)
{
	return tp->snd_una != tp->snd_up;
}

#define OPTION_SACK_ADVERTISE	(1 << 0)
#define OPTION_TS		(1 << 1)
#define OPTION_MD5		(1 << 2)
#define OPTION_WSCALE		(1 << 3)
#define OPTION_FAST_OPEN_COOKIE	(1 << 8)
#define OPTION_SMC		(1 << 9)

static void smc_options_write(__be32 *ptr, u16 *options)
{
#if IS_ENABLED(CONFIG_SMC)
	if (static_branch_unlikely(&tcp_have_smc)) {
		if (unlikely(OPTION_SMC & *options)) {
			*ptr++ = htonl((TCPOPT_NOP << 24) |
				       (TCPOPT_NOP << 16) |
				       (TCPOPT_EXP << 8) |
				       (TCPOLEN_EXP_SMC_BASE));
			*ptr++ = htonl(TCPOPT_SMC_MAGIC);
		}
	}
#endif
}

struct tcp_out_options {
	u16 options;		/* bit field of OPTION_* */
	u16 mss;		/* 0 to disable */
	u8 ws;			/* window scale, 0 to disable */
	u8 num_sack_blocks;	/* number of SACK blocks to include */
	u8 hash_size;		/* bytes in hash_location */
	__u8 *hash_location;	/* temporary pointer, overloaded */
	__u32 tsval, tsecr;	/* need to include OPTION_TS */
	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
};

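/* Write previously computed TCP options to the packet, in the order
 * emitted below: MD5, MSS, timestamps (with SACK-permitted folded in
 * when both are present), window scale, SACK blocks, Fast Open
 * cookie, then the experimental SMC option.
 */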
447static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
448 struct tcp_out_options *opts)
449{
450 u16 options = opts->options;
451
452 if (unlikely(OPTION_MD5 & options)) {
453 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
454 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
455
456 opts->hash_location = (__u8 *)ptr;
457 ptr += 4;
458 }
459
460 if (unlikely(opts->mss)) {
461 *ptr++ = htonl((TCPOPT_MSS << 24) |
462 (TCPOLEN_MSS << 16) |
463 opts->mss);
464 }
465
466 if (likely(OPTION_TS & options)) {
467 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
468 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
469 (TCPOLEN_SACK_PERM << 16) |
470 (TCPOPT_TIMESTAMP << 8) |
471 TCPOLEN_TIMESTAMP);
472 options &= ~OPTION_SACK_ADVERTISE;
473 } else {
474 *ptr++ = htonl((TCPOPT_NOP << 24) |
475 (TCPOPT_NOP << 16) |
476 (TCPOPT_TIMESTAMP << 8) |
477 TCPOLEN_TIMESTAMP);
478 }
479 *ptr++ = htonl(opts->tsval);
480 *ptr++ = htonl(opts->tsecr);
481 }
482
483 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
484 *ptr++ = htonl((TCPOPT_NOP << 24) |
485 (TCPOPT_NOP << 16) |
486 (TCPOPT_SACK_PERM << 8) |
487 TCPOLEN_SACK_PERM);
488 }
489
490 if (unlikely(OPTION_WSCALE & options)) {
491 *ptr++ = htonl((TCPOPT_NOP << 24) |
492 (TCPOPT_WINDOW << 16) |
493 (TCPOLEN_WINDOW << 8) |
494 opts->ws);
495 }
496
497 if (unlikely(opts->num_sack_blocks)) {
498 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
499 tp->duplicate_sack : tp->selective_acks;
500 int this_sack;
501
502 *ptr++ = htonl((TCPOPT_NOP << 24) |
503 (TCPOPT_NOP << 16) |
504 (TCPOPT_SACK << 8) |
505 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
506 TCPOLEN_SACK_PERBLOCK)));
507
508 for (this_sack = 0; this_sack < opts->num_sack_blocks;
509 ++this_sack) {
510 *ptr++ = htonl(sp[this_sack].start_seq);
511 *ptr++ = htonl(sp[this_sack].end_seq);
512 }
513
514 tp->rx_opt.dsack = 0;
515 }
516
517 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
518 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
519 u8 *p = (u8 *)ptr;
520 u32 len;
521
522 if (foc->exp) {
523 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
524 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
525 TCPOPT_FASTOPEN_MAGIC);
526 p += TCPOLEN_EXP_FASTOPEN_BASE;
527 } else {
528 len = TCPOLEN_FASTOPEN_BASE + foc->len;
529 *p++ = TCPOPT_FASTOPEN;
530 *p++ = len;
531 }
532
533 memcpy(p, foc->val, foc->len);
534 if ((len & 3) == 2) {
535 p[foc->len] = TCPOPT_NOP;
536 p[foc->len + 1] = TCPOPT_NOP;
537 }
538 ptr += (len + 3) >> 2;
539 }
540
541 smc_options_write(ptr, &options);
542}
543
544static void smc_set_option(const struct tcp_sock *tp,
545 struct tcp_out_options *opts,
546 unsigned int *remaining)
547{
548#if IS_ENABLED(CONFIG_SMC)
549 if (static_branch_unlikely(&tcp_have_smc)) {
550 if (tp->syn_smc) {
551 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
552 opts->options |= OPTION_SMC;
553 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
554 }
555 }
556 }
557#endif
558}
559
560static void smc_set_option_cond(const struct tcp_sock *tp,
561 const struct inet_request_sock *ireq,
562 struct tcp_out_options *opts,
563 unsigned int *remaining)
564{
565#if IS_ENABLED(CONFIG_SMC)
566 if (static_branch_unlikely(&tcp_have_smc)) {
567 if (tp->syn_smc && ireq->smc_ok) {
568 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
569 opts->options |= OPTION_SMC;
570 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
571 }
572 }
573 }
574#endif
575}
576
577
578
579
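/* Compute TCP options for SYN packets.  This is not the final
 * network wire format yet.
 */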
580static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
581 struct tcp_out_options *opts,
582 struct tcp_md5sig_key **md5)
583{
584 struct tcp_sock *tp = tcp_sk(sk);
585 unsigned int remaining = MAX_TCP_OPTION_SPACE;
586 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
587
588#ifdef CONFIG_TCP_MD5SIG
589 *md5 = tp->af_specific->md5_lookup(sk, sk);
590 if (*md5) {
591 opts->options |= OPTION_MD5;
592 remaining -= TCPOLEN_MD5SIG_ALIGNED;
593 }
594#else
595 *md5 = NULL;
596#endif
597
598
599
600
601
602
603
604
605
606
607 opts->mss = tcp_advertise_mss(sk);
608 remaining -= TCPOLEN_MSS_ALIGNED;
609
610 if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
611 opts->options |= OPTION_TS;
612 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
613 opts->tsecr = tp->rx_opt.ts_recent;
614 remaining -= TCPOLEN_TSTAMP_ALIGNED;
615 }
616 if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
617 opts->ws = tp->rx_opt.rcv_wscale;
618 opts->options |= OPTION_WSCALE;
619 remaining -= TCPOLEN_WSCALE_ALIGNED;
620 }
621 if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
622 opts->options |= OPTION_SACK_ADVERTISE;
623 if (unlikely(!(OPTION_TS & opts->options)))
624 remaining -= TCPOLEN_SACKPERM_ALIGNED;
625 }
626
627 if (fastopen && fastopen->cookie.len >= 0) {
628 u32 need = fastopen->cookie.len;
629
630 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
631 TCPOLEN_FASTOPEN_BASE;
632 need = (need + 3) & ~3U;
633 if (remaining >= need) {
634 opts->options |= OPTION_FAST_OPEN_COOKIE;
635 opts->fastopen_cookie = &fastopen->cookie;
636 remaining -= need;
637 tp->syn_fastopen = 1;
638 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
639 }
640 }
641
642 smc_set_option(tp, opts, &remaining);
643
644 return MAX_TCP_OPTION_SPACE - remaining;
645}
646
647
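/* Set up TCP options for SYN-ACKs. */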
648static unsigned int tcp_synack_options(const struct sock *sk,
649 struct request_sock *req,
650 unsigned int mss, struct sk_buff *skb,
651 struct tcp_out_options *opts,
652 const struct tcp_md5sig_key *md5,
653 struct tcp_fastopen_cookie *foc)
654{
655 struct inet_request_sock *ireq = inet_rsk(req);
656 unsigned int remaining = MAX_TCP_OPTION_SPACE;
657
658#ifdef CONFIG_TCP_MD5SIG
659 if (md5) {
660 opts->options |= OPTION_MD5;
661 remaining -= TCPOLEN_MD5SIG_ALIGNED;
662
663
664
665
666
667
668 ireq->tstamp_ok &= !ireq->sack_ok;
669 }
670#endif
671
672
673 opts->mss = mss;
674 remaining -= TCPOLEN_MSS_ALIGNED;
675
676 if (likely(ireq->wscale_ok)) {
677 opts->ws = ireq->rcv_wscale;
678 opts->options |= OPTION_WSCALE;
679 remaining -= TCPOLEN_WSCALE_ALIGNED;
680 }
681 if (likely(ireq->tstamp_ok)) {
682 opts->options |= OPTION_TS;
683 opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
684 opts->tsecr = req->ts_recent;
685 remaining -= TCPOLEN_TSTAMP_ALIGNED;
686 }
687 if (likely(ireq->sack_ok)) {
688 opts->options |= OPTION_SACK_ADVERTISE;
689 if (unlikely(!ireq->tstamp_ok))
690 remaining -= TCPOLEN_SACKPERM_ALIGNED;
691 }
692 if (foc != NULL && foc->len >= 0) {
693 u32 need = foc->len;
694
695 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
696 TCPOLEN_FASTOPEN_BASE;
697 need = (need + 3) & ~3U;
698 if (remaining >= need) {
699 opts->options |= OPTION_FAST_OPEN_COOKIE;
700 opts->fastopen_cookie = foc;
701 remaining -= need;
702 }
703 }
704
705 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
706
707 return MAX_TCP_OPTION_SPACE - remaining;
708}
709
710
711
712
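/* Compute TCP options for established sockets.  This is not the
 * final network wire format yet.
 */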
713static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
714 struct tcp_out_options *opts,
715 struct tcp_md5sig_key **md5)
716{
717 struct tcp_sock *tp = tcp_sk(sk);
718 unsigned int size = 0;
719 unsigned int eff_sacks;
720
721 opts->options = 0;
722
723#ifdef CONFIG_TCP_MD5SIG
724 *md5 = tp->af_specific->md5_lookup(sk, sk);
725 if (unlikely(*md5)) {
726 opts->options |= OPTION_MD5;
727 size += TCPOLEN_MD5SIG_ALIGNED;
728 }
729#else
730 *md5 = NULL;
731#endif
732
733 if (likely(tp->rx_opt.tstamp_ok)) {
734 opts->options |= OPTION_TS;
735 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
736 opts->tsecr = tp->rx_opt.ts_recent;
737 size += TCPOLEN_TSTAMP_ALIGNED;
738 }
739
740 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
741 if (unlikely(eff_sacks)) {
742 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
743 opts->num_sack_blocks =
744 min_t(unsigned int, eff_sacks,
745 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
746 TCPOLEN_SACK_PERBLOCK);
747 size += TCPOLEN_SACK_BASE_ALIGNED +
748 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
749 }
750
751 return size;
752}
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
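/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ limits the number of skbs a flow may have queued in the
 * qdisc/device layers, to reduce RTT and bufferbloat.  When the limit
 * is hit, the skb destructor (tcp_wfree) queues the socket on a
 * per-cpu tasklet, which resumes transmission once packets leave the
 * host queues.
 */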
769struct tsq_tasklet {
770 struct tasklet_struct tasklet;
771 struct list_head head;
772};
773static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
774
775static void tcp_tsq_handler(struct sock *sk)
776{
777 if ((1 << sk->sk_state) &
778 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
779 TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) {
780 struct tcp_sock *tp = tcp_sk(sk);
781
782 if (tp->lost_out > tp->retrans_out &&
783 tp->snd_cwnd > tcp_packets_in_flight(tp)) {
784 tcp_mstamp_refresh(tp);
785 tcp_xmit_retransmit_queue(sk);
786 }
787
788 tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
789 0, GFP_ATOMIC);
790 }
791}
792
793
794
795
796
797
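/* One tasklet per cpu tries to send more skbs.  We run in tasklet
 * context but disable irqs while splicing tsq->head, because
 * tcp_wfree() might be called from interrupt context.
 */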
798static void tcp_tasklet_func(unsigned long data)
799{
800 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
801 LIST_HEAD(list);
802 unsigned long flags;
803 struct list_head *q, *n;
804 struct tcp_sock *tp;
805 struct sock *sk;
806
807 local_irq_save(flags);
808 list_splice_init(&tsq->head, &list);
809 local_irq_restore(flags);
810
811 list_for_each_safe(q, n, &list) {
812 tp = list_entry(q, struct tcp_sock, tsq_node);
813 list_del(&tp->tsq_node);
814
815 sk = (struct sock *)tp;
816 smp_mb__before_atomic();
817 clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
818
819 if (!sk->sk_lock.owned &&
820 test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
821 bh_lock_sock(sk);
822 if (!sock_owned_by_user(sk)) {
823 clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
824 tcp_tsq_handler(sk);
825 }
826 bh_unlock_sock(sk);
827 }
828
829 sk_free(sk);
830 }
831}
832
833#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
834 TCPF_WRITE_TIMER_DEFERRED | \
835 TCPF_DELACK_TIMER_DEFERRED | \
836 TCPF_MTU_REDUCED_DEFERRED)
837
838
839
840
841
842
843
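/* tcp_release_cb - tcp release_sock() callback
 *
 * Runs protocol actions that were deferred while the socket was owned
 * by user context (TSQ transmit, write timer, delayed ACK timer, MTU
 * reduction).
 */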
844void tcp_release_cb(struct sock *sk)
845{
846 unsigned long flags, nflags;
847
848
849 do {
850 flags = sk->sk_tsq_flags;
851 if (!(flags & TCP_DEFERRED_ALL))
852 return;
853 nflags = flags & ~TCP_DEFERRED_ALL;
854 } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
855
856 if (flags & TCPF_TSQ_DEFERRED)
857 tcp_tsq_handler(sk);
858
859
860
861
862
863
864
865
866
867
868 sock_release_ownership(sk);
869
870 if (flags & TCPF_WRITE_TIMER_DEFERRED) {
871 tcp_write_timer_handler(sk);
872 __sock_put(sk);
873 }
874 if (flags & TCPF_DELACK_TIMER_DEFERRED) {
875 tcp_delack_timer_handler(sk);
876 __sock_put(sk);
877 }
878 if (flags & TCPF_MTU_REDUCED_DEFERRED) {
879 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
880 __sock_put(sk);
881 }
882}
883EXPORT_SYMBOL(tcp_release_cb);
884
885void __init tcp_tasklet_init(void)
886{
887 int i;
888
889 for_each_possible_cpu(i) {
890 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
891
892 INIT_LIST_HEAD(&tsq->head);
893 tasklet_init(&tsq->tasklet,
894 tcp_tasklet_func,
895 (unsigned long)tsq);
896 }
897}
898
899
900
901
902
903
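/* Write buffer destructor automatically called from kfree_skb.
 * We can't transmit new skbs from this context, as we might already
 * hold the qdisc lock: instead, flag the socket for the TSQ tasklet.
 */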
904void tcp_wfree(struct sk_buff *skb)
905{
906 struct sock *sk = skb->sk;
907 struct tcp_sock *tp = tcp_sk(sk);
908 unsigned long flags, nval, oval;
909
910
911
912
913 WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
914
915
916
917
918
919
920
921
922 if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
923 goto out;
924
925 for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
926 struct tsq_tasklet *tsq;
927 bool empty;
928
929 if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
930 goto out;
931
932 nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
933 nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
934 if (nval != oval)
935 continue;
936
937
938 local_irq_save(flags);
939 tsq = this_cpu_ptr(&tsq_tasklet);
940 empty = list_empty(&tsq->head);
941 list_add(&tp->tsq_node, &tsq->head);
942 if (empty)
943 tasklet_schedule(&tsq->tasklet);
944 local_irq_restore(flags);
945 return;
946 }
947out:
948 sk_free(sk);
949}
950
951
952
953
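/* Pacing timer expired: flag the socket and schedule the TSQ tasklet
 * so transmission resumes from softirq context.
 */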
954enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
955{
956 struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
957 struct sock *sk = (struct sock *)tp;
958 unsigned long nval, oval;
959
960 for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
961 struct tsq_tasklet *tsq;
962 bool empty;
963
964 if (oval & TSQF_QUEUED)
965 break;
966
967 nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
968 nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
969 if (nval != oval)
970 continue;
971
972 if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
973 break;
974
975 tsq = this_cpu_ptr(&tsq_tasklet);
976 empty = list_empty(&tsq->head);
977 list_add(&tp->tsq_node, &tsq->head);
978 if (empty)
979 tasklet_schedule(&tsq->tasklet);
980 break;
981 }
982 return HRTIMER_NORESTART;
983}
984
985
986
987
988
989
990
991static bool tcp_needs_internal_pacing(const struct sock *sk)
992{
993 return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
994}
995
996static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
997{
998 u64 len_ns;
999 u32 rate;
1000
1001 if (!tcp_needs_internal_pacing(sk))
1002 return;
1003 rate = sk->sk_pacing_rate;
1004 if (!rate || rate == ~0U)
1005 return;
1006
1007
1008
1009
1010 len_ns = (u64)skb->len * NSEC_PER_SEC;
1011 do_div(len_ns, rate);
1012 hrtimer_start(&tcp_sk(sk)->pacing_timer,
1013 ktime_add_ns(ktime_get(), len_ns),
1014 HRTIMER_MODE_ABS_PINNED);
1015}
1016
1017static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
1018{
1019 skb->skb_mstamp = tp->tcp_mstamp;
1020 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
1021}
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
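/* This routine actually transmits TCP packets queued in by
 * tcp_sendmsg().  It is used by both the initial transmission and
 * possible later retransmissions.  All skbs seen here are completely
 * headerless: it is our job to build the TCP header and pass the
 * packet down to IP.
 */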
1034static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1035 gfp_t gfp_mask)
1036{
1037 const struct inet_connection_sock *icsk = inet_csk(sk);
1038 struct inet_sock *inet;
1039 struct tcp_sock *tp;
1040 struct tcp_skb_cb *tcb;
1041 struct tcp_out_options opts;
1042 unsigned int tcp_options_size, tcp_header_size;
1043 struct sk_buff *oskb = NULL;
1044 struct tcp_md5sig_key *md5;
1045 struct tcphdr *th;
1046 int err;
1047
1048 BUG_ON(!skb || !tcp_skb_pcount(skb));
1049 tp = tcp_sk(sk);
1050
1051 if (clone_it) {
1052 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
1053 - tp->snd_una;
1054 oskb = skb;
1055
1056 tcp_skb_tsorted_save(oskb) {
1057 if (unlikely(skb_cloned(oskb)))
1058 skb = pskb_copy(oskb, gfp_mask);
1059 else
1060 skb = skb_clone(oskb, gfp_mask);
1061 } tcp_skb_tsorted_restore(oskb);
1062
1063 if (unlikely(!skb))
1064 return -ENOBUFS;
1065 }
1066 skb->skb_mstamp = tp->tcp_mstamp;
1067
1068 inet = inet_sk(sk);
1069 tcb = TCP_SKB_CB(skb);
1070 memset(&opts, 0, sizeof(opts));
1071
1072 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
1073 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
1074 else
1075 tcp_options_size = tcp_established_options(sk, skb, &opts,
1076 &md5);
1077 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
1078
1079
1080
1081
1082
1083
1084
1085
1086 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
1087
1088
1089
1090
1091
1092
1093 skb->pfmemalloc = 0;
1094
1095 skb_push(skb, tcp_header_size);
1096 skb_reset_transport_header(skb);
1097
1098 skb_orphan(skb);
1099 skb->sk = sk;
1100 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
1101 skb_set_hash_from_sk(skb, sk);
1102 refcount_add(skb->truesize, &sk->sk_wmem_alloc);
1103
1104 skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
1105
1106
1107 th = (struct tcphdr *)skb->data;
1108 th->source = inet->inet_sport;
1109 th->dest = inet->inet_dport;
1110 th->seq = htonl(tcb->seq);
1111 th->ack_seq = htonl(tp->rcv_nxt);
1112 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
1113 tcb->tcp_flags);
1114
1115 th->check = 0;
1116 th->urg_ptr = 0;
1117
1118
1119 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
1120 if (before(tp->snd_up, tcb->seq + 0x10000)) {
1121 th->urg_ptr = htons(tp->snd_up - tcb->seq);
1122 th->urg = 1;
1123 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
1124 th->urg_ptr = htons(0xFFFF);
1125 th->urg = 1;
1126 }
1127 }
1128
1129 tcp_options_write((__be32 *)(th + 1), tp, &opts);
1130 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
1131 if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
1132 th->window = htons(tcp_select_window(sk));
1133 tcp_ecn_send(sk, skb, th, tcp_header_size);
1134 } else {
1135
1136
1137
1138 th->window = htons(min(tp->rcv_wnd, 65535U));
1139 }
1140#ifdef CONFIG_TCP_MD5SIG
1141
1142 if (md5) {
1143 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1144 tp->af_specific->calc_md5_hash(opts.hash_location,
1145 md5, sk, skb);
1146 }
1147#endif
1148
1149 icsk->icsk_af_ops->send_check(sk, skb);
1150
1151 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1152 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1153
1154 if (skb->len != tcp_header_size) {
1155 tcp_event_data_sent(tp, sk);
1156 tp->data_segs_out += tcp_skb_pcount(skb);
1157 tcp_internal_pacing(sk, skb);
1158 }
1159
1160 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1161 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1162 tcp_skb_pcount(skb));
1163
1164 tp->segs_out += tcp_skb_pcount(skb);
1165
1166 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
1167 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
1168
1169
1170 skb->tstamp = 0;
1171
1172
1173 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
1174 sizeof(struct inet6_skb_parm)));
1175
1176 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1177
1178 if (unlikely(err > 0)) {
1179 tcp_enter_cwr(sk);
1180 err = net_xmit_eval(err);
1181 }
1182 if (!err && oskb) {
1183 tcp_update_skb_after_send(tp, oskb);
1184 tcp_rate_skb_sent(sk, oskb);
1185 }
1186 return err;
1187}
1188
1189
1190
1191
1192
1193
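/* Queue the buffer on the end of the write queue and advance
 * write_seq; nothing is transmitted until tcp_write_xmit() runs.
 */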
1194static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1195{
1196 struct tcp_sock *tp = tcp_sk(sk);
1197
1198
1199 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1200 __skb_header_release(skb);
1201 tcp_add_write_queue_tail(sk, skb);
1202 sk->sk_wmem_queued += skb->truesize;
1203 sk_mem_charge(sk, skb->truesize);
1204}
1205
1206
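/* Initialize TSO segment accounting (pcount and gso_size) for a
 * packet.
 */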
1207static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1208{
1209 if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
1210
1211
1212
1213 tcp_skb_pcount_set(skb, 1);
1214 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1215 } else {
1216 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1217 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
1218 }
1219}
1220
1221
1222
1223
1224static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1225{
1226 struct tcp_sock *tp = tcp_sk(sk);
1227
1228 tp->packets_out -= decr;
1229
1230 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1231 tp->sacked_out -= decr;
1232 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1233 tp->retrans_out -= decr;
1234 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1235 tp->lost_out -= decr;
1236
1237
1238 if (tcp_is_reno(tp) && decr > 0)
1239 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1240
1241 if (tp->lost_skb_hint &&
1242 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1243 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1244 tp->lost_cnt_hint -= decr;
1245
1246 tcp_verify_left_out(tp);
1247}
1248
1249static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
1250{
1251 return TCP_SKB_CB(skb)->txstamp_ack ||
1252 (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
1253}
1254
1255static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1256{
1257 struct skb_shared_info *shinfo = skb_shinfo(skb);
1258
1259 if (unlikely(tcp_has_tx_tstamp(skb)) &&
1260 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1261 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1262 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1263
1264 shinfo->tx_flags &= ~tsflags;
1265 shinfo2->tx_flags |= tsflags;
1266 swap(shinfo->tskey, shinfo2->tskey);
1267 TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack;
1268 TCP_SKB_CB(skb)->txstamp_ack = 0;
1269 }
1270}
1271
1272static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1273{
1274 TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
1275 TCP_SKB_CB(skb)->eor = 0;
1276}
1277
1278
1279static void tcp_insert_write_queue_after(struct sk_buff *skb,
1280 struct sk_buff *buff,
1281 struct sock *sk,
1282 enum tcp_queue tcp_queue)
1283{
1284 if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
1285 __skb_queue_after(&sk->sk_write_queue, skb, buff);
1286 else
1287 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
1288}
1289
1290
1291
1292
1293
1294
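/* Create two new TCP segments: shrink the given segment to the
 * specified size and put the remaining data into a new skb inserted
 * right after it.  These are still headerless skbs at this point.
 */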
1295int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1296 struct sk_buff *skb, u32 len,
1297 unsigned int mss_now, gfp_t gfp)
1298{
1299 struct tcp_sock *tp = tcp_sk(sk);
1300 struct sk_buff *buff;
1301 int nsize, old_factor;
1302 int nlen;
1303 u8 flags;
1304
1305 if (WARN_ON(len > skb->len))
1306 return -EINVAL;
1307
1308 nsize = skb_headlen(skb) - len;
1309 if (nsize < 0)
1310 nsize = 0;
1311
1312 if (skb_unclone(skb, gfp))
1313 return -ENOMEM;
1314
1315
1316 buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
1317 if (!buff)
1318 return -ENOMEM;
1319
1320 sk->sk_wmem_queued += buff->truesize;
1321 sk_mem_charge(sk, buff->truesize);
1322 nlen = skb->len - len - nsize;
1323 buff->truesize += nlen;
1324 skb->truesize -= nlen;
1325
1326
1327 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1328 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1329 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1330
1331
1332 flags = TCP_SKB_CB(skb)->tcp_flags;
1333 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1334 TCP_SKB_CB(buff)->tcp_flags = flags;
1335 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1336 tcp_skb_fragment_eor(skb, buff);
1337
1338 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1339
1340 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1341 skb_put(buff, nsize),
1342 nsize, 0);
1343
1344 skb_trim(skb, len);
1345
1346 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1347 } else {
1348 skb->ip_summed = CHECKSUM_PARTIAL;
1349 skb_split(skb, buff, len);
1350 }
1351
1352 buff->ip_summed = skb->ip_summed;
1353
1354 buff->tstamp = skb->tstamp;
1355 tcp_fragment_tstamp(skb, buff);
1356
1357 old_factor = tcp_skb_pcount(skb);
1358
1359
1360 tcp_set_skb_tso_segs(skb, mss_now);
1361 tcp_set_skb_tso_segs(buff, mss_now);
1362
1363
1364 TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
1365
1366
1367
1368
1369 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1370 int diff = old_factor - tcp_skb_pcount(skb) -
1371 tcp_skb_pcount(buff);
1372
1373 if (diff)
1374 tcp_adjust_pcount(sk, skb, diff);
1375 }
1376
1377
1378 __skb_header_release(buff);
1379 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1380 if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
1381 list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
1382
1383 return 0;
1384}
1385
1386
1387
1388
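/* Similar to __pskb_pull_tail(), but the pulled data is discarded
 * rather than copied.
 */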
1389static int __pskb_trim_head(struct sk_buff *skb, int len)
1390{
1391 struct skb_shared_info *shinfo;
1392 int i, k, eat;
1393
1394 eat = min_t(int, len, skb_headlen(skb));
1395 if (eat) {
1396 __skb_pull(skb, eat);
1397 len -= eat;
1398 if (!len)
1399 return 0;
1400 }
1401 eat = len;
1402 k = 0;
1403 shinfo = skb_shinfo(skb);
1404 for (i = 0; i < shinfo->nr_frags; i++) {
1405 int size = skb_frag_size(&shinfo->frags[i]);
1406
1407 if (size <= eat) {
1408 skb_frag_unref(skb, i);
1409 eat -= size;
1410 } else {
1411 shinfo->frags[k] = shinfo->frags[i];
1412 if (eat) {
1413 shinfo->frags[k].page_offset += eat;
1414 skb_frag_size_sub(&shinfo->frags[k], eat);
1415 eat = 0;
1416 }
1417 k++;
1418 }
1419 }
1420 shinfo->nr_frags = k;
1421
1422 skb->data_len -= len;
1423 skb->len = skb->data_len;
1424 return len;
1425}
1426
1427
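/* Remove acked data from the head of a packet in the transmit queue. */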
1428int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1429{
1430 u32 delta_truesize;
1431
1432 if (skb_unclone(skb, GFP_ATOMIC))
1433 return -ENOMEM;
1434
1435 delta_truesize = __pskb_trim_head(skb, len);
1436
1437 TCP_SKB_CB(skb)->seq += len;
1438 skb->ip_summed = CHECKSUM_PARTIAL;
1439
1440 if (delta_truesize) {
1441 skb->truesize -= delta_truesize;
1442 sk->sk_wmem_queued -= delta_truesize;
1443 sk_mem_uncharge(sk, delta_truesize);
1444 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1445 }
1446
1447
1448 if (tcp_skb_pcount(skb) > 1)
1449 tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
1450
1451 return 0;
1452}
1453
1454
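/* Calculate MSS not accounting any TCP options. */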
1455static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1456{
1457 const struct tcp_sock *tp = tcp_sk(sk);
1458 const struct inet_connection_sock *icsk = inet_csk(sk);
1459 int mss_now;
1460
1461
1462
1463
1464 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1465
1466
1467 if (icsk->icsk_af_ops->net_frag_header_len) {
1468 const struct dst_entry *dst = __sk_dst_get(sk);
1469
1470 if (dst && dst_allfrag(dst))
1471 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1472 }
1473
1474
1475 if (mss_now > tp->rx_opt.mss_clamp)
1476 mss_now = tp->rx_opt.mss_clamp;
1477
1478
1479 mss_now -= icsk->icsk_ext_hdr_len;
1480
1481
1482 if (mss_now < 48)
1483 mss_now = 48;
1484 return mss_now;
1485}
1486
1487
1488int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1489{
1490
1491 return __tcp_mtu_to_mss(sk, pmtu) -
1492 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1493}
1494
1495
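/* Inverse of tcp_mtu_to_mss(): compute the path MTU corresponding to
 * a given MSS.
 */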
1496int tcp_mss_to_mtu(struct sock *sk, int mss)
1497{
1498 const struct tcp_sock *tp = tcp_sk(sk);
1499 const struct inet_connection_sock *icsk = inet_csk(sk);
1500 int mtu;
1501
1502 mtu = mss +
1503 tp->tcp_header_len +
1504 icsk->icsk_ext_hdr_len +
1505 icsk->icsk_af_ops->net_header_len;
1506
1507
1508 if (icsk->icsk_af_ops->net_frag_header_len) {
1509 const struct dst_entry *dst = __sk_dst_get(sk);
1510
1511 if (dst && dst_allfrag(dst))
1512 mtu += icsk->icsk_af_ops->net_frag_header_len;
1513 }
1514 return mtu;
1515}
1516EXPORT_SYMBOL(tcp_mss_to_mtu);
1517
1518
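/* MTU probing initialization per socket. */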
1519void tcp_mtup_init(struct sock *sk)
1520{
1521 struct tcp_sock *tp = tcp_sk(sk);
1522 struct inet_connection_sock *icsk = inet_csk(sk);
1523 struct net *net = sock_net(sk);
1524
1525 icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1526 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1527 icsk->icsk_af_ops->net_header_len;
1528 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1529 icsk->icsk_mtup.probe_size = 0;
1530 if (icsk->icsk_mtup.enabled)
1531 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
1532}
1533EXPORT_SYMBOL(tcp_mtup_init);
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
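/* Synchronize the sending MSS (tp->mss_cache) with the current path
 * MTU and extension-header set.  tp->rx_opt.mss_clamp, negotiated at
 * connection setup, still bounds the result.
 */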
1557unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1558{
1559 struct tcp_sock *tp = tcp_sk(sk);
1560 struct inet_connection_sock *icsk = inet_csk(sk);
1561 int mss_now;
1562
1563 if (icsk->icsk_mtup.search_high > pmtu)
1564 icsk->icsk_mtup.search_high = pmtu;
1565
1566 mss_now = tcp_mtu_to_mss(sk, pmtu);
1567 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1568
1569
1570 icsk->icsk_pmtu_cookie = pmtu;
1571 if (icsk->icsk_mtup.enabled)
1572 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1573 tp->mss_cache = mss_now;
1574
1575 return mss_now;
1576}
1577EXPORT_SYMBOL(tcp_sync_mss);
1578
1579
1580
1581
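/* Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 */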
1582unsigned int tcp_current_mss(struct sock *sk)
1583{
1584 const struct tcp_sock *tp = tcp_sk(sk);
1585 const struct dst_entry *dst = __sk_dst_get(sk);
1586 u32 mss_now;
1587 unsigned int header_len;
1588 struct tcp_out_options opts;
1589 struct tcp_md5sig_key *md5;
1590
1591 mss_now = tp->mss_cache;
1592
1593 if (dst) {
1594 u32 mtu = dst_mtu(dst);
1595 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1596 mss_now = tcp_sync_mss(sk, mtu);
1597 }
1598
1599 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1600 sizeof(struct tcphdr);
1601
1602
1603
1604
1605 if (header_len != tp->tcp_header_len) {
1606 int delta = (int) header_len - tp->tcp_header_len;
1607 mss_now -= delta;
1608 }
1609
1610 return mss_now;
1611}
1612
1613
1614
1615
1616
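/* RFC 2861, slow part: shrink the congestion window when it has not
 * been fully used for one RTO (application limited).
 */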
1617static void tcp_cwnd_application_limited(struct sock *sk)
1618{
1619 struct tcp_sock *tp = tcp_sk(sk);
1620
1621 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1622 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1623
1624 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1625 u32 win_used = max(tp->snd_cwnd_used, init_win);
1626 if (win_used < tp->snd_cwnd) {
1627 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1628 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1629 }
1630 tp->snd_cwnd_used = 0;
1631 }
1632 tp->snd_cwnd_stamp = tcp_jiffies32;
1633}
1634
1635static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1636{
1637 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1638 struct tcp_sock *tp = tcp_sk(sk);
1639
1640
1641
1642
1643 if (!before(tp->snd_una, tp->max_packets_seq) ||
1644 tp->packets_out > tp->max_packets_out) {
1645 tp->max_packets_out = tp->packets_out;
1646 tp->max_packets_seq = tp->snd_nxt;
1647 tp->is_cwnd_limited = is_cwnd_limited;
1648 }
1649
1650 if (tcp_is_cwnd_limited(sk)) {
1651
1652 tp->snd_cwnd_used = 0;
1653 tp->snd_cwnd_stamp = tcp_jiffies32;
1654 } else {
1655
1656 if (tp->packets_out > tp->snd_cwnd_used)
1657 tp->snd_cwnd_used = tp->packets_out;
1658
1659 if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
1660 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
1661 !ca_ops->cong_control)
1662 tcp_cwnd_application_limited(sk);
1663
1664
1665
1666
1667
1668
1669
1670
1671 if (tcp_write_queue_empty(sk) && sk->sk_socket &&
1672 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
1673 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1674 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
1675 }
1676}
1677
1678
1679static bool tcp_minshall_check(const struct tcp_sock *tp)
1680{
1681 return after(tp->snd_sml, tp->snd_una) &&
1682 !after(tp->snd_sml, tp->snd_nxt);
1683}
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1694 const struct sk_buff *skb)
1695{
1696 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1697 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1698}
1699
1700
1701
1702
1703
1704
1705
1706
1707static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1708 int nonagle)
1709{
1710 return partial &&
1711 ((nonagle & TCP_NAGLE_CORK) ||
1712 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1713}
1714
1715
1716
1717
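/* Return how many segments we want in a TSO packet: roughly one
 * pacing interval's worth of payload, but at least min_tso_segs.
 */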
1718u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1719 int min_tso_segs)
1720{
1721 u32 bytes, segs;
1722
1723 bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
1724 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1725
1726
1727
1728
1729
1730
1731 segs = max_t(u32, bytes / mss_now, min_tso_segs);
1732
1733 return segs;
1734}
1735EXPORT_SYMBOL(tcp_tso_autosize);
1736
1737
1738
1739
1740static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1741{
1742 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1743 u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
1744
1745 if (!tso_segs)
1746 tso_segs = tcp_tso_autosize(sk, mss_now,
1747 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
1748 return min_t(u32, tso_segs, sk->sk_gso_max_segs);
1749}
1750
1751
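/* Return the portion of the skb that can be sent right away. */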
1752static unsigned int tcp_mss_split_point(const struct sock *sk,
1753 const struct sk_buff *skb,
1754 unsigned int mss_now,
1755 unsigned int max_segs,
1756 int nonagle)
1757{
1758 const struct tcp_sock *tp = tcp_sk(sk);
1759 u32 partial, needed, window, max_len;
1760
1761 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1762 max_len = mss_now * max_segs;
1763
1764 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1765 return max_len;
1766
1767 needed = min(skb->len, window);
1768
1769 if (max_len <= needed)
1770 return max_len;
1771
1772 partial = needed % mss_now;
1773
1774
1775
1776
1777 if (tcp_nagle_check(partial != 0, tp, nonagle))
1778 return needed - partial;
1779
1780 return needed;
1781}
1782
1783
1784
1785
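/* Can at least one segment of SKB be sent right now according to the
 * congestion window rules?  If so, return how many segments are
 * allowed.
 */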
1786static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1787 const struct sk_buff *skb)
1788{
1789 u32 in_flight, cwnd, halfcwnd;
1790
1791
1792 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1793 tcp_skb_pcount(skb) == 1)
1794 return 1;
1795
1796 in_flight = tcp_packets_in_flight(tp);
1797 cwnd = tp->snd_cwnd;
1798 if (in_flight >= cwnd)
1799 return 0;
1800
1801
1802
1803
1804 halfcwnd = max(cwnd >> 1, 1U);
1805 return min(halfcwnd, cwnd - in_flight);
1806}
1807
1808
1809
1810
1811
1812static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1813{
1814 int tso_segs = tcp_skb_pcount(skb);
1815
1816 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1817 tcp_set_skb_tso_segs(skb, mss_now);
1818 tso_segs = tcp_skb_pcount(skb);
1819 }
1820 return tso_segs;
1821}
1822
1823
1824
1825
1826
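/* Return true if the Nagle test allows this packet to be sent now. */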
1827static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1828 unsigned int cur_mss, int nonagle)
1829{
1830
1831
1832
1833
1834
1835
1836 if (nonagle & TCP_NAGLE_PUSH)
1837 return true;
1838
1839
1840 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1841 return true;
1842
1843 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1844 return true;
1845
1846 return false;
1847}
1848
1849
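/* Does at least the first segment of SKB fit into the send window? */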
1850static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1851 const struct sk_buff *skb,
1852 unsigned int cur_mss)
1853{
1854 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1855
1856 if (skb->len > cur_mss)
1857 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1858
1859 return !after(end_seq, tcp_wnd_end(tp));
1860}
1861
1862
1863
1864
1865
1866
1867
1868
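/* Split an skb that has never been transmitted at LEN bytes, putting
 * the tail into a new skb queued right after it.  Unlike
 * tcp_fragment(), this handles purely paged skbs on the fast
 * transmit path and falls back to tcp_fragment() otherwise.
 */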
1869static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1870 struct sk_buff *skb, unsigned int len,
1871 unsigned int mss_now, gfp_t gfp)
1872{
1873 struct sk_buff *buff;
1874 int nlen = skb->len - len;
1875 u8 flags;
1876
1877
1878 if (skb->len != skb->data_len)
1879 return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
1880
1881 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1882 if (unlikely(!buff))
1883 return -ENOMEM;
1884
1885 sk->sk_wmem_queued += buff->truesize;
1886 sk_mem_charge(sk, buff->truesize);
1887 buff->truesize += nlen;
1888 skb->truesize -= nlen;
1889
1890
1891 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1892 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1893 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1894
1895
1896 flags = TCP_SKB_CB(skb)->tcp_flags;
1897 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1898 TCP_SKB_CB(buff)->tcp_flags = flags;
1899
1900
1901 TCP_SKB_CB(buff)->sacked = 0;
1902
1903 tcp_skb_fragment_eor(skb, buff);
1904
1905 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1906 skb_split(skb, buff, len);
1907 tcp_fragment_tstamp(skb, buff);
1908
1909
1910 tcp_set_skb_tso_segs(skb, mss_now);
1911 tcp_set_skb_tso_segs(buff, mss_now);
1912
1913
1914 __skb_header_release(buff);
1915 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1916
1917 return 0;
1918}
1919
1920
1921
1922
1923
1924
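/* Decide whether to defer transmission of a TSO packet in the hope of
 * coalescing more data into it.  Returns true to defer, false to send
 * right away.
 */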
1925static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1926 bool *is_cwnd_limited, u32 max_segs)
1927{
1928 const struct inet_connection_sock *icsk = inet_csk(sk);
1929 u32 age, send_win, cong_win, limit, in_flight;
1930 struct tcp_sock *tp = tcp_sk(sk);
1931 struct sk_buff *head;
1932 int win_divisor;
1933
1934 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1935 goto send_now;
1936
1937 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
1938 goto send_now;
1939
1940
1941
1942
1943 if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
1944 goto send_now;
1945
1946 in_flight = tcp_packets_in_flight(tp);
1947
1948 BUG_ON(tcp_skb_pcount(skb) <= 1);
1949 BUG_ON(tp->snd_cwnd <= in_flight);
1950
1951 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1952
1953
1954 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1955
1956 limit = min(send_win, cong_win);
1957
1958
1959 if (limit >= max_segs * tp->mss_cache)
1960 goto send_now;
1961
1962
1963 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1964 goto send_now;
1965
1966 win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
1967 if (win_divisor) {
1968 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1969
1970
1971
1972
1973 chunk /= win_divisor;
1974 if (limit >= chunk)
1975 goto send_now;
1976 } else {
1977
1978
1979
1980
1981
1982 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1983 goto send_now;
1984 }
1985
1986
1987 head = tcp_rtx_queue_head(sk);
1988 if (!head)
1989 goto send_now;
1990 age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
1991
1992 if (age < (tp->srtt_us >> 4))
1993 goto send_now;
1994
1995
1996
1997 if (cong_win < send_win && cong_win <= skb->len)
1998 *is_cwnd_limited = true;
1999
2000 return true;
2001
2002send_now:
2003 return false;
2004}
2005
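/* If the last MTU probe is older than the reprobe interval, reset the
 * probing state so a bigger probe can be attempted again.
 */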
2006static inline void tcp_mtu_check_reprobe(struct sock *sk)
2007{
2008 struct inet_connection_sock *icsk = inet_csk(sk);
2009 struct tcp_sock *tp = tcp_sk(sk);
2010 struct net *net = sock_net(sk);
2011 u32 interval;
2012 s32 delta;
2013
2014 interval = net->ipv4.sysctl_tcp_probe_interval;
2015 delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
2016 if (unlikely(delta >= interval * HZ)) {
2017 int mss = tcp_current_mss(sk);
2018
2019
2020 icsk->icsk_mtup.probe_size = 0;
2021 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
2022 sizeof(struct tcphdr) +
2023 icsk->icsk_af_ops->net_header_len;
2024 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
2025
2026
2027 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
2028 }
2029}
2030
2031static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
2032{
2033 struct sk_buff *skb, *next;
2034
2035 skb = tcp_send_head(sk);
2036 tcp_for_write_queue_from_safe(skb, next, sk) {
2037 if (len <= skb->len)
2038 break;
2039
2040 if (unlikely(TCP_SKB_CB(skb)->eor))
2041 return false;
2042
2043 len -= skb->len;
2044 }
2045
2046 return true;
2047}
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
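/* Create a new MTU probe if we are ready.  MTU probing deliberately
 * sends a larger-than-MSS segment to discover whether a bigger path
 * MTU works.  Returns 0 if we should wait to probe (no cwnd or
 * receive window available), 1 if a probe was sent, -1 otherwise.
 */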
2058static int tcp_mtu_probe(struct sock *sk)
2059{
2060 struct inet_connection_sock *icsk = inet_csk(sk);
2061 struct tcp_sock *tp = tcp_sk(sk);
2062 struct sk_buff *skb, *nskb, *next;
2063 struct net *net = sock_net(sk);
2064 int probe_size;
2065 int size_needed;
2066 int copy, len;
2067 int mss_now;
2068 int interval;
2069
2070
2071
2072
2073
2074
2075 if (likely(!icsk->icsk_mtup.enabled ||
2076 icsk->icsk_mtup.probe_size ||
2077 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
2078 tp->snd_cwnd < 11 ||
2079 tp->rx_opt.num_sacks || tp->rx_opt.dsack))
2080 return -1;
2081
2082
2083
2084
2085
2086 mss_now = tcp_current_mss(sk);
2087 probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
2088 icsk->icsk_mtup.search_low) >> 1);
2089 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
2090 interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
2091
2092
2093
2094
2095 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
2096 interval < net->ipv4.sysctl_tcp_probe_threshold) {
2097
2098
2099
2100 tcp_mtu_check_reprobe(sk);
2101 return -1;
2102 }
2103
2104
2105 if (tp->write_seq - tp->snd_nxt < size_needed)
2106 return -1;
2107
2108 if (tp->snd_wnd < size_needed)
2109 return -1;
2110 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
2111 return 0;
2112
2113
2114 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
2115 if (!tcp_packets_in_flight(tp))
2116 return -1;
2117 else
2118 return 0;
2119 }
2120
2121 if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
2122 return -1;
2123
2124
2125 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
2126 if (!nskb)
2127 return -1;
2128 sk->sk_wmem_queued += nskb->truesize;
2129 sk_mem_charge(sk, nskb->truesize);
2130
2131 skb = tcp_send_head(sk);
2132
2133 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
2134 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
2135 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
2136 TCP_SKB_CB(nskb)->sacked = 0;
2137 nskb->csum = 0;
2138 nskb->ip_summed = skb->ip_summed;
2139
2140 tcp_insert_write_queue_before(nskb, skb, sk);
2141 tcp_highest_sack_replace(sk, skb, nskb);
2142
2143 len = 0;
2144 tcp_for_write_queue_from_safe(skb, next, sk) {
2145 copy = min_t(int, skb->len, probe_size - len);
2146 if (nskb->ip_summed) {
2147 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
2148 } else {
2149 __wsum csum = skb_copy_and_csum_bits(skb, 0,
2150 skb_put(nskb, copy),
2151 copy, 0);
2152 nskb->csum = csum_block_add(nskb->csum, csum, len);
2153 }
2154
2155 if (skb->len <= copy) {
2156
2157
2158 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
2159
2160
2161
2162 TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
2163 tcp_unlink_write_queue(skb, sk);
2164 sk_wmem_free_skb(sk, skb);
2165 } else {
2166 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
2167 ~(TCPHDR_FIN|TCPHDR_PSH);
2168 if (!skb_shinfo(skb)->nr_frags) {
2169 skb_pull(skb, copy);
2170 if (skb->ip_summed != CHECKSUM_PARTIAL)
2171 skb->csum = csum_partial(skb->data,
2172 skb->len, 0);
2173 } else {
2174 __pskb_trim_head(skb, copy);
2175 tcp_set_skb_tso_segs(skb, mss_now);
2176 }
2177 TCP_SKB_CB(skb)->seq += copy;
2178 }
2179
2180 len += copy;
2181
2182 if (len >= probe_size)
2183 break;
2184 }
2185 tcp_init_tso_segs(nskb, nskb->len);
2186
2187
2188
2189
2190 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
2191
2192
2193 tp->snd_cwnd--;
2194 tcp_event_new_data_sent(sk, nskb);
2195
2196 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
2197 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
2198 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
2199
2200 return 1;
2201 }
2202
2203 return -1;
2204}
2205
2206static bool tcp_pacing_check(const struct sock *sk)
2207{
2208 return tcp_needs_internal_pacing(sk) &&
2209 hrtimer_active(&tcp_sk(sk)->pacing_timer);
2210}
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
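/* TCP Small Queues: limit the amount of data queued below the TCP
 * stack (qdisc + device) so feedback from lower layers arrives
 * quickly; returns true if transmission should be throttled.
 */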
2223static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2224 unsigned int factor)
2225{
2226 unsigned int limit;
2227
2228 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
2229 limit = min_t(u32, limit,
2230 sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2231 limit <<= factor;
2232
2233 if (refcount_read(&sk->sk_wmem_alloc) > limit) {
2234
2235
2236
2237
2238
2239 if (tcp_rtx_queue_empty(sk))
2240 return false;
2241
2242 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
2243
2244
2245
2246
2247 smp_mb__after_atomic();
2248 if (refcount_read(&sk->sk_wmem_alloc) > limit)
2249 return true;
2250 }
2251 return false;
2252}
2253
2254static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
2255{
2256 const u32 now = tcp_jiffies32;
2257 enum tcp_chrono old = tp->chrono_type;
2258
2259 if (old > TCP_CHRONO_UNSPEC)
2260 tp->chrono_stat[old - 1] += now - tp->chrono_start;
2261 tp->chrono_start = now;
2262 tp->chrono_type = new;
2263}
2264
2265void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
2266{
2267 struct tcp_sock *tp = tcp_sk(sk);
2268
2269
2270
2271
2272
2273
2274 if (type > tp->chrono_type)
2275 tcp_chrono_set(tp, type);
2276}
2277
2278void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
2279{
2280 struct tcp_sock *tp = tcp_sk(sk);
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290 if (tcp_rtx_and_write_queues_empty(sk))
2291 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
2292 else if (type == tp->chrono_type)
2293 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
2294}
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
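/* This routine writes packets to the network.  It advances the send
 * head as incoming ACKs open the window.  Send at most one packet
 * when push_one > 0, and temporarily ignore the cwnd limit when
 * push_one == 2.  Returns true if nothing is in flight, segments are
 * queued, but we still could not send anything.
 */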
2310static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2311 int push_one, gfp_t gfp)
2312{
2313 struct tcp_sock *tp = tcp_sk(sk);
2314 struct sk_buff *skb;
2315 unsigned int tso_segs, sent_pkts;
2316 int cwnd_quota;
2317 int result;
2318 bool is_cwnd_limited = false, is_rwnd_limited = false;
2319 u32 max_segs;
2320
2321 sent_pkts = 0;
2322
2323 tcp_mstamp_refresh(tp);
2324 if (!push_one) {
2325
2326 result = tcp_mtu_probe(sk);
2327 if (!result) {
2328 return false;
2329 } else if (result > 0) {
2330 sent_pkts = 1;
2331 }
2332 }
2333
2334 max_segs = tcp_tso_segs(sk, mss_now);
2335 while ((skb = tcp_send_head(sk))) {
2336 unsigned int limit;
2337
2338 if (tcp_pacing_check(sk))
2339 break;
2340
2341 tso_segs = tcp_init_tso_segs(skb, mss_now);
2342 BUG_ON(!tso_segs);
2343
2344 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2345
2346 tcp_update_skb_after_send(tp, skb);
2347 goto repair;
2348 }
2349
2350 cwnd_quota = tcp_cwnd_test(tp, skb);
2351 if (!cwnd_quota) {
2352 if (push_one == 2)
2353
2354 cwnd_quota = 1;
2355 else
2356 break;
2357 }
2358
2359 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
2360 is_rwnd_limited = true;
2361 break;
2362 }
2363
2364 if (tso_segs == 1) {
2365 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2366 (tcp_skb_is_last(sk, skb) ?
2367 nonagle : TCP_NAGLE_PUSH))))
2368 break;
2369 } else {
2370 if (!push_one &&
2371 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2372 max_segs))
2373 break;
2374 }
2375
2376 limit = mss_now;
2377 if (tso_segs > 1 && !tcp_urg_mode(tp))
2378 limit = tcp_mss_split_point(sk, skb, mss_now,
2379 min_t(unsigned int,
2380 cwnd_quota,
2381 max_segs),
2382 nonagle);
2383
2384 if (skb->len > limit &&
2385 unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
2386 skb, limit, mss_now, gfp)))
2387 break;
2388
2389 if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
2390 clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
2391 if (tcp_small_queue_check(sk, skb, 0))
2392 break;
2393
2394 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2395 break;
2396
2397repair:
2398
2399
2400
2401 tcp_event_new_data_sent(sk, skb);
2402
2403 tcp_minshall_update(tp, mss_now, skb);
2404 sent_pkts += tcp_skb_pcount(skb);
2405
2406 if (push_one)
2407 break;
2408 }
2409
2410 if (is_rwnd_limited)
2411 tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
2412 else
2413 tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
2414
2415 if (likely(sent_pkts)) {
2416 if (tcp_in_cwnd_reduction(sk))
2417 tp->prr_out += sent_pkts;
2418
2419
2420 if (push_one != 2)
2421 tcp_schedule_loss_probe(sk, false);
2422 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2423 tcp_cwnd_validate(sk, is_cwnd_limited);
2424 return false;
2425 }
2426 return !tp->packets_out && !tcp_write_queue_empty(sk);
2427}
2428
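/* Arm the tail loss probe (TLP) timer; returns true if the probe
 * timer was scheduled.
 */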
2429bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
2430{
2431 struct inet_connection_sock *icsk = inet_csk(sk);
2432 struct tcp_sock *tp = tcp_sk(sk);
2433 u32 timeout, rto_delta_us;
2434 int early_retrans;
2435
2436
2437
2438
2439 if (tp->fastopen_rsk)
2440 return false;
2441
2442 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2443
2444
2445
2446 if ((early_retrans != 3 && early_retrans != 4) ||
2447 !tp->packets_out || !tcp_is_sack(tp) ||
2448 (icsk->icsk_ca_state != TCP_CA_Open &&
2449 icsk->icsk_ca_state != TCP_CA_CWR))
2450 return false;
2451
2452
2453
2454
2455
2456 if (tp->srtt_us) {
2457 timeout = usecs_to_jiffies(tp->srtt_us >> 2);
2458 if (tp->packets_out == 1)
2459 timeout += TCP_RTO_MIN;
2460 else
2461 timeout += TCP_TIMEOUT_MIN;
2462 } else {
2463 timeout = TCP_TIMEOUT_INIT;
2464 }
2465
2466
2467 rto_delta_us = advancing_rto ?
2468 jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
2469 tcp_rto_delta_us(sk);
2470 if (rto_delta_us > 0)
2471 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
2472
2473 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2474 TCP_RTO_MAX);
2475 return true;
2476}
2477
2478
2479
2480
2481
2482static bool skb_still_in_host_queue(const struct sock *sk,
2483 const struct sk_buff *skb)
2484{
2485 if (unlikely(skb_fclone_busy(sk, skb))) {
2486 NET_INC_STATS(sock_net(sk),
2487 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2488 return true;
2489 }
2490 return false;
2491}
2492
2493
2494
2495
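/* When the probe timeout (PTO) fires, try to send a new segment if
 * possible, else retransmit the last segment.
 */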
2496void tcp_send_loss_probe(struct sock *sk)
2497{
2498 struct tcp_sock *tp = tcp_sk(sk);
2499 struct sk_buff *skb;
2500 int pcount;
2501 int mss = tcp_current_mss(sk);
2502
2503 skb = tcp_send_head(sk);
2504 if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
2505 pcount = tp->packets_out;
2506 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2507 if (tp->packets_out > pcount)
2508 goto probe_sent;
2509 goto rearm_timer;
2510 }
2511 skb = skb_rb_last(&sk->tcp_rtx_queue);
2512
	/* At most one outstanding TLP retransmission. */
2514 if (tp->tlp_high_seq)
2515 goto rearm_timer;
2516
2517
2518 if (WARN_ON(!skb))
2519 goto rearm_timer;
2520
2521 if (skb_still_in_host_queue(sk, skb))
2522 goto rearm_timer;
2523
2524 pcount = tcp_skb_pcount(skb);
2525 if (WARN_ON(!pcount))
2526 goto rearm_timer;
2527
2528 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2529 if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2530 (pcount - 1) * mss, mss,
2531 GFP_ATOMIC)))
2532 goto rearm_timer;
2533 skb = skb_rb_next(skb);
2534 }
2535
2536 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2537 goto rearm_timer;
2538
2539 if (__tcp_retransmit_skb(sk, skb, 1))
2540 goto rearm_timer;
2541
	/* Record snd_nxt for loss detection. */
2543 tp->tlp_high_seq = tp->snd_nxt;
2544
2545probe_sent:
2546 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
	/* Reset so that tcp_rearm_rto() will restart the timer from now. */
2548 inet_csk(sk)->icsk_pending = 0;
2549rearm_timer:
2550 tcp_rearm_rto(sk);
2551}
2552
/* Push out any pending frames which were held back due to
 * TCP_CORK or attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 */
2557void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2558 int nonagle)
2559{
	/* If we are closed, the bytes will have to remain here.
	 * In time closedown will finish, we empty the write queue and
	 * all will be happy.
	 */
2564 if (unlikely(sk->sk_state == TCP_CLOSE))
2565 return;
2566
2567 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2568 sk_gfp_mask(sk, GFP_ATOMIC)))
2569 tcp_check_probe_timer(sk);
2570}
2571
2572
2573
2574
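/* Send _single_ skb sitting at the send head. This function requires
 * true push pending frames to set up the probe timer etc.
 */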
2575void tcp_push_one(struct sock *sk, unsigned int mss_now)
2576{
2577 struct sk_buff *skb = tcp_send_head(sk);
2578
2579 BUG_ON(!skb || skb->len < mss_now);
2580
2581 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2582}
2583
/* Compute the receive window to advertise to the peer.
 *
 * The window is derived from the space currently free in the receive
 * buffer, clamped by tp->window_clamp and bounded by tp->rcv_ssthresh.
 * Once less than half of the buffer is free, quick acks are disabled
 * and, under memory pressure, rcv_ssthresh is shrunk; if free space
 * drops below one MSS (or below 1/16 of the maximum space) a zero
 * window is advertised so the sender backs off until the application
 * drains the queue.
 *
 * Without window scaling the result is rounded down to a multiple of
 * the MSS to avoid silly window syndrome; with window scaling it is
 * aligned to the scale granularity instead, so a non-zero window is
 * never scaled away.
 */
2636u32 __tcp_select_window(struct sock *sk)
2637{
2638 struct inet_connection_sock *icsk = inet_csk(sk);
2639 struct tcp_sock *tp = tcp_sk(sk);
2640
	/* MSS for the peer's data.  Previous versions used mss_clamp
	 * here.  A value based on our guess of the peer's MSS may be
	 * more correct, but can perform worse because rcv_mss fluctuates.
	 */
2646 int mss = icsk->icsk_ack.rcv_mss;
2647 int free_space = tcp_space(sk);
2648 int allowed_space = tcp_full_space(sk);
2649 int full_space = min_t(int, tp->window_clamp, allowed_space);
2650 int window;
2651
2652 if (unlikely(mss > full_space)) {
2653 mss = full_space;
2654 if (mss <= 0)
2655 return 0;
2656 }
2657 if (free_space < (full_space >> 1)) {
2658 icsk->icsk_ack.quick = 0;
2659
2660 if (tcp_under_memory_pressure(sk))
2661 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2662 4U * tp->advmss);

		/* free_space might become our new window, make sure we don't
		 * increase it due to wscale.
		 */
2667 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);

		/* if free space is less than mss estimate, or is below 1/16th
		 * of the maximum allowed, try to move to zero-window, then
		 * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and
		 * new incoming data is dropped due to memory limits.
		 * With large window, mss test triggers way too late in order
		 * to announce zero window in time before rmem limit kicks in.
		 */
2676 if (free_space < (allowed_space >> 4) || free_space < mss)
2677 return 0;
2678 }
2679
2680 if (free_space > tp->rcv_ssthresh)
2681 free_space = tp->rcv_ssthresh;
2682
	/* Don't do rounding if we are using window scaling, since the
	 * scaled window will not line up with the MSS boundary anyway.
	 */
2686 if (tp->rx_opt.rcv_wscale) {
2687 window = free_space;

		/* Advertise enough space so that it won't get scaled away.
		 * Important case: prevent zero window announcement if
		 * 1<<rcv_wscale > mss.
		 */
2693 window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
2694 } else {
2695 window = tp->rcv_wnd;
		/* Get the largest window that is a nice multiple of mss.
		 * Window clamp already applied above.
		 * If our current window offering is within 1 mss of the
		 * free space we just keep it. This prevents the divide
		 * and multiply from happening most of the time.
		 * We also don't do any window rounding when the free space
		 * is too small.
		 */
2704 if (window <= free_space - mss || window > free_space)
2705 window = rounddown(free_space, mss);
2706 else if (mss == full_space &&
2707 free_space > window + (full_space >> 1))
2708 window = free_space;
2709 }
2710
2711 return window;
2712}
2713
2714void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2715 const struct sk_buff *next_skb)
2716{
2717 if (unlikely(tcp_has_tx_tstamp(next_skb))) {
2718 const struct skb_shared_info *next_shinfo =
2719 skb_shinfo(next_skb);
2720 struct skb_shared_info *shinfo = skb_shinfo(skb);
2721
2722 shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
2723 shinfo->tskey = next_shinfo->tskey;
2724 TCP_SKB_CB(skb)->txstamp_ack |=
2725 TCP_SKB_CB(next_skb)->txstamp_ack;
2726 }
2727}
2728
2729
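/* Collapses two adjacent SKB's during retransmission. */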
2730static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2731{
2732 struct tcp_sock *tp = tcp_sk(sk);
2733 struct sk_buff *next_skb = skb_rb_next(skb);
2734 int skb_size, next_skb_size;
2735
2736 skb_size = skb->len;
2737 next_skb_size = next_skb->len;
2738
2739 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2740
2741 if (next_skb_size) {
2742 if (next_skb_size <= skb_availroom(skb))
2743 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2744 next_skb_size);
2745 else if (!skb_shift(skb, next_skb, next_skb_size))
2746 return false;
2747 }
2748 tcp_highest_sack_replace(sk, next_skb, skb);
2749
2750 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2751 skb->ip_summed = CHECKSUM_PARTIAL;
2752
2753 if (skb->ip_summed != CHECKSUM_PARTIAL)
2754 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2755
2756
2757 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2758
2759
2760 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2761
	/* All done, get rid of second SKB and account for it so
	 * packet counting does not break.
	 */
2765 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2766 TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
2767
2768
2769 tcp_clear_retrans_hints_partial(tp);
2770 if (next_skb == tp->retransmit_skb_hint)
2771 tp->retransmit_skb_hint = skb;
2772
2773 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2774
2775 tcp_skb_collapse_tstamp(skb, next_skb);
2776
2777 tcp_rtx_queue_unlink_and_free(next_skb, sk);
2778 return true;
2779}
2780
2781
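/* Check if coalescing SKBs is legal. */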
2782static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2783{
2784 if (tcp_skb_pcount(skb) > 1)
2785 return false;
2786 if (skb_cloned(skb))
2787 return false;
2788
2789 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2790 return false;
2791
2792 return true;
2793}
2794
2795
2796
2797
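/* Collapse packets in the retransmit queue to create fewer packets
 * on the wire. This is only done on retransmission.
 */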
2798static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2799 int space)
2800{
2801 struct tcp_sock *tp = tcp_sk(sk);
2802 struct sk_buff *skb = to, *tmp;
2803 bool first = true;
2804
2805 if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
2806 return;
2807 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2808 return;
2809
2810 skb_rbtree_walk_from_safe(skb, tmp) {
2811 if (!tcp_can_collapse(sk, skb))
2812 break;
2813
2814 if (!tcp_skb_can_collapse_to(to))
2815 break;
2816
2817 space -= skb->len;
2818
2819 if (first) {
2820 first = false;
2821 continue;
2822 }
2823
2824 if (space < 0)
2825 break;
2826
2827 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2828 break;
2829
2830 if (!tcp_collapse_retrans(sk, to))
2831 break;
2832 }
2833}
2834
2835
2836
2837
2838
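/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */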
2839int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2840{
2841 struct inet_connection_sock *icsk = inet_csk(sk);
2842 struct tcp_sock *tp = tcp_sk(sk);
2843 unsigned int cur_mss;
2844 int diff, len, err;
2845
2846
	/* Inconclusive MTU probe */
2848 if (icsk->icsk_mtup.probe_size)
2849 icsk->icsk_mtup.probe_size = 0;
2850
	/* Do not send more than we queued. 1/4 is reserved for possible
	 * copying overhead: fragmentation, tunneling, mangling etc.
	 */
2854 if (refcount_read(&sk->sk_wmem_alloc) >
2855 min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
2856 sk->sk_sndbuf))
2857 return -EAGAIN;
2858
2859 if (skb_still_in_host_queue(sk, skb))
2860 return -EBUSY;
2861
2862 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2863 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2864 BUG();
2865 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2866 return -ENOMEM;
2867 }
2868
2869 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2870 return -EHOSTUNREACH;
2871
2872 cur_mss = tcp_current_mss(sk);
2873
	/* If receiver has shrunk his window, and skb is out of
	 * new window, do not retransmit it. The exception is the
	 * case, when window is shrunk to zero. In this case
	 * our retransmit serves as a zero window probe.
	 */
2879 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2880 TCP_SKB_CB(skb)->seq != tp->snd_una)
2881 return -EAGAIN;
2882
2883 len = cur_mss * segs;
2884 if (skb->len > len) {
2885 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
2886 cur_mss, GFP_ATOMIC))
2887 return -ENOMEM;
2888 } else {
2889 if (skb_unclone(skb, GFP_ATOMIC))
2890 return -ENOMEM;
2891
2892 diff = tcp_skb_pcount(skb);
2893 tcp_set_skb_tso_segs(skb, cur_mss);
2894 diff -= tcp_skb_pcount(skb);
2895 if (diff)
2896 tcp_adjust_pcount(sk, skb, diff);
2897 if (skb->len < cur_mss)
2898 tcp_retrans_try_collapse(sk, skb, cur_mss);
2899 }
2900
	/* RFC3168, section 6.1.1.1. ECN fallback */
2902 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
2903 tcp_ecn_clear_syn(sk, skb);
2904
2905
2906 segs = tcp_skb_pcount(skb);
2907 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
2908 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2909 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2910 tp->total_retrans += segs;
2911
	/* make sure skb->data is aligned on arches that require it
	 * and check if ack-trimming & collapsing extended the headroom
	 * beyond what csum_start can cover.
	 */
2916 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2917 skb_headroom(skb) >= 0xFFFF)) {
2918 struct sk_buff *nskb;
2919
2920 tcp_skb_tsorted_save(skb) {
2921 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2922 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2923 -ENOBUFS;
2924 } tcp_skb_tsorted_restore(skb);
2925
2926 if (!err) {
2927 tcp_update_skb_after_send(tp, skb);
2928 tcp_rate_skb_sent(sk, skb);
2929 }
2930 } else {
2931 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2932 }
2933
2934 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
2935 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
2936 TCP_SKB_CB(skb)->seq, segs, err);
2937
2938 if (likely(!err)) {
2939 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2940 trace_tcp_retransmit_skb(sk, skb);
2941 } else if (err != -EBUSY) {
2942 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2943 }
2944 return err;
2945}
2946
2947int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2948{
2949 struct tcp_sock *tp = tcp_sk(sk);
2950 int err = __tcp_retransmit_skb(sk, skb, segs);
2951
2952 if (err == 0) {
2953#if FASTRETRANS_DEBUG > 0
2954 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2955 net_dbg_ratelimited("retrans_out leaked\n");
2956 }
2957#endif
2958 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2959 tp->retrans_out += tcp_skb_pcount(skb);
2960
		/* Save stamp of the first retransmit. */
2962 if (!tp->retrans_stamp)
2963 tp->retrans_stamp = tcp_skb_timestamp(skb);
2964
2965 }
2966
2967 if (tp->undo_retrans < 0)
2968 tp->undo_retrans = 0;
2969 tp->undo_retrans += tcp_skb_pcount(skb);
2970 return err;
2971}
2972
/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or we've hit the congestion window limit.
 */
2978void tcp_xmit_retransmit_queue(struct sock *sk)
2979{
2980 const struct inet_connection_sock *icsk = inet_csk(sk);
2981 struct sk_buff *skb, *rtx_head, *hole = NULL;
2982 struct tcp_sock *tp = tcp_sk(sk);
2983 u32 max_segs;
2984 int mib_idx;
2985
2986 if (!tp->packets_out)
2987 return;
2988
2989 rtx_head = tcp_rtx_queue_head(sk);
2990 skb = tp->retransmit_skb_hint ?: rtx_head;
2991 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
2992 skb_rbtree_walk_from(skb) {
2993 __u8 sacked;
2994 int segs;
2995
2996 if (tcp_pacing_check(sk))
2997 break;
2998
2999
3000 if (!hole)
3001 tp->retransmit_skb_hint = skb;
3002
3003 segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
3004 if (segs <= 0)
3005 return;
3006 sacked = TCP_SKB_CB(skb)->sacked;
		/* In case tcp_shift_skb_data() has aggregated large skbs,
		 * we need to make sure we are not sending too big TSO packets.
		 */
3010 segs = min_t(int, segs, max_segs);
3011
3012 if (tp->retrans_out >= tp->lost_out) {
3013 break;
3014 } else if (!(sacked & TCPCB_LOST)) {
3015 if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
3016 hole = skb;
3017 continue;
3018
3019 } else {
3020 if (icsk->icsk_ca_state != TCP_CA_Loss)
3021 mib_idx = LINUX_MIB_TCPFASTRETRANS;
3022 else
3023 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
3024 }
3025
3026 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
3027 continue;
3028
3029 if (tcp_small_queue_check(sk, skb, 1))
3030 return;
3031
3032 if (tcp_retransmit_skb(sk, skb, segs))
3033 return;
3034
3035 NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
3036
3037 if (tcp_in_cwnd_reduction(sk))
3038 tp->prr_out += tcp_skb_pcount(skb);
3039
3040 if (skb == rtx_head &&
3041 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
3042 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3043 inet_csk(sk)->icsk_rto,
3044 TCP_RTO_MAX);
3045 }
3046}
3047
/* We allow to exceed memory limits for FIN packets to expedite
 * connection tear down and (memory) recovery.
 * Otherwise tcp_send_fin() could be tempted to either delay FIN
 * or even be forgotten.
 */
3055void sk_forced_mem_schedule(struct sock *sk, int size)
3056{
3057 int amt;
3058
3059 if (size <= sk->sk_forward_alloc)
3060 return;
3061 amt = sk_mem_pages(size);
3062 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
3063 sk_memory_allocated_add(sk, amt);
3064
3065 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
3066 mem_cgroup_charge_skmem(sk->sk_memcg, amt);
3067}
3068
3069
3070
3071
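/* Send a FIN. The caller locks the socket for us.
 * We should try to send a FIN packet really hard, but eventually give up.
 */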
3072void tcp_send_fin(struct sock *sk)
3073{
3074 struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
3075 struct tcp_sock *tp = tcp_sk(sk);
3076
	/* Optimization, tack on the FIN if we have one skb in write queue and
	 * this skb was not yet sent, or we are under memory pressure.
	 * Note: in the latter case, FIN packet will be sent after a timeout,
	 * as TCP stack thinks it has already been transmitted.
	 */
3082 if (!tskb && tcp_under_memory_pressure(sk))
3083 tskb = skb_rb_last(&sk->tcp_rtx_queue);
3084
3085 if (tskb) {
3086coalesce:
3087 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
3088 TCP_SKB_CB(tskb)->end_seq++;
3089 tp->write_seq++;
3090 if (tcp_write_queue_empty(sk)) {
			/* This skb sits in the rtx queue and has already been
			 * sent, so no new segment will carry the FIN.  Bump
			 * snd_nxt by hand to account for the FIN sequence byte;
			 * the retransmit path does not change snd_nxt.
			 */
3097 tp->snd_nxt++;
3098 return;
3099 }
3100 } else {
3101 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
3102 if (unlikely(!skb)) {
3103 if (tskb)
3104 goto coalesce;
3105 return;
3106 }
3107 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
3108 skb_reserve(skb, MAX_TCP_HEADER);
3109 sk_forced_mem_schedule(sk, skb->truesize);
		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
3111 tcp_init_nondata_skb(skb, tp->write_seq,
3112 TCPHDR_ACK | TCPHDR_FIN);
3113 tcp_queue_skb(sk, skb);
3114 }
3115 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
3116}
3117
/* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
 * was unread data in the receive queue.  This behavior is recommended
 * by RFC 2525, section 2.17.
 */
3123void tcp_send_active_reset(struct sock *sk, gfp_t priority)
3124{
3125 struct sk_buff *skb;
3126
3127 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
3128
	/* NOTE: No TCP options attached and we never retransmit this. */
3130 skb = alloc_skb(MAX_TCP_HEADER, priority);
3131 if (!skb) {
3132 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3133 return;
3134 }
3135
3136
3137 skb_reserve(skb, MAX_TCP_HEADER);
3138 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
3139 TCPHDR_ACK | TCPHDR_RST);
3140 tcp_mstamp_refresh(tcp_sk(sk));
3141
3142 if (tcp_transmit_skb(sk, skb, 0, priority))
3143 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3144
	/* The troublesome skb that triggered this reset is not available
	 * here, so pass NULL to the tracepoint.
	 */
3148 trace_tcp_send_reset(sk, NULL);
3149}
3150
/* Send a crossed SYN-ACK during socket establishment.
 * WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */
3157int tcp_send_synack(struct sock *sk)
3158{
3159 struct sk_buff *skb;
3160
3161 skb = tcp_rtx_queue_head(sk);
3162 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
3163 pr_err("%s: wrong queue state\n", __func__);
3164 return -EFAULT;
3165 }
3166 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
3167 if (skb_cloned(skb)) {
3168 struct sk_buff *nskb;
3169
3170 tcp_skb_tsorted_save(skb) {
3171 nskb = skb_copy(skb, GFP_ATOMIC);
3172 } tcp_skb_tsorted_restore(skb);
3173 if (!nskb)
3174 return -ENOMEM;
3175 INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
3176 tcp_rtx_queue_unlink_and_free(skb, sk);
3177 __skb_header_release(nskb);
3178 tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
3179 sk->sk_wmem_queued += nskb->truesize;
3180 sk_mem_charge(sk, nskb->truesize);
3181 skb = nskb;
3182 }
3183
3184 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
3185 tcp_ecn_send_synack(sk, skb);
3186 }
3187 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3188}
3189
/* tcp_make_synack - allocate one skb and build a SYN-ACK packet.
 * @sk: listener socket
 * @dst: dst entry attached to the SYNACK (consumed either way)
 * @req: request_sock pointer
 * @foc: cookie for TCP Fast Open
 * @synack_type: decides who owns the skb (listener, child, or nobody
 *               under SYN flood)
 */
3199struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3200 struct request_sock *req,
3201 struct tcp_fastopen_cookie *foc,
3202 enum tcp_synack_type synack_type)
3203{
3204 struct inet_request_sock *ireq = inet_rsk(req);
3205 const struct tcp_sock *tp = tcp_sk(sk);
3206 struct tcp_md5sig_key *md5 = NULL;
3207 struct tcp_out_options opts;
3208 struct sk_buff *skb;
3209 int tcp_header_size;
3210 struct tcphdr *th;
3211 int mss;
3212
3213 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
3214 if (unlikely(!skb)) {
3215 dst_release(dst);
3216 return NULL;
3217 }
3218
3219 skb_reserve(skb, MAX_TCP_HEADER);
3220
3221 switch (synack_type) {
3222 case TCP_SYNACK_NORMAL:
3223 skb_set_owner_w(skb, req_to_sk(req));
3224 break;
3225 case TCP_SYNACK_COOKIE:
		/* Under synflood, we do not attach skb to a socket,
		 * to avoid false sharing.
		 */
3229 break;
3230 case TCP_SYNACK_FASTOPEN:
		/* sk is a const pointer here because multiple CPUs may call us
		 * concurrently; sk->sk_wmem_alloc is atomic, so casting away
		 * const for skb_set_owner_w() is safe.
		 */
3235 skb_set_owner_w(skb, (struct sock *)sk);
3236 break;
3237 }
3238 skb_dst_set(skb, dst);
3239
3240 mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3241
3242 memset(&opts, 0, sizeof(opts));
3243#ifdef CONFIG_SYN_COOKIES
3244 if (unlikely(req->cookie_ts))
3245 skb->skb_mstamp = cookie_init_timestamp(req);
3246 else
3247#endif
3248 skb->skb_mstamp = tcp_clock_us();
3249
3250#ifdef CONFIG_TCP_MD5SIG
3251 rcu_read_lock();
3252 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3253#endif
3254 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3255 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3256 foc) + sizeof(*th);
3257
3258 skb_push(skb, tcp_header_size);
3259 skb_reset_transport_header(skb);
3260
3261 th = (struct tcphdr *)skb->data;
3262 memset(th, 0, sizeof(struct tcphdr));
3263 th->syn = 1;
3264 th->ack = 1;
3265 tcp_ecn_make_synack(req, th);
3266 th->source = htons(ireq->ir_num);
3267 th->dest = ireq->ir_rmt_port;
3268 skb->mark = ireq->ir_mark;
3269 skb->ip_summed = CHECKSUM_PARTIAL;
3270 th->seq = htonl(tcp_rsk(req)->snt_isn);
3271
3272 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3273
	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
3275 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3276 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3277 th->doff = (tcp_header_size >> 2);
3278 __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
3279
3280#ifdef CONFIG_TCP_MD5SIG
3281
3282 if (md5)
3283 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
3284 md5, req_to_sk(req), skb);
3285 rcu_read_unlock();
3286#endif
3287
	/* Do not fool tcpdump (if any), clean our debris */
3289 skb->tstamp = 0;
3290 return skb;
3291}
3292EXPORT_SYMBOL(tcp_make_synack);
3293
3294static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
3295{
3296 struct inet_connection_sock *icsk = inet_csk(sk);
3297 const struct tcp_congestion_ops *ca;
3298 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
3299
3300 if (ca_key == TCP_CA_UNSPEC)
3301 return;
3302
3303 rcu_read_lock();
3304 ca = tcp_ca_find_key(ca_key);
3305 if (likely(ca && try_module_get(ca->owner))) {
3306 module_put(icsk->icsk_ca_ops->owner);
3307 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
3308 icsk->icsk_ca_ops = ca;
3309 }
3310 rcu_read_unlock();
3311}
3312
3313
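/* Do all connect socket setups that can be done AF independent. */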
3314static void tcp_connect_init(struct sock *sk)
3315{
3316 const struct dst_entry *dst = __sk_dst_get(sk);
3317 struct tcp_sock *tp = tcp_sk(sk);
3318 __u8 rcv_wscale;
3319 u32 rcv_wnd;
3320

	/* We'll fix this up when we get a response from the other end.
	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
	 */
3324 tp->tcp_header_len = sizeof(struct tcphdr);
3325 if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
3326 tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
3327
3328#ifdef CONFIG_TCP_MD5SIG
3329 if (tp->af_specific->md5_lookup(sk, sk))
3330 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
3331#endif
3332
	/* If user gave his TCP_MAXSEG, record it to clamp */
3334 if (tp->rx_opt.user_mss)
3335 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3336 tp->max_window = 0;
3337 tcp_mtup_init(sk);
3338 tcp_sync_mss(sk, dst_mtu(dst));
3339
3340 tcp_ca_dst_init(sk, dst);
3341
3342 if (!tp->window_clamp)
3343 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
3344 tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3345
3346 tcp_initialize_rcv_mss(sk);
3347
3348
3349 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3350 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3351 tp->window_clamp = tcp_full_space(sk);
3352
3353 rcv_wnd = tcp_rwnd_init_bpf(sk);
3354 if (rcv_wnd == 0)
3355 rcv_wnd = dst_metric(dst, RTAX_INITRWND);
3356
3357 tcp_select_initial_window(sk, tcp_full_space(sk),
3358 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3359 &tp->rcv_wnd,
3360 &tp->window_clamp,
3361 sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
3362 &rcv_wscale,
3363 rcv_wnd);
3364
3365 tp->rx_opt.rcv_wscale = rcv_wscale;
3366 tp->rcv_ssthresh = tp->rcv_wnd;
3367
3368 sk->sk_err = 0;
3369 sock_reset_flag(sk, SOCK_DONE);
3370 tp->snd_wnd = 0;
3371 tcp_init_wl(tp, 0);
3372 tp->snd_una = tp->write_seq;
3373 tp->snd_sml = tp->write_seq;
3374 tp->snd_up = tp->write_seq;
3375 tp->snd_nxt = tp->write_seq;
3376
3377 if (likely(!tp->repair))
3378 tp->rcv_nxt = 0;
3379 else
3380 tp->rcv_tstamp = tcp_jiffies32;
3381 tp->rcv_wup = tp->rcv_nxt;
3382 tp->copied_seq = tp->rcv_nxt;
3383
3384 inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
3385 inet_csk(sk)->icsk_retransmits = 0;
3386 tcp_clear_retrans(tp);
3387}
3388
3389static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3390{
3391 struct tcp_sock *tp = tcp_sk(sk);
3392 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3393
3394 tcb->end_seq += skb->len;
3395 __skb_header_release(skb);
3396 sk->sk_wmem_queued += skb->truesize;
3397 sk_mem_charge(sk, skb->truesize);
3398 tp->write_seq = tcb->end_seq;
3399 tp->packets_out += tcp_skb_pcount(skb);
3400}
3401
/* Build and send a SYN with data and (cached) Fast Open cookie. However,
 * queue a data-only packet after the regular SYN, such that regular SYNs
 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
 * only the SYN sequence, the data are retransmitted in the first ACK.
 * If cookie is not cached or other error occurs, falls back to send a
 * regular SYN with Fast Open cookie request option.
 */
3409static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3410{
3411 struct tcp_sock *tp = tcp_sk(sk);
3412 struct tcp_fastopen_request *fo = tp->fastopen_req;
3413 int space, err = 0;
3414 struct sk_buff *syn_data;
3415
3416 tp->rx_opt.mss_clamp = tp->advmss;
3417 if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
3418 goto fallback;
3419
	/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
	 * user-MSS. Reserve maximum option space for middleboxes that add
	 * private TCP options. The cost is reduced data space in SYN :(
	 */
3424 tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
3425
3426 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3427 MAX_TCP_OPTION_SPACE;
3428
3429 space = min_t(size_t, space, fo->size);
3430
	/* limit to order-0 allocations */
3432 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3433
3434 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
3435 if (!syn_data)
3436 goto fallback;
3437 syn_data->ip_summed = CHECKSUM_PARTIAL;
3438 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3439 if (space) {
3440 int copied = copy_from_iter(skb_put(syn_data, space), space,
3441 &fo->data->msg_iter);
3442 if (unlikely(!copied)) {
3443 tcp_skb_tsorted_anchor_cleanup(syn_data);
3444 kfree_skb(syn_data);
3445 goto fallback;
3446 }
3447 if (copied != space) {
3448 skb_trim(syn_data, copied);
3449 space = copied;
3450 }
3451 }
3452
3453 if (space == fo->size)
3454 fo->data = NULL;
3455 fo->copied = space;
3456
3457 tcp_connect_queue_skb(sk, syn_data);
3458 if (syn_data->len)
3459 tcp_chrono_start(sk, TCP_CHRONO_BUSY);
3460
3461 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3462
3463 syn->skb_mstamp = syn_data->skb_mstamp;
3464
	/* Now full SYN+DATA was cloned and sent (or not),
	 * remove the SYN from the original skb (syn_data)
	 * we keep in the write queue in case of a retransmit, as we
	 * also have the SYN packet (with no data) in the same queue.
	 */
3470 TCP_SKB_CB(syn_data)->seq++;
3471 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3472 if (!err) {
3473 tp->syn_data = (fo->copied > 0);
3474 tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
3475 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3476 goto done;
3477 }
3478
	/* data was not sent, put it in the write queue */
3480 __skb_queue_tail(&sk->sk_write_queue, syn_data);
3481 tp->packets_out -= tcp_skb_pcount(syn_data);
3482
3483fallback:
	/* Send a regular SYN with Fast Open cookie request option */
3485 if (fo->cookie.len > 0)
3486 fo->cookie.len = 0;
3487 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3488 if (err)
3489 tp->syn_fastopen = 0;
3490done:
3491 fo->cookie.len = -1;
3492 return err;
3493}
3494
3495
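/* Build a SYN and send it off. */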
3496int tcp_connect(struct sock *sk)
3497{
3498 struct tcp_sock *tp = tcp_sk(sk);
3499 struct sk_buff *buff;
3500 int err;
3501
3502 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
3503
3504 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
3505 return -EHOSTUNREACH;
3506
3507 tcp_connect_init(sk);
3508
3509 if (unlikely(tp->repair)) {
3510 tcp_finish_connect(sk, NULL);
3511 return 0;
3512 }
3513
3514 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
3515 if (unlikely(!buff))
3516 return -ENOBUFS;
3517
3518 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3519 tcp_mstamp_refresh(tp);
3520 tp->retrans_stamp = tcp_time_stamp(tp);
3521 tcp_connect_queue_skb(sk, buff);
3522 tcp_ecn_send_syn(sk, buff);
3523 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
3524
3525
3526 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3527 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3528 if (err == -ECONNREFUSED)
3529 return err;
3530
	/* We change tp->snd_nxt after the tcp_transmit_skb() call
	 * in order to make this packet get counted in tcpOutSegs.
	 */
3534 tp->snd_nxt = tp->write_seq;
3535 tp->pushed_seq = tp->write_seq;
3536 buff = tcp_send_head(sk);
3537 if (unlikely(buff)) {
3538 tp->snd_nxt = TCP_SKB_CB(buff)->seq;
3539 tp->pushed_seq = TCP_SKB_CB(buff)->seq;
3540 }
3541 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3542
3543
3544 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3545 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3546 return 0;
3547}
3548EXPORT_SYMBOL(tcp_connect);
3549
3550
3551
3552
3553
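/* Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
 * for details.
 */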
3554void tcp_send_delayed_ack(struct sock *sk)
3555{
3556 struct inet_connection_sock *icsk = inet_csk(sk);
3557 int ato = icsk->icsk_ack.ato;
3558 unsigned long timeout;
3559
3560 tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
3561
3562 if (ato > TCP_DELACK_MIN) {
3563 const struct tcp_sock *tp = tcp_sk(sk);
3564 int max_ato = HZ / 2;
3565
3566 if (icsk->icsk_ack.pingpong ||
3567 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3568 max_ato = TCP_DELACK_MAX;
3569
3570
3571
3572
3573
3574
3575
3576 if (tp->srtt_us) {
3577 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3578 TCP_DELACK_MIN);
3579
3580 if (rtt < max_ato)
3581 max_ato = rtt;
3582 }
3583
3584 ato = min(ato, max_ato);
3585 }
3586
3587
3588 timeout = jiffies + ato;
3589
3590
3591 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 */
3595 if (icsk->icsk_ack.blocked ||
3596 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3597 tcp_send_ack(sk);
3598 return;
3599 }
3600
3601 if (!time_before(timeout, icsk->icsk_ack.timeout))
3602 timeout = icsk->icsk_ack.timeout;
3603 }
3604 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3605 icsk->icsk_ack.timeout = timeout;
3606 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3607}
3608
3609
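/* This routine sends an ack and also updates the window. */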
3610void tcp_send_ack(struct sock *sk)
3611{
3612 struct sk_buff *buff;
3613
3614
3615 if (sk->sk_state == TCP_CLOSE)
3616 return;
3617
3618 tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
3619
	/* We are not putting this on the write queue, so
	 * tcp_transmit_skb() will set the ownership to this
	 * sock.
	 */
3624 buff = alloc_skb(MAX_TCP_HEADER,
3625 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3626 if (unlikely(!buff)) {
3627 inet_csk_schedule_ack(sk);
3628 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3629 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3630 TCP_DELACK_MAX, TCP_RTO_MAX);
3631 return;
3632 }
3633
3634
3635 skb_reserve(buff, MAX_TCP_HEADER);
3636 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3637
	/* We do not want pure acks influencing TCP Small Queues
	 * or fq/pacing too much.
	 */
3642 skb_set_tcp_pure_ack(buff);
3643
3644
3645 tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
3646}
3647EXPORT_SYMBOL_GPL(tcp_send_ack);
3648
/* This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 *
 * Question: what should we make while urgent mode?
 * 4.4BSD forces sending single byte of data. We cannot send
 * out of window data, because we have SND.NXT==SND.MAX...
 *
 * Current solution: to send TWO zero-sized segments in urgent mode:
 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
 * out-of-date with SND.UNA-1 to probe window.
 */
3660static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3661{
3662 struct tcp_sock *tp = tcp_sk(sk);
3663 struct sk_buff *skb;
3664
3665
3666 skb = alloc_skb(MAX_TCP_HEADER,
3667 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3668 if (!skb)
3669 return -1;
3670
3671
3672 skb_reserve(skb, MAX_TCP_HEADER);
3673
	/* Use a previous sequence.  This should cause the other
	 * end to send an ack.  Don't queue or clone SKB, just
	 * send it.
	 */
3677 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3678 NET_INC_STATS(sock_net(sk), mib);
3679 return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
3680}
3681
3682
3683void tcp_send_window_probe(struct sock *sk)
3684{
3685 if (sk->sk_state == TCP_ESTABLISHED) {
3686 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3687 tcp_mstamp_refresh(tcp_sk(sk));
3688 tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
3689 }
3690}
3691
3692
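/* Initiate keepalive or window probe from timer. */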
3693int tcp_write_wakeup(struct sock *sk, int mib)
3694{
3695 struct tcp_sock *tp = tcp_sk(sk);
3696 struct sk_buff *skb;
3697
3698 if (sk->sk_state == TCP_CLOSE)
3699 return -1;
3700
3701 skb = tcp_send_head(sk);
3702 if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3703 int err;
3704 unsigned int mss = tcp_current_mss(sk);
3705 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3706
3707 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3708 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3709
		/* We are probing the opening of a window
		 * but the window size is != 0;
		 * this must be a result of sender-side SWS avoidance.
		 */
3714 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3715 skb->len > mss) {
3716 seg_size = min(seg_size, mss);
3717 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3718 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
3719 skb, seg_size, mss, GFP_ATOMIC))
3720 return -1;
3721 } else if (!tcp_skb_pcount(skb))
3722 tcp_set_skb_tso_segs(skb, mss);
3723
3724 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3725 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3726 if (!err)
3727 tcp_event_new_data_sent(sk, skb);
3728 return err;
3729 } else {
3730 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3731 tcp_xmit_probe_skb(sk, 1, mib);
3732 return tcp_xmit_probe_skb(sk, 0, mib);
3733 }
3734}
3735
3736
3737
3738
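/* A window probe timeout has occurred.  If window is not closed send
 * a partial packet else a zero probe.
 */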
3739void tcp_send_probe0(struct sock *sk)
3740{
3741 struct inet_connection_sock *icsk = inet_csk(sk);
3742 struct tcp_sock *tp = tcp_sk(sk);
3743 struct net *net = sock_net(sk);
3744 unsigned long probe_max;
3745 int err;
3746
3747 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3748
3749 if (tp->packets_out || tcp_write_queue_empty(sk)) {
		/* Cancel probe timer, if it is not required. */
3751 icsk->icsk_probes_out = 0;
3752 icsk->icsk_backoff = 0;
3753 return;
3754 }
3755
3756 if (err <= 0) {
3757 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3758 icsk->icsk_backoff++;
3759 icsk->icsk_probes_out++;
3760 probe_max = TCP_RTO_MAX;
3761 } else {
		/* If packet was not sent due to local congestion,
		 * do not backoff and do not remember icsk_probes_out.
		 * Let local senders fight for local resources.
		 *
		 * Keep using the accumulated backoff for now.
		 */
3768 if (!icsk->icsk_probes_out)
3769 icsk->icsk_probes_out = 1;
3770 probe_max = TCP_RESOURCE_PROBE_INTERVAL;
3771 }
3772 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3773 tcp_probe0_when(sk, probe_max),
3774 TCP_RTO_MAX);
3775}
3776
3777int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3778{
3779 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3780 struct flowi fl;
3781 int res;
3782
3783 tcp_rsk(req)->txhash = net_tx_rndhash();
3784 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
3785 if (!res) {
3786 __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
3787 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3788 if (unlikely(tcp_passive_fastopen(sk)))
3789 tcp_sk(sk)->total_retrans++;
3790 trace_tcp_retransmit_synack(sk, req);
3791 }
3792 return res;
3793}
3794EXPORT_SYMBOL(tcp_rtx_synack);
3795