/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP):
 *		the output path.  This file builds TCP headers and options,
 *		chooses MSS and window values, and queues, paces and
 *		(re)transmits segments.
 */
38#define pr_fmt(fmt) "TCP: " fmt
39
40#include <net/tcp.h>
41
42#include <linux/compiler.h>
43#include <linux/gfp.h>
44#include <linux/module.h>
45#include <linux/static_key.h>
46
47#include <trace/events/tcp.h>
48
/* Refresh the cached send-path clocks: snapshot the nanosecond clock into
 * tcp_clock_cache and derive the microsecond tcp_mstamp from it.
 */
52void tcp_mstamp_refresh(struct tcp_sock *tp)
53{
54 u64 val = tcp_clock_ns();
55
56 tp->tcp_clock_cache = val;
57 tp->tcp_mstamp = div_u64(val, NSEC_PER_USEC);
58}
59
60static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
61 int push_one, gfp_t gfp);
62
63
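/* Account for new data that has been sent to the network: advance snd_nxt,
 * move the skb from the write queue into the retransmit rb-tree, update
 * packets_out, and (re)arm the retransmit timer when this is the first
 * packet in flight or a loss probe was pending.
 */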
64static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
65{
66 struct inet_connection_sock *icsk = inet_csk(sk);
67 struct tcp_sock *tp = tcp_sk(sk);
68 unsigned int prior_packets = tp->packets_out;
69
70 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
71
72 __skb_unlink(skb, &sk->sk_write_queue);
73 tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
74
75 tp->packets_out += tcp_skb_pcount(skb);
76 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
77 tcp_rearm_rto(sk);
78
79 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
80 tcp_skb_pcount(skb));
81}
82
83
84
85
86
87
88
89
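/* Sequence number to use in a segment that carries no new data (ACK/RST):
 * prefer SND.NXT as long as it is inside (or, with window scaling in use,
 * only marginally beyond) the announced window; otherwise fall back to the
 * right edge of the window.
 */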
90static inline __u32 tcp_acceptable_seq(const struct sock *sk)
91{
92 const struct tcp_sock *tp = tcp_sk(sk);
93
94 if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
95 (tp->rx_opt.wscale_ok &&
96 ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
97 return tp->snd_nxt;
98 else
99 return tcp_wnd_end(tp);
100}
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
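/* Calculate the MSS to advertise in our SYN: start from tp->advmss and
 * lower it to the route's advertised-MSS metric if that is smaller,
 * caching the result back into tp->advmss.
 */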
116static __u16 tcp_advertise_mss(struct sock *sk)
117{
118 struct tcp_sock *tp = tcp_sk(sk);
119 const struct dst_entry *dst = __sk_dst_get(sk);
120 int mss = tp->advmss;
121
122 if (dst) {
123 unsigned int metric = dst_metric_advmss(dst);
124
125 if (metric < mss) {
126 mss = metric;
127 tp->advmss = mss;
128 }
129 }
130
131 return (__u16)mss;
132}
133
134
135
136
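/* RFC 2861 style congestion window restart after an idle period: halve the
 * congestion window once per RTO of idle time, but never reduce it below
 * the initial (restart) window.
 */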
137void tcp_cwnd_restart(struct sock *sk, s32 delta)
138{
139 struct tcp_sock *tp = tcp_sk(sk);
140 u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
141 u32 cwnd = tp->snd_cwnd;
142
143 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
144
145 tp->snd_ssthresh = tcp_current_ssthresh(sk);
146 restart_cwnd = min(restart_cwnd, cwnd);
147
148 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
149 cwnd >>= 1;
150 tp->snd_cwnd = max(cwnd, restart_cwnd);
151 tp->snd_cwnd_stamp = tcp_jiffies32;
152 tp->snd_cwnd_used = 0;
153}
154
155
156static void tcp_event_data_sent(struct tcp_sock *tp,
157 struct sock *sk)
158{
159 struct inet_connection_sock *icsk = inet_csk(sk);
160 const u32 now = tcp_jiffies32;
161
162 if (tcp_packets_in_flight(tp) == 0)
163 tcp_ca_event(sk, CA_EVENT_TX_START);
164
165
166
167
168
169
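	/* If this data segment is sent shortly (within the delayed-ACK
	 * timeout, ato) after the last received segment, and before any
	 * other send since then, bump the ping-pong counter used to detect
	 * interactive request/response flows for delayed-ACK decisions.
	 */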
170 if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
171 (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
172 inet_csk_inc_pingpong_cnt(sk);
173
174 tp->lsndtime = now;
175}
176
177
178static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
179 u32 rcv_nxt)
180{
181 struct tcp_sock *tp = tcp_sk(sk);
182
183 if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
184 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
185 tp->compressed_ack - TCP_FASTRETRANS_THRESH);
186 tp->compressed_ack = TCP_FASTRETRANS_THRESH;
187 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
188 __sock_put(sk);
189 }
190
191 if (unlikely(rcv_nxt != tp->rcv_nxt))
192 return;
193 tcp_dec_quickack_mode(sk, pkts);
194 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
195}
196
197
198
199
200
201
202
203
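/* Determine the window scaling factor and the initial window to offer,
 * based on the amount of receive buffer space available.  Results are
 * stored in *rcv_wnd, *rcv_wscale and *window_clamp; for smooth operation
 * the space offered should be a multiple of the MSS where possible.
 */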
204void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
205 __u32 *rcv_wnd, __u32 *window_clamp,
206 int wscale_ok, __u8 *rcv_wscale,
207 __u32 init_rcv_wnd)
208{
209 unsigned int space = (__space < 0 ? 0 : __space);
210
211
212 if (*window_clamp == 0)
213 (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
214 space = min(*window_clamp, space);
215
216
217 if (space > mss)
218 space = rounddown(space, mss);
219
220
221
222
223
224
225
226
227
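	/* Some broken stacks interpret the raw 16-bit window field as a
	 * signed quantity.  When the workaround sysctl is enabled, never
	 * advertise more than 32767 (MAX_TCP_WINDOW) in the unscaled field;
	 * otherwise allow the full 65535.
	 */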
228 if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
229 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
230 else
231 (*rcv_wnd) = min_t(u32, space, U16_MAX);
232
233 if (init_rcv_wnd)
234 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
235
236 *rcv_wscale = 0;
237 if (wscale_ok) {
238
239 space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
240 space = max_t(u32, space, sysctl_rmem_max);
241 space = min_t(u32, space, *window_clamp);
242 *rcv_wscale = clamp_t(int, ilog2(space) - 15,
243 0, TCP_MAX_WSCALE);
244 }
245
246 (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
247}
248EXPORT_SYMBOL(tcp_select_initial_window);
249
250
251
252
253
254
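/* Choose a new window to advertise, update rcv_wnd/rcv_wup in the socket,
 * and return the result with RFC 1323 scaling applied, ready to be placed
 * in the TCP header's window field.
 */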
255static u16 tcp_select_window(struct sock *sk)
256{
257 struct tcp_sock *tp = tcp_sk(sk);
258 u32 old_win = tp->rcv_wnd;
259 u32 cur_win = tcp_receive_window(tp);
260 u32 new_win = __tcp_select_window(sk);
261
262
263 if (new_win < cur_win) {
264
265
266
267
268
269
270
271 if (new_win == 0)
272 NET_INC_STATS(sock_net(sk),
273 LINUX_MIB_TCPWANTZEROWINDOWADV);
274 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
275 }
276 tp->rcv_wnd = new_win;
277 tp->rcv_wup = tp->rcv_nxt;
278
279
280
281
282 if (!tp->rx_opt.rcv_wscale &&
283 sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
284 new_win = min(new_win, MAX_TCP_WINDOW);
285 else
286 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
287
288
289 new_win >>= tp->rx_opt.rcv_wscale;
290
291
292 if (new_win == 0) {
293 tp->pred_flags = 0;
294 if (old_win)
295 NET_INC_STATS(sock_net(sk),
296 LINUX_MIB_TCPTOZEROWINDOWADV);
297 } else if (old_win == 0) {
298 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
299 }
300
301 return new_win;
302}
303
304
305static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
306{
307 const struct tcp_sock *tp = tcp_sk(sk);
308
309 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
310 if (!(tp->ecn_flags & TCP_ECN_OK))
311 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
312 else if (tcp_ca_needs_ecn(sk) ||
313 tcp_bpf_ca_needs_ecn(sk))
314 INET_ECN_xmit(sk);
315}
316
317
318static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
319{
320 struct tcp_sock *tp = tcp_sk(sk);
321 bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
322 bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
323 tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
324
325 if (!use_ecn) {
326 const struct dst_entry *dst = __sk_dst_get(sk);
327
328 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
329 use_ecn = true;
330 }
331
332 tp->ecn_flags = 0;
333
334 if (use_ecn) {
335 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
336 tp->ecn_flags = TCP_ECN_OK;
337 if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
338 INET_ECN_xmit(sk);
339 }
340}
341
342static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
343{
344 if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
345
346
347
348 TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
349}
350
351static void
352tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
353{
354 if (inet_rsk(req)->ecn_ok)
355 th->ece = 1;
356}
357
358
359
360
361static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
362 struct tcphdr *th, int tcp_header_len)
363{
364 struct tcp_sock *tp = tcp_sk(sk);
365
366 if (tp->ecn_flags & TCP_ECN_OK) {
367
368 if (skb->len != tcp_header_len &&
369 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
370 INET_ECN_xmit(sk);
371 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
372 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
373 th->cwr = 1;
374 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
375 }
376 } else if (!tcp_ca_needs_ecn(sk)) {
377
378 INET_ECN_dontxmit(sk);
379 }
380 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
381 th->ece = 1;
382 }
383}
384
385
386
387
388static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
389{
390 skb->ip_summed = CHECKSUM_PARTIAL;
391
392 TCP_SKB_CB(skb)->tcp_flags = flags;
393 TCP_SKB_CB(skb)->sacked = 0;
394
395 tcp_skb_pcount_set(skb, 1);
396
397 TCP_SKB_CB(skb)->seq = seq;
398 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
399 seq++;
400 TCP_SKB_CB(skb)->end_seq = seq;
401}
402
403static inline bool tcp_urg_mode(const struct tcp_sock *tp)
404{
405 return tp->snd_una != tp->snd_up;
406}
407
408#define OPTION_SACK_ADVERTISE (1 << 0)
409#define OPTION_TS (1 << 1)
410#define OPTION_MD5 (1 << 2)
411#define OPTION_WSCALE (1 << 3)
412#define OPTION_FAST_OPEN_COOKIE (1 << 8)
413#define OPTION_SMC (1 << 9)
414
415static void smc_options_write(__be32 *ptr, u16 *options)
416{
417#if IS_ENABLED(CONFIG_SMC)
418 if (static_branch_unlikely(&tcp_have_smc)) {
419 if (unlikely(OPTION_SMC & *options)) {
420 *ptr++ = htonl((TCPOPT_NOP << 24) |
421 (TCPOPT_NOP << 16) |
422 (TCPOPT_EXP << 8) |
423 (TCPOLEN_EXP_SMC_BASE));
424 *ptr++ = htonl(TCPOPT_SMC_MAGIC);
425 }
426 }
427#endif
428}
429
430struct tcp_out_options {
431 u16 options;
432 u16 mss;
433 u8 ws;
434 u8 num_sack_blocks;
435 u8 hash_size;
436 __u8 *hash_location;
437 __u32 tsval, tsecr;
438 struct tcp_fastopen_cookie *fastopen_cookie;
439};
440
441
442
443
444
445
446
447
448
449
450
451
452
453
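/* Write previously computed TCP options into the packet.  Options are laid
 * out in a fixed order (MD5 signature, MSS, timestamps/SACK-permitted,
 * window scale, SACK blocks, Fast Open cookie, SMC experimental option),
 * padded with NOPs so that each option starts on a 32-bit boundary.
 */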
454static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
455 struct tcp_out_options *opts)
456{
457 u16 options = opts->options;
458
459 if (unlikely(OPTION_MD5 & options)) {
460 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
461 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
462
463 opts->hash_location = (__u8 *)ptr;
464 ptr += 4;
465 }
466
467 if (unlikely(opts->mss)) {
468 *ptr++ = htonl((TCPOPT_MSS << 24) |
469 (TCPOLEN_MSS << 16) |
470 opts->mss);
471 }
472
473 if (likely(OPTION_TS & options)) {
474 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
475 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
476 (TCPOLEN_SACK_PERM << 16) |
477 (TCPOPT_TIMESTAMP << 8) |
478 TCPOLEN_TIMESTAMP);
479 options &= ~OPTION_SACK_ADVERTISE;
480 } else {
481 *ptr++ = htonl((TCPOPT_NOP << 24) |
482 (TCPOPT_NOP << 16) |
483 (TCPOPT_TIMESTAMP << 8) |
484 TCPOLEN_TIMESTAMP);
485 }
486 *ptr++ = htonl(opts->tsval);
487 *ptr++ = htonl(opts->tsecr);
488 }
489
490 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
491 *ptr++ = htonl((TCPOPT_NOP << 24) |
492 (TCPOPT_NOP << 16) |
493 (TCPOPT_SACK_PERM << 8) |
494 TCPOLEN_SACK_PERM);
495 }
496
497 if (unlikely(OPTION_WSCALE & options)) {
498 *ptr++ = htonl((TCPOPT_NOP << 24) |
499 (TCPOPT_WINDOW << 16) |
500 (TCPOLEN_WINDOW << 8) |
501 opts->ws);
502 }
503
504 if (unlikely(opts->num_sack_blocks)) {
505 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
506 tp->duplicate_sack : tp->selective_acks;
507 int this_sack;
508
509 *ptr++ = htonl((TCPOPT_NOP << 24) |
510 (TCPOPT_NOP << 16) |
511 (TCPOPT_SACK << 8) |
512 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
513 TCPOLEN_SACK_PERBLOCK)));
514
515 for (this_sack = 0; this_sack < opts->num_sack_blocks;
516 ++this_sack) {
517 *ptr++ = htonl(sp[this_sack].start_seq);
518 *ptr++ = htonl(sp[this_sack].end_seq);
519 }
520
521 tp->rx_opt.dsack = 0;
522 }
523
524 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
525 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
526 u8 *p = (u8 *)ptr;
527 u32 len;
528
529 if (foc->exp) {
530 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
531 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
532 TCPOPT_FASTOPEN_MAGIC);
533 p += TCPOLEN_EXP_FASTOPEN_BASE;
534 } else {
535 len = TCPOLEN_FASTOPEN_BASE + foc->len;
536 *p++ = TCPOPT_FASTOPEN;
537 *p++ = len;
538 }
539
540 memcpy(p, foc->val, foc->len);
541 if ((len & 3) == 2) {
542 p[foc->len] = TCPOPT_NOP;
543 p[foc->len + 1] = TCPOPT_NOP;
544 }
545 ptr += (len + 3) >> 2;
546 }
547
548 smc_options_write(ptr, &options);
549}
550
551static void smc_set_option(const struct tcp_sock *tp,
552 struct tcp_out_options *opts,
553 unsigned int *remaining)
554{
555#if IS_ENABLED(CONFIG_SMC)
556 if (static_branch_unlikely(&tcp_have_smc)) {
557 if (tp->syn_smc) {
558 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
559 opts->options |= OPTION_SMC;
560 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
561 }
562 }
563 }
564#endif
565}
566
567static void smc_set_option_cond(const struct tcp_sock *tp,
568 const struct inet_request_sock *ireq,
569 struct tcp_out_options *opts,
570 unsigned int *remaining)
571{
572#if IS_ENABLED(CONFIG_SMC)
573 if (static_branch_unlikely(&tcp_have_smc)) {
574 if (tp->syn_smc && ireq->smc_ok) {
575 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
576 opts->options |= OPTION_SMC;
577 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
578 }
579 }
580 }
581#endif
582}
583
584
585
586
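/* Compute TCP options for SYN packets.  This is not the final wire format
 * yet: the decisions (MSS, timestamps, window scaling, SACK, Fast Open,
 * SMC) are recorded in *opts and the number of option bytes used is
 * returned.
 */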
587static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
588 struct tcp_out_options *opts,
589 struct tcp_md5sig_key **md5)
590{
591 struct tcp_sock *tp = tcp_sk(sk);
592 unsigned int remaining = MAX_TCP_OPTION_SPACE;
593 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
594
595 *md5 = NULL;
596#ifdef CONFIG_TCP_MD5SIG
597 if (static_branch_unlikely(&tcp_md5_needed) &&
598 rcu_access_pointer(tp->md5sig_info)) {
599 *md5 = tp->af_specific->md5_lookup(sk, sk);
600 if (*md5) {
601 opts->options |= OPTION_MD5;
602 remaining -= TCPOLEN_MD5SIG_ALIGNED;
603 }
604 }
605#endif
606
607
608
609
610
611
612
613
614
615
616 opts->mss = tcp_advertise_mss(sk);
617 remaining -= TCPOLEN_MSS_ALIGNED;
618
619 if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
620 opts->options |= OPTION_TS;
621 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
622 opts->tsecr = tp->rx_opt.ts_recent;
623 remaining -= TCPOLEN_TSTAMP_ALIGNED;
624 }
625 if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
626 opts->ws = tp->rx_opt.rcv_wscale;
627 opts->options |= OPTION_WSCALE;
628 remaining -= TCPOLEN_WSCALE_ALIGNED;
629 }
630 if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
631 opts->options |= OPTION_SACK_ADVERTISE;
632 if (unlikely(!(OPTION_TS & opts->options)))
633 remaining -= TCPOLEN_SACKPERM_ALIGNED;
634 }
635
636 if (fastopen && fastopen->cookie.len >= 0) {
637 u32 need = fastopen->cookie.len;
638
639 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
640 TCPOLEN_FASTOPEN_BASE;
641 need = (need + 3) & ~3U;
642 if (remaining >= need) {
643 opts->options |= OPTION_FAST_OPEN_COOKIE;
644 opts->fastopen_cookie = &fastopen->cookie;
645 remaining -= need;
646 tp->syn_fastopen = 1;
647 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
648 }
649 }
650
651 smc_set_option(tp, opts, &remaining);
652
653 return MAX_TCP_OPTION_SPACE - remaining;
654}
655
656
657static unsigned int tcp_synack_options(const struct sock *sk,
658 struct request_sock *req,
659 unsigned int mss, struct sk_buff *skb,
660 struct tcp_out_options *opts,
661 const struct tcp_md5sig_key *md5,
662 struct tcp_fastopen_cookie *foc)
663{
664 struct inet_request_sock *ireq = inet_rsk(req);
665 unsigned int remaining = MAX_TCP_OPTION_SPACE;
666
667#ifdef CONFIG_TCP_MD5SIG
668 if (md5) {
669 opts->options |= OPTION_MD5;
670 remaining -= TCPOLEN_MD5SIG_ALIGNED;
671
672
673
674
675
676
677 ireq->tstamp_ok &= !ireq->sack_ok;
678 }
679#endif
680
681
682 opts->mss = mss;
683 remaining -= TCPOLEN_MSS_ALIGNED;
684
685 if (likely(ireq->wscale_ok)) {
686 opts->ws = ireq->rcv_wscale;
687 opts->options |= OPTION_WSCALE;
688 remaining -= TCPOLEN_WSCALE_ALIGNED;
689 }
690 if (likely(ireq->tstamp_ok)) {
691 opts->options |= OPTION_TS;
692 opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
693 opts->tsecr = req->ts_recent;
694 remaining -= TCPOLEN_TSTAMP_ALIGNED;
695 }
696 if (likely(ireq->sack_ok)) {
697 opts->options |= OPTION_SACK_ADVERTISE;
698 if (unlikely(!ireq->tstamp_ok))
699 remaining -= TCPOLEN_SACKPERM_ALIGNED;
700 }
701 if (foc != NULL && foc->len >= 0) {
702 u32 need = foc->len;
703
704 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
705 TCPOLEN_FASTOPEN_BASE;
706 need = (need + 3) & ~3U;
707 if (remaining >= need) {
708 opts->options |= OPTION_FAST_OPEN_COOKIE;
709 opts->fastopen_cookie = foc;
710 remaining -= need;
711 }
712 }
713
714 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
715
716 return MAX_TCP_OPTION_SPACE - remaining;
717}
718
719
720
721
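/* Compute TCP options for established sockets: MD5, timestamps and any
 * pending (D)SACK blocks.  Returns the total option size in bytes.
 */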
722static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
723 struct tcp_out_options *opts,
724 struct tcp_md5sig_key **md5)
725{
726 struct tcp_sock *tp = tcp_sk(sk);
727 unsigned int size = 0;
728 unsigned int eff_sacks;
729
730 opts->options = 0;
731
732 *md5 = NULL;
733#ifdef CONFIG_TCP_MD5SIG
734 if (static_branch_unlikely(&tcp_md5_needed) &&
735 rcu_access_pointer(tp->md5sig_info)) {
736 *md5 = tp->af_specific->md5_lookup(sk, sk);
737 if (*md5) {
738 opts->options |= OPTION_MD5;
739 size += TCPOLEN_MD5SIG_ALIGNED;
740 }
741 }
742#endif
743
744 if (likely(tp->rx_opt.tstamp_ok)) {
745 opts->options |= OPTION_TS;
746 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
747 opts->tsecr = tp->rx_opt.ts_recent;
748 size += TCPOLEN_TSTAMP_ALIGNED;
749 }
750
751 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
752 if (unlikely(eff_sacks)) {
753 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
754 opts->num_sack_blocks =
755 min_t(unsigned int, eff_sacks,
756 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
757 TCPOLEN_SACK_PERBLOCK);
758 size += TCPOLEN_SACK_BASE_ALIGNED +
759 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
760 }
761
762 return size;
763}
764
/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ limits the number of skbs a TCP flow may have queued in the qdisc
 * and device layers, to reduce round-trip times and bufferbloat.  This is
 * done with a special skb destructor (tcp_wfree()).
 *
 * Because transmitting from an skb destructor is not allowed, a per-cpu
 * tasklet keeps a list of throttled sockets that need to send more data
 * once their queued bytes drain.
 */
780struct tsq_tasklet {
781 struct tasklet_struct tasklet;
782 struct list_head head;
783};
784static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
785
786static void tcp_tsq_write(struct sock *sk)
787{
788 if ((1 << sk->sk_state) &
789 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
790 TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) {
791 struct tcp_sock *tp = tcp_sk(sk);
792
793 if (tp->lost_out > tp->retrans_out &&
794 tp->snd_cwnd > tcp_packets_in_flight(tp)) {
795 tcp_mstamp_refresh(tp);
796 tcp_xmit_retransmit_queue(sk);
797 }
798
799 tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
800 0, GFP_ATOMIC);
801 }
802}
803
804static void tcp_tsq_handler(struct sock *sk)
805{
806 bh_lock_sock(sk);
807 if (!sock_owned_by_user(sk))
808 tcp_tsq_write(sk);
809 else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
810 sock_hold(sk);
811 bh_unlock_sock(sk);
812}
813
814
815
816
817
818
819static void tcp_tasklet_func(unsigned long data)
820{
821 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
822 LIST_HEAD(list);
823 unsigned long flags;
824 struct list_head *q, *n;
825 struct tcp_sock *tp;
826 struct sock *sk;
827
828 local_irq_save(flags);
829 list_splice_init(&tsq->head, &list);
830 local_irq_restore(flags);
831
832 list_for_each_safe(q, n, &list) {
833 tp = list_entry(q, struct tcp_sock, tsq_node);
834 list_del(&tp->tsq_node);
835
836 sk = (struct sock *)tp;
837 smp_mb__before_atomic();
838 clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
839
840 tcp_tsq_handler(sk);
841 sk_free(sk);
842 }
843}
844
845#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
846 TCPF_WRITE_TIMER_DEFERRED | \
847 TCPF_DELACK_TIMER_DEFERRED | \
848 TCPF_MTU_REDUCED_DEFERRED)
849
850
851
852
853
854
855
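/* tcp_release_cb - release_sock() callback
 *
 * Invoked when the socket lock is released, to perform work that was
 * deferred (via sk_tsq_flags) because it could not run while the socket
 * was owned by user context: TSQ transmissions, write/delack timer
 * handlers and MTU reduction.
 */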
856void tcp_release_cb(struct sock *sk)
857{
858 unsigned long flags, nflags;
859
860
861 do {
862 flags = sk->sk_tsq_flags;
863 if (!(flags & TCP_DEFERRED_ALL))
864 return;
865 nflags = flags & ~TCP_DEFERRED_ALL;
866 } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
867
868 if (flags & TCPF_TSQ_DEFERRED) {
869 tcp_tsq_write(sk);
870 __sock_put(sk);
871 }
872
873
874
875
876
877
878
879
880
881 sock_release_ownership(sk);
882
883 if (flags & TCPF_WRITE_TIMER_DEFERRED) {
884 tcp_write_timer_handler(sk);
885 __sock_put(sk);
886 }
887 if (flags & TCPF_DELACK_TIMER_DEFERRED) {
888 tcp_delack_timer_handler(sk);
889 __sock_put(sk);
890 }
891 if (flags & TCPF_MTU_REDUCED_DEFERRED) {
892 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
893 __sock_put(sk);
894 }
895}
896EXPORT_SYMBOL(tcp_release_cb);
897
898void __init tcp_tasklet_init(void)
899{
900 int i;
901
902 for_each_possible_cpu(i) {
903 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
904
905 INIT_LIST_HEAD(&tsq->head);
906 tasklet_init(&tsq->tasklet,
907 tcp_tasklet_func,
908 (unsigned long)tsq);
909 }
910}
911
912
913
914
915
916
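/* Write buffer destructor, automatically called when an skb carrying TCP
 * payload is freed.  We cannot transmit new skbs from this context (we may
 * already hold the qdisc lock), so when the flow was throttled the socket
 * is queued on the per-cpu TSQ tasklet instead.
 */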
917void tcp_wfree(struct sk_buff *skb)
918{
919 struct sock *sk = skb->sk;
920 struct tcp_sock *tp = tcp_sk(sk);
921 unsigned long flags, nval, oval;
922
923
924
925
926 WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
927
928
929
930
931
932
933
934
935 if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
936 goto out;
937
938 for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
939 struct tsq_tasklet *tsq;
940 bool empty;
941
942 if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
943 goto out;
944
945 nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
946 nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
947 if (nval != oval)
948 continue;
949
950
951 local_irq_save(flags);
952 tsq = this_cpu_ptr(&tsq_tasklet);
953 empty = list_empty(&tsq->head);
954 list_add(&tp->tsq_node, &tsq->head);
955 if (empty)
956 tasklet_schedule(&tsq->tasklet);
957 local_irq_restore(flags);
958 return;
959 }
960out:
961 sk_free(sk);
962}
963
964
965
966
967enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
968{
969 struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
970 struct sock *sk = (struct sock *)tp;
971
972 tcp_tsq_handler(sk);
973 sock_put(sk);
974
975 return HRTIMER_NORESTART;
976}
977
978static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
979 u64 prior_wstamp)
980{
981 struct tcp_sock *tp = tcp_sk(sk);
982
983 if (sk->sk_pacing_status != SK_PACING_NONE) {
984 unsigned long rate = sk->sk_pacing_rate;
985
986
987
988
989
990 if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
991 u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
992 u64 credit = tp->tcp_wstamp_ns - prior_wstamp;
993
994
995 len_ns -= min_t(u64, len_ns / 2, credit);
996 tp->tcp_wstamp_ns += len_ns;
997 }
998 }
999 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
1000}
1001
/* This routine actually transmits TCP packets queued in by tcp_sendmsg().
 * It is used both for the initial transmission and for possible later
 * retransmissions.  All skbs seen here are completely headerless: it is
 * our job to build the TCP header and pass the packet down to IP.
 *
 * We work on either a clone of the original skb, or a fresh unique copy
 * made by the retransmit engine.
 */
1013static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
1014 int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
1015{
1016 const struct inet_connection_sock *icsk = inet_csk(sk);
1017 struct inet_sock *inet;
1018 struct tcp_sock *tp;
1019 struct tcp_skb_cb *tcb;
1020 struct tcp_out_options opts;
1021 unsigned int tcp_options_size, tcp_header_size;
1022 struct sk_buff *oskb = NULL;
1023 struct tcp_md5sig_key *md5;
1024 struct tcphdr *th;
1025 u64 prior_wstamp;
1026 int err;
1027
1028 BUG_ON(!skb || !tcp_skb_pcount(skb));
1029 tp = tcp_sk(sk);
1030 prior_wstamp = tp->tcp_wstamp_ns;
1031 tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
1032 skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
1033 if (clone_it) {
1034 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
1035 - tp->snd_una;
1036 oskb = skb;
1037
1038 tcp_skb_tsorted_save(oskb) {
1039 if (unlikely(skb_cloned(oskb)))
1040 skb = pskb_copy(oskb, gfp_mask);
1041 else
1042 skb = skb_clone(oskb, gfp_mask);
1043 } tcp_skb_tsorted_restore(oskb);
1044
1045 if (unlikely(!skb))
1046 return -ENOBUFS;
1047 }
1048
1049 inet = inet_sk(sk);
1050 tcb = TCP_SKB_CB(skb);
1051 memset(&opts, 0, sizeof(opts));
1052
1053 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
1054 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
1055 else
1056 tcp_options_size = tcp_established_options(sk, skb, &opts,
1057 &md5);
1058 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
1059
1060
1061
1062
1063
1064
1065
1066
1067 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
1068
1069
1070
1071
1072
1073
1074 skb->pfmemalloc = 0;
1075
1076 skb_push(skb, tcp_header_size);
1077 skb_reset_transport_header(skb);
1078
1079 skb_orphan(skb);
1080 skb->sk = sk;
1081 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
1082 skb_set_hash_from_sk(skb, sk);
1083 refcount_add(skb->truesize, &sk->sk_wmem_alloc);
1084
1085 skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
1086
1087
1088 th = (struct tcphdr *)skb->data;
1089 th->source = inet->inet_sport;
1090 th->dest = inet->inet_dport;
1091 th->seq = htonl(tcb->seq);
1092 th->ack_seq = htonl(rcv_nxt);
1093 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
1094 tcb->tcp_flags);
1095
1096 th->check = 0;
1097 th->urg_ptr = 0;
1098
1099
1100 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
1101 if (before(tp->snd_up, tcb->seq + 0x10000)) {
1102 th->urg_ptr = htons(tp->snd_up - tcb->seq);
1103 th->urg = 1;
1104 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
1105 th->urg_ptr = htons(0xFFFF);
1106 th->urg = 1;
1107 }
1108 }
1109
1110 tcp_options_write((__be32 *)(th + 1), tp, &opts);
1111 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
1112 if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
1113 th->window = htons(tcp_select_window(sk));
1114 tcp_ecn_send(sk, skb, th, tcp_header_size);
1115 } else {
1116
1117
1118
1119 th->window = htons(min(tp->rcv_wnd, 65535U));
1120 }
1121#ifdef CONFIG_TCP_MD5SIG
1122
1123 if (md5) {
1124 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1125 tp->af_specific->calc_md5_hash(opts.hash_location,
1126 md5, sk, skb);
1127 }
1128#endif
1129
1130 icsk->icsk_af_ops->send_check(sk, skb);
1131
1132 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1133 tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
1134
1135 if (skb->len != tcp_header_size) {
1136 tcp_event_data_sent(tp, sk);
1137 tp->data_segs_out += tcp_skb_pcount(skb);
1138 tp->bytes_sent += skb->len - tcp_header_size;
1139 }
1140
1141 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1142 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1143 tcp_skb_pcount(skb));
1144
1145 tp->segs_out += tcp_skb_pcount(skb);
1146
1147 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
1148 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
1149
1150
1151
1152
1153 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
1154 sizeof(struct inet6_skb_parm)));
1155
1156 tcp_add_tx_delay(skb, tp);
1157
1158 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1159
1160 if (unlikely(err > 0)) {
1161 tcp_enter_cwr(sk);
1162 err = net_xmit_eval(err);
1163 }
1164 if (!err && oskb) {
1165 tcp_update_skb_after_send(sk, oskb, prior_wstamp);
1166 tcp_rate_skb_sent(sk, oskb);
1167 }
1168 return err;
1169}
1170
1171static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1172 gfp_t gfp_mask)
1173{
1174 return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
1175 tcp_sk(sk)->rcv_nxt);
1176}
1177
1178
1179
1180
1181
1182
1183static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1184{
1185 struct tcp_sock *tp = tcp_sk(sk);
1186
1187
1188 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1189 __skb_header_release(skb);
1190 tcp_add_write_queue_tail(sk, skb);
1191 sk->sk_wmem_queued += skb->truesize;
1192 sk_mem_charge(sk, skb->truesize);
1193}
1194
1195
1196static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1197{
1198 if (skb->len <= mss_now) {
1199
1200
1201
1202 tcp_skb_pcount_set(skb, 1);
1203 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1204 } else {
1205 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1206 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
1207 }
1208}
1209
1210
1211
1212
1213static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1214{
1215 struct tcp_sock *tp = tcp_sk(sk);
1216
1217 tp->packets_out -= decr;
1218
1219 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1220 tp->sacked_out -= decr;
1221 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1222 tp->retrans_out -= decr;
1223 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1224 tp->lost_out -= decr;
1225
1226
1227 if (tcp_is_reno(tp) && decr > 0)
1228 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1229
1230 if (tp->lost_skb_hint &&
1231 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1232 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1233 tp->lost_cnt_hint -= decr;
1234
1235 tcp_verify_left_out(tp);
1236}
1237
1238static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
1239{
1240 return TCP_SKB_CB(skb)->txstamp_ack ||
1241 (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
1242}
1243
1244static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1245{
1246 struct skb_shared_info *shinfo = skb_shinfo(skb);
1247
1248 if (unlikely(tcp_has_tx_tstamp(skb)) &&
1249 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1250 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1251 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1252
1253 shinfo->tx_flags &= ~tsflags;
1254 shinfo2->tx_flags |= tsflags;
1255 swap(shinfo->tskey, shinfo2->tskey);
1256 TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack;
1257 TCP_SKB_CB(skb)->txstamp_ack = 0;
1258 }
1259}
1260
1261static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1262{
1263 TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
1264 TCP_SKB_CB(skb)->eor = 0;
1265}
1266
1267
1268static void tcp_insert_write_queue_after(struct sk_buff *skb,
1269 struct sk_buff *buff,
1270 struct sock *sk,
1271 enum tcp_queue tcp_queue)
1272{
1273 if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
1274 __skb_queue_after(&sk->sk_write_queue, skb, buff);
1275 else
1276 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
1277}
1278
1279
1280
1281
1282
1283
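/* Create two new TCP segments: shrink the given skb to "len" bytes and
 * insert a new segment holding the remainder right after it in the given
 * queue.  The skbs are still headerless at this point.
 */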
1284int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1285 struct sk_buff *skb, u32 len,
1286 unsigned int mss_now, gfp_t gfp)
1287{
1288 struct tcp_sock *tp = tcp_sk(sk);
1289 struct sk_buff *buff;
1290 int nsize, old_factor;
1291 long limit;
1292 int nlen;
1293 u8 flags;
1294
1295 if (WARN_ON(len > skb->len))
1296 return -EINVAL;
1297
1298 nsize = skb_headlen(skb) - len;
1299 if (nsize < 0)
1300 nsize = 0;
1301
1302
1303
1304
1305
1306
1307 limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
1308 if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
1309 tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
1310 skb != tcp_rtx_queue_head(sk) &&
1311 skb != tcp_rtx_queue_tail(sk))) {
1312 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
1313 return -ENOMEM;
1314 }
1315
1316 if (skb_unclone(skb, gfp))
1317 return -ENOMEM;
1318
1319
1320 buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
1321 if (!buff)
1322 return -ENOMEM;
1323 skb_copy_decrypted(buff, skb);
1324
1325 sk->sk_wmem_queued += buff->truesize;
1326 sk_mem_charge(sk, buff->truesize);
1327 nlen = skb->len - len - nsize;
1328 buff->truesize += nlen;
1329 skb->truesize -= nlen;
1330
1331
1332 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1333 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1334 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1335
1336
1337 flags = TCP_SKB_CB(skb)->tcp_flags;
1338 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1339 TCP_SKB_CB(buff)->tcp_flags = flags;
1340 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1341 tcp_skb_fragment_eor(skb, buff);
1342
1343 skb_split(skb, buff, len);
1344
1345 buff->ip_summed = CHECKSUM_PARTIAL;
1346
1347 buff->tstamp = skb->tstamp;
1348 tcp_fragment_tstamp(skb, buff);
1349
1350 old_factor = tcp_skb_pcount(skb);
1351
1352
1353 tcp_set_skb_tso_segs(skb, mss_now);
1354 tcp_set_skb_tso_segs(buff, mss_now);
1355
1356
1357 TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
1358
1359
1360
1361
1362 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1363 int diff = old_factor - tcp_skb_pcount(skb) -
1364 tcp_skb_pcount(buff);
1365
1366 if (diff)
1367 tcp_adjust_pcount(sk, skb, diff);
1368 }
1369
1370
1371 __skb_header_release(buff);
1372 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1373 if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
1374 list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
1375
1376 return 0;
1377}
1378
1379
1380
1381
1382static int __pskb_trim_head(struct sk_buff *skb, int len)
1383{
1384 struct skb_shared_info *shinfo;
1385 int i, k, eat;
1386
1387 eat = min_t(int, len, skb_headlen(skb));
1388 if (eat) {
1389 __skb_pull(skb, eat);
1390 len -= eat;
1391 if (!len)
1392 return 0;
1393 }
1394 eat = len;
1395 k = 0;
1396 shinfo = skb_shinfo(skb);
1397 for (i = 0; i < shinfo->nr_frags; i++) {
1398 int size = skb_frag_size(&shinfo->frags[i]);
1399
1400 if (size <= eat) {
1401 skb_frag_unref(skb, i);
1402 eat -= size;
1403 } else {
1404 shinfo->frags[k] = shinfo->frags[i];
1405 if (eat) {
1406 shinfo->frags[k].page_offset += eat;
1407 skb_frag_size_sub(&shinfo->frags[k], eat);
1408 eat = 0;
1409 }
1410 k++;
1411 }
1412 }
1413 shinfo->nr_frags = k;
1414
1415 skb->data_len -= len;
1416 skb->len = skb->data_len;
1417 return len;
1418}
1419
1420
1421int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1422{
1423 u32 delta_truesize;
1424
1425 if (skb_unclone(skb, GFP_ATOMIC))
1426 return -ENOMEM;
1427
1428 delta_truesize = __pskb_trim_head(skb, len);
1429
1430 TCP_SKB_CB(skb)->seq += len;
1431 skb->ip_summed = CHECKSUM_PARTIAL;
1432
1433 if (delta_truesize) {
1434 skb->truesize -= delta_truesize;
1435 sk->sk_wmem_queued -= delta_truesize;
1436 sk_mem_uncharge(sk, delta_truesize);
1437 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1438 }
1439
1440
1441 if (tcp_skb_pcount(skb) > 1)
1442 tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
1443
1444 return 0;
1445}
1446
/* Calculate the base MSS for a given path MTU, without accounting for TCP
 * options.
 */
1448static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1449{
1450 const struct tcp_sock *tp = tcp_sk(sk);
1451 const struct inet_connection_sock *icsk = inet_csk(sk);
1452 int mss_now;
1453
1454
1455
1456
1457 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1458
1459
1460 if (icsk->icsk_af_ops->net_frag_header_len) {
1461 const struct dst_entry *dst = __sk_dst_get(sk);
1462
1463 if (dst && dst_allfrag(dst))
1464 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1465 }
1466
1467
1468 if (mss_now > tp->rx_opt.mss_clamp)
1469 mss_now = tp->rx_opt.mss_clamp;
1470
1471
1472 mss_now -= icsk->icsk_ext_hdr_len;
1473
1474
1475 mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
1476 return mss_now;
1477}
1478
/* Calculate the sending MSS: like __tcp_mtu_to_mss() but also reserving
 * room for the fixed TCP options (not SACK blocks).
 */
1480int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1481{
1482
1483 return __tcp_mtu_to_mss(sk, pmtu) -
1484 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1485}
1486
/* Inverse of tcp_mtu_to_mss(): compute the path MTU that corresponds to a
 * given MSS.
 */
1488int tcp_mss_to_mtu(struct sock *sk, int mss)
1489{
1490 const struct tcp_sock *tp = tcp_sk(sk);
1491 const struct inet_connection_sock *icsk = inet_csk(sk);
1492 int mtu;
1493
1494 mtu = mss +
1495 tp->tcp_header_len +
1496 icsk->icsk_ext_hdr_len +
1497 icsk->icsk_af_ops->net_header_len;
1498
1499
1500 if (icsk->icsk_af_ops->net_frag_header_len) {
1501 const struct dst_entry *dst = __sk_dst_get(sk);
1502
1503 if (dst && dst_allfrag(dst))
1504 mtu += icsk->icsk_af_ops->net_frag_header_len;
1505 }
1506 return mtu;
1507}
1508EXPORT_SYMBOL(tcp_mss_to_mtu);
1509
/* Per-socket initialisation of MTU probing (packetization-layer PMTU
 * discovery) state.
 */
1511void tcp_mtup_init(struct sock *sk)
1512{
1513 struct tcp_sock *tp = tcp_sk(sk);
1514 struct inet_connection_sock *icsk = inet_csk(sk);
1515 struct net *net = sock_net(sk);
1516
1517 icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1518 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1519 icsk->icsk_af_ops->net_header_len;
1520 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1521 icsk->icsk_mtup.probe_size = 0;
1522 if (icsk->icsk_mtup.enabled)
1523 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
1524}
1525EXPORT_SYMBOL(tcp_mtup_init);
1526
/* Synchronize the sending MSS (tp->mss_cache) with the current path MTU
 * and extension header set.
 *
 *   tp->rx_opt.mss_clamp is the MSS negotiated at connection setup; it
 *   does not include TCP options.
 *
 *   inet_csk(sk)->icsk_pmtu_cookie is the last path MTU seen by this
 *   function.
 *
 *   tp->mss_cache is the current effective sending MSS, including all TCP
 *   options except SACK blocks.  It is derived from the current path MTU
 *   but never exceeds tp->rx_opt.mss_clamp.
 *
 * icsk_pmtu_cookie and tp->mss_cache are read-only outside this function.
 */
1549unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1550{
1551 struct tcp_sock *tp = tcp_sk(sk);
1552 struct inet_connection_sock *icsk = inet_csk(sk);
1553 int mss_now;
1554
1555 if (icsk->icsk_mtup.search_high > pmtu)
1556 icsk->icsk_mtup.search_high = pmtu;
1557
1558 mss_now = tcp_mtu_to_mss(sk, pmtu);
1559 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1560
1561
1562 icsk->icsk_pmtu_cookie = pmtu;
1563 if (icsk->icsk_mtup.enabled)
1564 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1565 tp->mss_cache = mss_now;
1566
1567 return mss_now;
1568}
1569EXPORT_SYMBOL(tcp_sync_mss);
1570
1571
1572
1573
1574unsigned int tcp_current_mss(struct sock *sk)
1575{
1576 const struct tcp_sock *tp = tcp_sk(sk);
1577 const struct dst_entry *dst = __sk_dst_get(sk);
1578 u32 mss_now;
1579 unsigned int header_len;
1580 struct tcp_out_options opts;
1581 struct tcp_md5sig_key *md5;
1582
1583 mss_now = tp->mss_cache;
1584
1585 if (dst) {
1586 u32 mtu = dst_mtu(dst);
1587 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1588 mss_now = tcp_sync_mss(sk, mtu);
1589 }
1590
1591 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1592 sizeof(struct tcphdr);
1593
1594
1595
1596
1597 if (header_len != tp->tcp_header_len) {
1598 int delta = (int) header_len - tp->tcp_header_len;
1599 mss_now -= delta;
1600 }
1601
1602 return mss_now;
1603}
1604
1605
1606
1607
1608
1609static void tcp_cwnd_application_limited(struct sock *sk)
1610{
1611 struct tcp_sock *tp = tcp_sk(sk);
1612
1613 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1614 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1615
1616 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1617 u32 win_used = max(tp->snd_cwnd_used, init_win);
1618 if (win_used < tp->snd_cwnd) {
1619 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1620 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1621 }
1622 tp->snd_cwnd_used = 0;
1623 }
1624 tp->snd_cwnd_stamp = tcp_jiffies32;
1625}
1626
1627static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1628{
1629 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1630 struct tcp_sock *tp = tcp_sk(sk);
1631
1632
1633
1634
1635 if (!before(tp->snd_una, tp->max_packets_seq) ||
1636 tp->packets_out > tp->max_packets_out) {
1637 tp->max_packets_out = tp->packets_out;
1638 tp->max_packets_seq = tp->snd_nxt;
1639 tp->is_cwnd_limited = is_cwnd_limited;
1640 }
1641
1642 if (tcp_is_cwnd_limited(sk)) {
1643
1644 tp->snd_cwnd_used = 0;
1645 tp->snd_cwnd_stamp = tcp_jiffies32;
1646 } else {
1647
1648 if (tp->packets_out > tp->snd_cwnd_used)
1649 tp->snd_cwnd_used = tp->packets_out;
1650
1651 if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
1652 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
1653 !ca_ops->cong_control)
1654 tcp_cwnd_application_limited(sk);
1655
1656
1657
1658
1659
1660
1661
1662
1663 if (tcp_write_queue_empty(sk) && sk->sk_socket &&
1664 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
1665 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1666 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
1667 }
1668}
1669
1670
1671static bool tcp_minshall_check(const struct tcp_sock *tp)
1672{
1673 return after(tp->snd_sml, tp->snd_una) &&
1674 !after(tp->snd_sml, tp->snd_nxt);
1675}
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1686 const struct sk_buff *skb)
1687{
1688 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1689 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1690}
1691
1692
1693
1694
1695
1696
1697
1698
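/* Nagle/Minshall test helper.  Sending a sub-MSS ("partial") segment is
 * refused when TCP_CORK is set, or when Nagle is enabled and small
 * segments are still unacknowledged (Minshall's variant).
 */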
1699static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1700 int nonagle)
1701{
1702 return partial &&
1703 ((nonagle & TCP_NAGLE_CORK) ||
1704 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1705}
1706
1707
1708
1709
1710static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1711 int min_tso_segs)
1712{
1713 u32 bytes, segs;
1714
1715 bytes = min_t(unsigned long,
1716 sk->sk_pacing_rate >> sk->sk_pacing_shift,
1717 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1718
1719
1720
1721
1722
1723
1724 segs = max_t(u32, bytes / mss_now, min_tso_segs);
1725
1726 return segs;
1727}
1728
1729
1730
1731
1732static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1733{
1734 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1735 u32 min_tso, tso_segs;
1736
1737 min_tso = ca_ops->min_tso_segs ?
1738 ca_ops->min_tso_segs(sk) :
1739 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
1740
1741 tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
1742 return min_t(u32, tso_segs, sk->sk_gso_max_segs);
1743}
1744
1745
1746static unsigned int tcp_mss_split_point(const struct sock *sk,
1747 const struct sk_buff *skb,
1748 unsigned int mss_now,
1749 unsigned int max_segs,
1750 int nonagle)
1751{
1752 const struct tcp_sock *tp = tcp_sk(sk);
1753 u32 partial, needed, window, max_len;
1754
1755 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1756 max_len = mss_now * max_segs;
1757
1758 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1759 return max_len;
1760
1761 needed = min(skb->len, window);
1762
1763 if (max_len <= needed)
1764 return max_len;
1765
1766 partial = needed % mss_now;
1767
1768
1769
1770
1771 if (tcp_nagle_check(partial != 0, tp, nonagle))
1772 return needed - partial;
1773
1774 return needed;
1775}
1776
1777
1778
1779
1780static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1781 const struct sk_buff *skb)
1782{
1783 u32 in_flight, cwnd, halfcwnd;
1784
1785
1786 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1787 tcp_skb_pcount(skb) == 1)
1788 return 1;
1789
1790 in_flight = tcp_packets_in_flight(tp);
1791 cwnd = tp->snd_cwnd;
1792 if (in_flight >= cwnd)
1793 return 0;
1794
1795
1796
1797
1798 halfcwnd = max(cwnd >> 1, 1U);
1799 return min(halfcwnd, cwnd - in_flight);
1800}
1801
1802
1803
1804
1805
1806static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1807{
1808 int tso_segs = tcp_skb_pcount(skb);
1809
1810 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1811 tcp_set_skb_tso_segs(skb, mss_now);
1812 tso_segs = tcp_skb_pcount(skb);
1813 }
1814 return tso_segs;
1815}
1816
1817
1818
1819
1820
1821static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1822 unsigned int cur_mss, int nonagle)
1823{
1824
1825
1826
1827
1828
1829
1830 if (nonagle & TCP_NAGLE_PUSH)
1831 return true;
1832
1833
1834 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1835 return true;
1836
1837 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1838 return true;
1839
1840 return false;
1841}
1842
1843
1844static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1845 const struct sk_buff *skb,
1846 unsigned int cur_mss)
1847{
1848 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1849
1850 if (skb->len > cur_mss)
1851 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1852
1853 return !after(end_seq, tcp_wnd_end(tp));
1854}
1855
1856
1857
1858
1859
1860
1861
1862
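/* Trim a TSO skb to "len" bytes and put the remaining data into a new skb
 * that follows it in the write queue.  Like tcp_fragment(), but it may
 * assume that the data is entirely in paged fragments and the skb has
 * never been sent, which makes the split cheaper; otherwise it falls back
 * to tcp_fragment().
 */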
1863static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1864 unsigned int mss_now, gfp_t gfp)
1865{
1866 int nlen = skb->len - len;
1867 struct sk_buff *buff;
1868 u8 flags;
1869
1870
1871 if (skb->len != skb->data_len)
1872 return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
1873 skb, len, mss_now, gfp);
1874
1875 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1876 if (unlikely(!buff))
1877 return -ENOMEM;
1878 skb_copy_decrypted(buff, skb);
1879
1880 sk->sk_wmem_queued += buff->truesize;
1881 sk_mem_charge(sk, buff->truesize);
1882 buff->truesize += nlen;
1883 skb->truesize -= nlen;
1884
1885
1886 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1887 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1888 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1889
1890
1891 flags = TCP_SKB_CB(skb)->tcp_flags;
1892 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1893 TCP_SKB_CB(buff)->tcp_flags = flags;
1894
1895
1896 TCP_SKB_CB(buff)->sacked = 0;
1897
1898 tcp_skb_fragment_eor(skb, buff);
1899
1900 buff->ip_summed = CHECKSUM_PARTIAL;
1901 skb_split(skb, buff, len);
1902 tcp_fragment_tstamp(skb, buff);
1903
1904
1905 tcp_set_skb_tso_segs(skb, mss_now);
1906 tcp_set_skb_tso_segs(buff, mss_now);
1907
1908
1909 __skb_header_release(buff);
1910 tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE);
1911
1912 return 0;
1913}
1914
/* Decide whether the TSO packet at the head of the send queue should be
 * deferred in the hope of coalescing it with more data, to minimize TSO
 * splitting; a kind of TSO Nagle test.
 */
1920static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1921 bool *is_cwnd_limited,
1922 bool *is_rwnd_limited,
1923 u32 max_segs)
1924{
1925 const struct inet_connection_sock *icsk = inet_csk(sk);
1926 u32 send_win, cong_win, limit, in_flight;
1927 struct tcp_sock *tp = tcp_sk(sk);
1928 struct sk_buff *head;
1929 int win_divisor;
1930 s64 delta;
1931
1932 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
1933 goto send_now;
1934
1935
1936
1937
1938
1939
1940 delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
1941 if (delta > 0)
1942 goto send_now;
1943
1944 in_flight = tcp_packets_in_flight(tp);
1945
1946 BUG_ON(tcp_skb_pcount(skb) <= 1);
1947 BUG_ON(tp->snd_cwnd <= in_flight);
1948
1949 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1950
1951
1952 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1953
1954 limit = min(send_win, cong_win);
1955
1956
1957 if (limit >= max_segs * tp->mss_cache)
1958 goto send_now;
1959
1960
1961 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1962 goto send_now;
1963
1964 win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
1965 if (win_divisor) {
1966 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1967
1968
1969
1970
1971 chunk /= win_divisor;
1972 if (limit >= chunk)
1973 goto send_now;
1974 } else {
1975
1976
1977
1978
1979
1980 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1981 goto send_now;
1982 }
1983
1984
1985 head = tcp_rtx_queue_head(sk);
1986 if (!head)
1987 goto send_now;
1988 delta = tp->tcp_clock_cache - head->tstamp;
1989
1990 if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
1991 goto send_now;
1992
1993
1994
1995
1996
1997
1998
1999 if (cong_win < send_win) {
2000 if (cong_win <= skb->len) {
2001 *is_cwnd_limited = true;
2002 return true;
2003 }
2004 } else {
2005 if (send_win <= skb->len) {
2006 *is_rwnd_limited = true;
2007 return true;
2008 }
2009 }
2010
2011
2012 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
2013 TCP_SKB_CB(skb)->eor)
2014 goto send_now;
2015
2016 return true;
2017
2018send_now:
2019 return false;
2020}
2021
2022static inline void tcp_mtu_check_reprobe(struct sock *sk)
2023{
2024 struct inet_connection_sock *icsk = inet_csk(sk);
2025 struct tcp_sock *tp = tcp_sk(sk);
2026 struct net *net = sock_net(sk);
2027 u32 interval;
2028 s32 delta;
2029
2030 interval = net->ipv4.sysctl_tcp_probe_interval;
2031 delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
2032 if (unlikely(delta >= interval * HZ)) {
2033 int mss = tcp_current_mss(sk);
2034
2035
2036 icsk->icsk_mtup.probe_size = 0;
2037 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
2038 sizeof(struct tcphdr) +
2039 icsk->icsk_af_ops->net_header_len;
2040 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
2041
2042
2043 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
2044 }
2045}
2046
2047static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
2048{
2049 struct sk_buff *skb, *next;
2050
2051 skb = tcp_send_head(sk);
2052 tcp_for_write_queue_from_safe(skb, next, sk) {
2053 if (len <= skb->len)
2054 break;
2055
2056 if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
2057 return false;
2058
2059 len -= skb->len;
2060 }
2061
2062 return true;
2063}
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
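/* Create a new MTU probe if we are ready: packetization-layer path MTU
 * discovery (RFC 4821) periodically coalesces queued data into one larger
 * probe segment to discover routes with a bigger usable MTU.
 *
 * Returns 1 if a probe was sent, 0 if we should wait (not enough window or
 * cwnd yet), and -1 if probing is not possible right now.
 */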
2074static int tcp_mtu_probe(struct sock *sk)
2075{
2076 struct inet_connection_sock *icsk = inet_csk(sk);
2077 struct tcp_sock *tp = tcp_sk(sk);
2078 struct sk_buff *skb, *nskb, *next;
2079 struct net *net = sock_net(sk);
2080 int probe_size;
2081 int size_needed;
2082 int copy, len;
2083 int mss_now;
2084 int interval;
2085
2086
2087
2088
2089
2090
2091 if (likely(!icsk->icsk_mtup.enabled ||
2092 icsk->icsk_mtup.probe_size ||
2093 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
2094 tp->snd_cwnd < 11 ||
2095 tp->rx_opt.num_sacks || tp->rx_opt.dsack))
2096 return -1;
2097
2098
2099
2100
2101
2102 mss_now = tcp_current_mss(sk);
2103 probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
2104 icsk->icsk_mtup.search_low) >> 1);
2105 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
2106 interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
2107
2108
2109
2110
2111 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
2112 interval < net->ipv4.sysctl_tcp_probe_threshold) {
2113
2114
2115
2116 tcp_mtu_check_reprobe(sk);
2117 return -1;
2118 }
2119
2120
2121 if (tp->write_seq - tp->snd_nxt < size_needed)
2122 return -1;
2123
2124 if (tp->snd_wnd < size_needed)
2125 return -1;
2126 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
2127 return 0;
2128
2129
2130 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
2131 if (!tcp_packets_in_flight(tp))
2132 return -1;
2133 else
2134 return 0;
2135 }
2136
2137 if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
2138 return -1;
2139
2140
2141 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
2142 if (!nskb)
2143 return -1;
2144 sk->sk_wmem_queued += nskb->truesize;
2145 sk_mem_charge(sk, nskb->truesize);
2146
2147 skb = tcp_send_head(sk);
2148 skb_copy_decrypted(nskb, skb);
2149
2150 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
2151 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
2152 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
2153 TCP_SKB_CB(nskb)->sacked = 0;
2154 nskb->csum = 0;
2155 nskb->ip_summed = CHECKSUM_PARTIAL;
2156
2157 tcp_insert_write_queue_before(nskb, skb, sk);
2158 tcp_highest_sack_replace(sk, skb, nskb);
2159
2160 len = 0;
2161 tcp_for_write_queue_from_safe(skb, next, sk) {
2162 copy = min_t(int, skb->len, probe_size - len);
2163 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
2164
2165 if (skb->len <= copy) {
2166
2167
2168 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
2169
2170
2171
2172 TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
2173 tcp_skb_collapse_tstamp(nskb, skb);
2174 tcp_unlink_write_queue(skb, sk);
2175 sk_wmem_free_skb(sk, skb);
2176 } else {
2177 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
2178 ~(TCPHDR_FIN|TCPHDR_PSH);
2179 if (!skb_shinfo(skb)->nr_frags) {
2180 skb_pull(skb, copy);
2181 } else {
2182 __pskb_trim_head(skb, copy);
2183 tcp_set_skb_tso_segs(skb, mss_now);
2184 }
2185 TCP_SKB_CB(skb)->seq += copy;
2186 }
2187
2188 len += copy;
2189
2190 if (len >= probe_size)
2191 break;
2192 }
2193 tcp_init_tso_segs(nskb, nskb->len);
2194
2195
2196
2197
2198 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
2199
2200
2201 tp->snd_cwnd--;
2202 tcp_event_new_data_sent(sk, nskb);
2203
2204 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
2205 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
2206 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
2207
2208 return 1;
2209 }
2210
2211 return -1;
2212}
2213
2214static bool tcp_pacing_check(struct sock *sk)
2215{
2216 struct tcp_sock *tp = tcp_sk(sk);
2217
2218 if (!tcp_needs_internal_pacing(sk))
2219 return false;
2220
2221 if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
2222 return false;
2223
2224 if (!hrtimer_is_queued(&tp->pacing_timer)) {
2225 hrtimer_start(&tp->pacing_timer,
2226 ns_to_ktime(tp->tcp_wstamp_ns),
2227 HRTIMER_MODE_ABS_PINNED_SOFT);
2228 sock_hold(sk);
2229 }
2230 return true;
2231}
2232
/* TCP Small Queues:
 * limit the number of bytes a flow may have queued in qdiscs and devices
 * to roughly two packets or ~1 ms worth at the current pacing rate (the
 * limit is scaled up via @factor for retransmits).  Keeping host queues
 * short gives better RTT estimation and ACK scheduling and faster
 * recovery, while still leaving enough queued data for drivers that need
 * it (e.g. wifi aggregation).
 */
2244static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2245 unsigned int factor)
2246{
2247 unsigned long limit;
2248
2249 limit = max_t(unsigned long,
2250 2 * skb->truesize,
2251 sk->sk_pacing_rate >> sk->sk_pacing_shift);
2252 if (sk->sk_pacing_status == SK_PACING_NONE)
2253 limit = min_t(unsigned long, limit,
2254 sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2255 limit <<= factor;
2256
2257 if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
2258 tcp_sk(sk)->tcp_tx_delay) {
2259 u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
2260
2261
2262
2263
2264
2265
2266 extra_bytes >>= (20 - 1);
2267 limit += extra_bytes;
2268 }
2269 if (refcount_read(&sk->sk_wmem_alloc) > limit) {
2270
2271
2272
2273
2274
2275 if (tcp_rtx_queue_empty(sk))
2276 return false;
2277
2278 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
2279
2280
2281
2282
2283 smp_mb__after_atomic();
2284 if (refcount_read(&sk->sk_wmem_alloc) > limit)
2285 return true;
2286 }
2287 return false;
2288}
2289
2290static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
2291{
2292 const u32 now = tcp_jiffies32;
2293 enum tcp_chrono old = tp->chrono_type;
2294
2295 if (old > TCP_CHRONO_UNSPEC)
2296 tp->chrono_stat[old - 1] += now - tp->chrono_start;
2297 tp->chrono_start = now;
2298 tp->chrono_type = new;
2299}
2300
2301void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
2302{
2303 struct tcp_sock *tp = tcp_sk(sk);
2304
2305
2306
2307
2308
2309
2310 if (type > tp->chrono_type)
2311 tcp_chrono_set(tp, type);
2312}
2313
2314void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
2315{
2316 struct tcp_sock *tp = tcp_sk(sk);
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326 if (tcp_rtx_and_write_queues_empty(sk))
2327 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
2328 else if (type == tp->chrono_type)
2329 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
2330}
2331
/* This routine writes packets to the network.  It advances the send head
 * as incoming ACKs open up the peer's window.
 *
 * Send at most one packet when push_one > 0; when push_one == 2 the cwnd
 * limit is temporarily ignored so that a single packet can be forced out
 * (used by the loss probe).
 *
 * Returns true if no segments are in flight and we have queued segments,
 * but cannot send anything now because of SWS or another problem.
 */
2346static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2347 int push_one, gfp_t gfp)
2348{
2349 struct tcp_sock *tp = tcp_sk(sk);
2350 struct sk_buff *skb;
2351 unsigned int tso_segs, sent_pkts;
2352 int cwnd_quota;
2353 int result;
2354 bool is_cwnd_limited = false, is_rwnd_limited = false;
2355 u32 max_segs;
2356
2357 sent_pkts = 0;
2358
2359 tcp_mstamp_refresh(tp);
2360 if (!push_one) {
2361
2362 result = tcp_mtu_probe(sk);
2363 if (!result) {
2364 return false;
2365 } else if (result > 0) {
2366 sent_pkts = 1;
2367 }
2368 }
2369
2370 max_segs = tcp_tso_segs(sk, mss_now);
2371 while ((skb = tcp_send_head(sk))) {
2372 unsigned int limit;
2373
2374 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2375
2376 skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
2377 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
2378 tcp_init_tso_segs(skb, mss_now);
2379 goto repair;
2380 }
2381
2382 if (tcp_pacing_check(sk))
2383 break;
2384
2385 tso_segs = tcp_init_tso_segs(skb, mss_now);
2386 BUG_ON(!tso_segs);
2387
2388 cwnd_quota = tcp_cwnd_test(tp, skb);
2389 if (!cwnd_quota) {
2390 if (push_one == 2)
2391
2392 cwnd_quota = 1;
2393 else
2394 break;
2395 }
2396
2397 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
2398 is_rwnd_limited = true;
2399 break;
2400 }
2401
2402 if (tso_segs == 1) {
2403 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2404 (tcp_skb_is_last(sk, skb) ?
2405 nonagle : TCP_NAGLE_PUSH))))
2406 break;
2407 } else {
2408 if (!push_one &&
2409 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2410 &is_rwnd_limited, max_segs))
2411 break;
2412 }
2413
2414 limit = mss_now;
2415 if (tso_segs > 1 && !tcp_urg_mode(tp))
2416 limit = tcp_mss_split_point(sk, skb, mss_now,
2417 min_t(unsigned int,
2418 cwnd_quota,
2419 max_segs),
2420 nonagle);
2421
2422 if (skb->len > limit &&
2423 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2424 break;
2425
2426 if (tcp_small_queue_check(sk, skb, 0))
2427 break;
2428
2429 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2430 break;
2431
2432repair:
2433
2434
2435
2436 tcp_event_new_data_sent(sk, skb);
2437
2438 tcp_minshall_update(tp, mss_now, skb);
2439 sent_pkts += tcp_skb_pcount(skb);
2440
2441 if (push_one)
2442 break;
2443 }
2444
2445 if (is_rwnd_limited)
2446 tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
2447 else
2448 tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
2449
2450 if (likely(sent_pkts)) {
2451 if (tcp_in_cwnd_reduction(sk))
2452 tp->prr_out += sent_pkts;
2453
2454
2455 if (push_one != 2)
2456 tcp_schedule_loss_probe(sk, false);
2457 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2458 tcp_cwnd_validate(sk, is_cwnd_limited);
2459 return false;
2460 }
2461 return !tp->packets_out && !tcp_write_queue_empty(sk);
2462}
2463
2464bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
2465{
2466 struct inet_connection_sock *icsk = inet_csk(sk);
2467 struct tcp_sock *tp = tcp_sk(sk);
2468 u32 timeout, rto_delta_us;
2469 int early_retrans;
2470
2471
2472
2473
2474 if (tp->fastopen_rsk)
2475 return false;
2476
2477 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2478
2479
2480
2481 if ((early_retrans != 3 && early_retrans != 4) ||
2482 !tp->packets_out || !tcp_is_sack(tp) ||
2483 (icsk->icsk_ca_state != TCP_CA_Open &&
2484 icsk->icsk_ca_state != TCP_CA_CWR))
2485 return false;
2486
2487
2488
2489
2490
2491 if (tp->srtt_us) {
2492 timeout = usecs_to_jiffies(tp->srtt_us >> 2);
2493 if (tp->packets_out == 1)
2494 timeout += TCP_RTO_MIN;
2495 else
2496 timeout += TCP_TIMEOUT_MIN;
2497 } else {
2498 timeout = TCP_TIMEOUT_INIT;
2499 }
2500
2501
2502 rto_delta_us = advancing_rto ?
2503 jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
2504 tcp_rto_delta_us(sk);
2505 if (rto_delta_us > 0)
2506 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
2507
2508 tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2509 TCP_RTO_MAX, NULL);
2510 return true;
2511}
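
/* Worked example (illustration only, not kernel code) of the probe timeout
 * above, assuming tp->srtt_us holds 8 * SRTT in microseconds as elsewhere
 * in this file:
 *
 *	u32 srtt_us = 8 * 50000;	// SRTT = 50 ms
 *	u32 base_us = srtt_us >> 2;	// 100000 us == 2 * SRTT
 *
 * so the loss probe is scheduled ~100 ms out, plus TCP_RTO_MIN when only one
 * packet is in flight (to give a delayed ACK a chance to arrive), and it is
 * clamped so that it never fires later than the pending RTO.
 */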
2512
2513/* Thanks to skb fast clones, we can detect if a prior transmit of
2514 * a packet is still in a qdisc or driver queue.
2515 * In this case, there is very little point doing a retransmit !
2516 */
2517static bool skb_still_in_host_queue(const struct sock *sk,
2518 const struct sk_buff *skb)
2519{
2520 if (unlikely(skb_fclone_busy(sk, skb))) {
2521 NET_INC_STATS(sock_net(sk),
2522 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2523 return true;
2524 }
2525 return false;
2526}
2527
2528/* When the probe timeout (PTO) fires, try to send a new segment if
2529 * possible, else retransmit the last segment.
2530 */
2531void tcp_send_loss_probe(struct sock *sk)
2532{
2533 struct tcp_sock *tp = tcp_sk(sk);
2534 struct sk_buff *skb;
2535 int pcount;
2536 int mss = tcp_current_mss(sk);
2537
2538 skb = tcp_send_head(sk);
2539 if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
2540 pcount = tp->packets_out;
2541 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2542 if (tp->packets_out > pcount)
2543 goto probe_sent;
2544 goto rearm_timer;
2545 }
2546 skb = skb_rb_last(&sk->tcp_rtx_queue);
2547 if (unlikely(!skb)) {
2548 WARN_ONCE(tp->packets_out,
2549 "invalid inflight: %u state %u cwnd %u mss %d\n",
2550 tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
2551 inet_csk(sk)->icsk_pending = 0;
2552 return;
2553 }
2554
2555 /* At most one outstanding TLP retransmission. */
2556 if (tp->tlp_high_seq)
2557 goto rearm_timer;
2558
2559 if (skb_still_in_host_queue(sk, skb))
2560 goto rearm_timer;
2561
2562 pcount = tcp_skb_pcount(skb);
2563 if (WARN_ON(!pcount))
2564 goto rearm_timer;
2565
2566 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2567 if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2568 (pcount - 1) * mss, mss,
2569 GFP_ATOMIC)))
2570 goto rearm_timer;
2571 skb = skb_rb_next(skb);
2572 }
2573
2574 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2575 goto rearm_timer;
2576
2577 if (__tcp_retransmit_skb(sk, skb, 1))
2578 goto rearm_timer;
2579
2580 /* Record snd_nxt for loss detection. */
2581 tp->tlp_high_seq = tp->snd_nxt;
2582
2583probe_sent:
2584 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
2585
2586 inet_csk(sk)->icsk_pending = 0;
2587rearm_timer:
2588 tcp_rearm_rto(sk);
2589}
2590
2591/* Push out any pending frames which were held back due to
2592 * TCP_CORK or attempt at coalescing tiny packets.
2593 * The socket must be locked by the caller.
2594 */
2595void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2596 int nonagle)
2597{
2598 /* If we are closed, the bytes will have to remain here.
2599  * In time closedown will finish, we empty the write queue and
2600  * all will be happy.
2601  */
2602 if (unlikely(sk->sk_state == TCP_CLOSE))
2603 return;
2604
2605 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2606 sk_gfp_mask(sk, GFP_ATOMIC)))
2607 tcp_check_probe_timer(sk);
2608}
2609
2610/* Send the _single_ skb sitting at the send head. This function requires
2611 * true push pending frames to start probing mss-sized chunks when the
2612 * socket buffer is empty. */
2613void tcp_push_one(struct sock *sk, unsigned int mss_now)
2614{
2615 struct sk_buff *skb = tcp_send_head(sk);
2616
2617 BUG_ON(!skb || skb->len < mss_now);
2618
2619 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2620}
2621
2622/* Compute the receive window to advertise, applying receiver-side silly
2623 * window syndrome (SWS) avoidance in the spirit of RFC 1122:
2624 *
2625 * - "mss" is our estimate of the peer's send MSS (icsk_ack.rcv_mss),
2626 *   clamped to the usable receive space.
2627 * - Once less than half of the receive space is free, quick ACKs are
2628 *   turned off and, under memory pressure, rcv_ssthresh is clamped as
2629 *   well; if the free space falls below 1/16 of the maximum allowed
2630 *   space, or below one mss, a zero window is advertised instead of a
2631 *   uselessly small one.
2632 * - With window scaling, the result is rounded up to a multiple of
2633 *   1 << rcv_wscale, since anything finer cannot be encoded.
2634 * - Without scaling, we prefer a window that is a whole multiple of mss,
2635 *   and keep the currently advertised window when it is within one mss
2636 *   of the free space.
2637 *
2638 * Note that TCP option space (timestamps, SACK) is not accounted for
2639 * here, so the advertisement may slightly exceed what we can buffer.
2640 */
2674u32 __tcp_select_window(struct sock *sk)
2675{
2676 struct inet_connection_sock *icsk = inet_csk(sk);
2677 struct tcp_sock *tp = tcp_sk(sk);
2678
2679
2680
2681
2682
2683
2684 int mss = icsk->icsk_ack.rcv_mss;
2685 int free_space = tcp_space(sk);
2686 int allowed_space = tcp_full_space(sk);
2687 int full_space = min_t(int, tp->window_clamp, allowed_space);
2688 int window;
2689
2690 if (unlikely(mss > full_space)) {
2691 mss = full_space;
2692 if (mss <= 0)
2693 return 0;
2694 }
2695 if (free_space < (full_space >> 1)) {
2696 icsk->icsk_ack.quick = 0;
2697
2698 if (tcp_under_memory_pressure(sk))
2699 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2700 4U * tp->advmss);
2701
2702 /* free_space might become our new window, make sure we don't
2703  * increase it due to wscale.
2704  */
2705 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2706
2707 /* If free space is below 1/16 of the maximum allowed space, or
2708  * smaller than the mss estimate, don't open the window at all:
2709  * advertising such a tiny window would only invite silly window
2710  * syndrome from a slow or lazy peer.
2711  */
2714 if (free_space < (allowed_space >> 4) || free_space < mss)
2715 return 0;
2716 }
2717
2718 if (free_space > tp->rcv_ssthresh)
2719 free_space = tp->rcv_ssthresh;
2720
2721 /* Don't do rounding if we are using window scaling, since the
2722  * scaled window will not line up with the MSS boundary anyway.
2723  */
2724 if (tp->rx_opt.rcv_wscale) {
2725 window = free_space;
2726
2727 /* Advertise enough space so that it won't get scaled away.
2728  * Important case: prevent a zero window announcement if
2729  * 1 << rcv_wscale > mss.
2730  */
2731 window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
2732 } else {
2733 window = tp->rcv_wnd;
2734 /* Get the largest window that is a nice multiple of mss.
2735  * Window clamp already applied above.
2736  * If our current window offering is within 1 mss of the
2737  * free space we just keep it. This prevents the divide
2738  * and multiply from happening most of the time.
2739  * We also don't do any window rounding when the free space
2740  * is too small.
2741  */
2742 if (window <= free_space - mss || window > free_space)
2743 window = rounddown(free_space, mss);
2744 else if (mss == full_space &&
2745 free_space > window + (full_space >> 1))
2746 window = free_space;
2747 }
2748
2749 return window;
2750}
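
/* Illustration (not kernel code) of the rounding performed above when window
 * scaling is in use. With rcv_wscale == 7 only multiples of 128 bytes can be
 * advertised, so the value is aligned *up* to avoid announcing a zero window
 * while some space is actually free:
 *
 *	unsigned int wscale = 7, free_space = 1000;
 *	unsigned int step   = 1u << wscale;				// 128
 *	unsigned int window = (free_space + step - 1) & ~(step - 1);	// 1024
 *	// the header field carries window >> wscale == 8
 */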
2751
2752void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2753 const struct sk_buff *next_skb)
2754{
2755 if (unlikely(tcp_has_tx_tstamp(next_skb))) {
2756 const struct skb_shared_info *next_shinfo =
2757 skb_shinfo(next_skb);
2758 struct skb_shared_info *shinfo = skb_shinfo(skb);
2759
2760 shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
2761 shinfo->tskey = next_shinfo->tskey;
2762 TCP_SKB_CB(skb)->txstamp_ack |=
2763 TCP_SKB_CB(next_skb)->txstamp_ack;
2764 }
2765}
2766
2767/* Collapses two adjacent SKB's during retransmission. */
2768static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2769{
2770 struct tcp_sock *tp = tcp_sk(sk);
2771 struct sk_buff *next_skb = skb_rb_next(skb);
2772 int next_skb_size;
2773
2774 next_skb_size = next_skb->len;
2775
2776 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2777
2778 if (next_skb_size) {
2779 if (next_skb_size <= skb_availroom(skb))
2780 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2781 next_skb_size);
2782 else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
2783 return false;
2784 }
2785 tcp_highest_sack_replace(sk, next_skb, skb);
2786
2787
2788 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2789
2790 /* Merge over control information. This moves PSH/FIN etc. over. */
2791 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2792
2793 /* All done, get rid of second SKB and account for it so
2794  * packet counting does not break.
2795  */
2796 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2797 TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
2798
2799
2800 tcp_clear_retrans_hints_partial(tp);
2801 if (next_skb == tp->retransmit_skb_hint)
2802 tp->retransmit_skb_hint = skb;
2803
2804 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2805
2806 tcp_skb_collapse_tstamp(skb, next_skb);
2807
2808 tcp_rtx_queue_unlink_and_free(next_skb, sk);
2809 return true;
2810}
2811
2812/* Check if coalescing SKBs is legal. */
2813static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2814{
2815 if (tcp_skb_pcount(skb) > 1)
2816 return false;
2817 if (skb_cloned(skb))
2818 return false;
2819
2820 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2821 return false;
2822
2823 return true;
2824}
2825
2826/* Collapse packets in the retransmit queue to create fewer packets
2827 * on the wire. This is done only on retransmission.
2828 */
2829static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2830 int space)
2831{
2832 struct tcp_sock *tp = tcp_sk(sk);
2833 struct sk_buff *skb = to, *tmp;
2834 bool first = true;
2835
2836 if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
2837 return;
2838 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2839 return;
2840
2841 skb_rbtree_walk_from_safe(skb, tmp) {
2842 if (!tcp_can_collapse(sk, skb))
2843 break;
2844
2845 if (!tcp_skb_can_collapse_to(to))
2846 break;
2847
2848 space -= skb->len;
2849
2850 if (first) {
2851 first = false;
2852 continue;
2853 }
2854
2855 if (space < 0)
2856 break;
2857
2858 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2859 break;
2860
2861 if (!tcp_collapse_retrans(sk, to))
2862 break;
2863 }
2864}
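
/* Worked example (illustration only) of the "space" budget above, which the
 * caller (__tcp_retransmit_skb) sets to cur_mss. Starting from a 300 byte
 * skb with cur_mss = 1448 and 400/500/600 byte followers, the walk collapses
 * the 400 and 500 byte skbs (budget 1448 -> 1148 -> 748 -> 248) and stops at
 * the 600 byte one, whose length would drive the budget negative. The
 * retransmitted skb therefore ends up at 1200 bytes and never exceeds one MSS.
 */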
2865
2866/* This retransmits one SKB.  Policy decisions and retransmit queue
2867 * state updates are done by the caller.  Returns non-zero if an
2868 * error occurred which prevented the send.
2869 */
2870int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2871{
2872 struct inet_connection_sock *icsk = inet_csk(sk);
2873 struct tcp_sock *tp = tcp_sk(sk);
2874 unsigned int cur_mss;
2875 int diff, len, err;
2876
2877
2878 /* Inconclusive MTU probe */
2879 if (icsk->icsk_mtup.probe_size)
2880 icsk->icsk_mtup.probe_size = 0;
2881
2882 /* Do not send more than we queued. 1/4 is reserved for possible
2883  * copying overhead: fragmentation, tunneling, splicing.
2884  */
2885 if (refcount_read(&sk->sk_wmem_alloc) >
2886 min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
2887 sk->sk_sndbuf))
2888 return -EAGAIN;
2889
2890 if (skb_still_in_host_queue(sk, skb))
2891 return -EBUSY;
2892
2893 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2894 if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
2895 WARN_ON_ONCE(1);
2896 return -EINVAL;
2897 }
2898 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2899 return -ENOMEM;
2900 }
2901
2902 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2903 return -EHOSTUNREACH;
2904
2905 cur_mss = tcp_current_mss(sk);
2906
2907 /* If the receiver has shrunk his window, and skb is out of
2908  * the new window, do not retransmit it. The exception is the
2909  * case when the window is shrunk to zero: then our retransmit
2910  * serves as a zero window probe.
2911  */
2912 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2913 TCP_SKB_CB(skb)->seq != tp->snd_una)
2914 return -EAGAIN;
2915
2916 len = cur_mss * segs;
2917 if (skb->len > len) {
2918 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
2919 cur_mss, GFP_ATOMIC))
2920 return -ENOMEM;
2921 } else {
2922 if (skb_unclone(skb, GFP_ATOMIC))
2923 return -ENOMEM;
2924
2925 diff = tcp_skb_pcount(skb);
2926 tcp_set_skb_tso_segs(skb, cur_mss);
2927 diff -= tcp_skb_pcount(skb);
2928 if (diff)
2929 tcp_adjust_pcount(sk, skb, diff);
2930 if (skb->len < cur_mss)
2931 tcp_retrans_try_collapse(sk, skb, cur_mss);
2932 }
2933
2934 /* RFC3168, section 6.1.1.1. ECN fallback */
2935 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
2936 tcp_ecn_clear_syn(sk, skb);
2937
2938
2939 segs = tcp_skb_pcount(skb);
2940 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
2941 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2942 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2943 tp->total_retrans += segs;
2944 tp->bytes_retrans += skb->len;
2945
2946 /* make sure skb->data is aligned on arches that require it
2947  * and check if ack-trimming & collapsing extended the headroom
2948  * beyond what csum_start can cover.
2949  */
2950 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2951 skb_headroom(skb) >= 0xFFFF)) {
2952 struct sk_buff *nskb;
2953
2954 tcp_skb_tsorted_save(skb) {
2955 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2956 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2957 -ENOBUFS;
2958 } tcp_skb_tsorted_restore(skb);
2959
2960 if (!err) {
2961 tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
2962 tcp_rate_skb_sent(sk, skb);
2963 }
2964 } else {
2965 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2966 }
2967
2968 /* To avoid taking spuriously low RTT samples based on a timestamp
2969  * for a transmit that never happened, always mark EVER_RETRANS.
2970  */
2971 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2972
2973 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
2974 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
2975 TCP_SKB_CB(skb)->seq, segs, err);
2976
2977 if (likely(!err)) {
2978 trace_tcp_retransmit_skb(sk, skb);
2979 } else if (err != -EBUSY) {
2980 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
2981 }
2982 return err;
2983}
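
/* Worked example (illustration only) of the -EAGAIN guard near the top of
 * __tcp_retransmit_skb(). A retransmit is deferred once the bytes already
 * committed to lower layers (sk_wmem_alloc) exceed
 * min(sk_wmem_queued + sk_wmem_queued/4, sk_sndbuf); e.g. with
 * sk_wmem_queued = 100000 and sk_sndbuf = 212992 the cutoff is
 * min(125000, 212992) == 125000 bytes.
 */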
2984
2985int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2986{
2987 struct tcp_sock *tp = tcp_sk(sk);
2988 int err = __tcp_retransmit_skb(sk, skb, segs);
2989
2990 if (err == 0) {
2991#if FASTRETRANS_DEBUG > 0
2992 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2993 net_dbg_ratelimited("retrans_out leaked\n");
2994 }
2995#endif
2996 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2997 tp->retrans_out += tcp_skb_pcount(skb);
2998 }
2999
3000 /* Save stamp of the first (attempted) retransmit. */
3001 if (!tp->retrans_stamp)
3002 tp->retrans_stamp = tcp_skb_timestamp(skb);
3003
3004 if (tp->undo_retrans < 0)
3005 tp->undo_retrans = 0;
3006 tp->undo_retrans += tcp_skb_pcount(skb);
3007 return err;
3008}
3009
3010/* This gets called after a retransmit timeout, and the initially
3011 * retransmitted data is acknowledged.  It tries to continue
3012 * resending the rest of the retransmit queue, until either
3013 * we've sent it all or the congestion window limit is reached.
3014 */
3015void tcp_xmit_retransmit_queue(struct sock *sk)
3016{
3017 const struct inet_connection_sock *icsk = inet_csk(sk);
3018 struct sk_buff *skb, *rtx_head, *hole = NULL;
3019 struct tcp_sock *tp = tcp_sk(sk);
3020 u32 max_segs;
3021 int mib_idx;
3022
3023 if (!tp->packets_out)
3024 return;
3025
3026 rtx_head = tcp_rtx_queue_head(sk);
3027 skb = tp->retransmit_skb_hint ?: rtx_head;
3028 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
3029 skb_rbtree_walk_from(skb) {
3030 __u8 sacked;
3031 int segs;
3032
3033 if (tcp_pacing_check(sk))
3034 break;
3035
3036
3037 if (!hole)
3038 tp->retransmit_skb_hint = skb;
3039
3040 segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
3041 if (segs <= 0)
3042 return;
3043 sacked = TCP_SKB_CB(skb)->sacked;
3044 /* In case tcp_shift_skb_data() has aggregated large skbs,
3045  * we need to make sure we are not sending overly big TSO packets.
3046  */
3047 segs = min_t(int, segs, max_segs);
3048
3049 if (tp->retrans_out >= tp->lost_out) {
3050 break;
3051 } else if (!(sacked & TCPCB_LOST)) {
3052 if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
3053 hole = skb;
3054 continue;
3055
3056 } else {
3057 if (icsk->icsk_ca_state != TCP_CA_Loss)
3058 mib_idx = LINUX_MIB_TCPFASTRETRANS;
3059 else
3060 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
3061 }
3062
3063 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
3064 continue;
3065
3066 if (tcp_small_queue_check(sk, skb, 1))
3067 return;
3068
3069 if (tcp_retransmit_skb(sk, skb, segs))
3070 return;
3071
3072 NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
3073
3074 if (tcp_in_cwnd_reduction(sk))
3075 tp->prr_out += tcp_skb_pcount(skb);
3076
3077 if (skb == rtx_head &&
3078 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
3079 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3080 inet_csk(sk)->icsk_rto,
3081 TCP_RTO_MAX,
3082 skb);
3083 }
3084}
3085
3086/* We allow to exceed memory limits for FIN packets to expedite
3087 * connection tear down and (memory) recovery.
3088 * Otherwise tcp_send_fin() could be tempted to either delay FIN
3089 * or even be forced to close the flow without any FIN.
3090 * In general, we want to allow one skb per socket to avoid hangs
3091 * with edge trigger epoll().
3092 */
3093void sk_forced_mem_schedule(struct sock *sk, int size)
3094{
3095 int amt;
3096
3097 if (size <= sk->sk_forward_alloc)
3098 return;
3099 amt = sk_mem_pages(size);
3100 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
3101 sk_memory_allocated_add(sk, amt);
3102
3103 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
3104 mem_cgroup_charge_skmem(sk->sk_memcg, amt);
3105}
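
/* Illustration of the forced accounting above, assuming SK_MEM_QUANTUM ==
 * PAGE_SIZE == 4096 as on common configurations. A FIN skb with truesize
 * 2304 is rounded up to one quantum:
 *
 *	int amt = (2304 + 4096 - 1) / 4096;	// 1
 *	// sk_forward_alloc grows by 4096 even if memory limits are exceeded.
 */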
3106
3107/* Send a FIN. The caller locks the socket for us.
3108 * We should try to send a FIN packet really hard, but eventually give up.
3109 */
3110void tcp_send_fin(struct sock *sk)
3111{
3112 struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
3113 struct tcp_sock *tp = tcp_sk(sk);
3114
3115 /* Optimization, tack on the FIN if we have one skb in the write queue
3116  * and this skb was not yet sent, or we are under memory pressure.
3117  * Note: in the latter case, the FIN packet will be sent after a timeout,
3118  * as the TCP stack thinks it has already been transmitted.
3119  */
3120 if (!tskb && tcp_under_memory_pressure(sk))
3121 tskb = skb_rb_last(&sk->tcp_rtx_queue);
3122
3123 if (tskb) {
3124 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
3125 TCP_SKB_CB(tskb)->end_seq++;
3126 tp->write_seq++;
3127 if (tcp_write_queue_empty(sk)) {
3128 /* This means tskb was already sent.
3129  * Pretend we included the FIN on previous transmits.
3130  * We need to set tp->snd_nxt to the value it would have
3131  * if FIN had been sent. This is because the retransmit path
3132  * does not change tp->snd_nxt.
3133  */
3134 tp->snd_nxt++;
3135 return;
3136 }
3137 } else {
3138 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
3139 if (unlikely(!skb))
3140 return;
3141
3142 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
3143 skb_reserve(skb, MAX_TCP_HEADER);
3144 sk_forced_mem_schedule(sk, skb->truesize);
3145 /* FIN eats a sequence byte; write_seq is advanced by tcp_queue_skb(). */
3146 tcp_init_nondata_skb(skb, tp->write_seq,
3147 TCPHDR_ACK | TCPHDR_FIN);
3148 tcp_queue_skb(sk, skb);
3149 }
3150 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
3151}
3152
3153/* We get here when a process closes a file descriptor (either due to
3154 * an explicit close() or as a byproduct of exit()'ing) and there
3155 * was unread data in the receive queue.  This behavior is recommended
3156 * by RFC 2525, section 2.17.  -DaveM
3157 */
3158void tcp_send_active_reset(struct sock *sk, gfp_t priority)
3159{
3160 struct sk_buff *skb;
3161
3162 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
3163
3164 /* NOTE: No TCP options attached and we never retransmit this. */
3165 skb = alloc_skb(MAX_TCP_HEADER, priority);
3166 if (!skb) {
3167 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3168 return;
3169 }
3170
3171
3172 skb_reserve(skb, MAX_TCP_HEADER);
3173 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
3174 TCPHDR_ACK | TCPHDR_RST);
3175 tcp_mstamp_refresh(tcp_sk(sk));
3176
3177 if (tcp_transmit_skb(sk, skb, 0, priority))
3178 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3179
3180
3181
3182
3183 trace_tcp_send_reset(sk, NULL);
3184}
3185
3186/* Send a crossed SYN-ACK during socket establishment.
3187 * WARNING: This routine must only be called when we have already sent
3188 * a SYN packet that crossed the incoming SYN that caused this routine
3189 * to get called. If this assumption fails then the initial rcv_wnd
3190 * and rcv_wscale values will not be correct.
3191 */
3192int tcp_send_synack(struct sock *sk)
3193{
3194 struct sk_buff *skb;
3195
3196 skb = tcp_rtx_queue_head(sk);
3197 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
3198 pr_err("%s: wrong queue state\n", __func__);
3199 return -EFAULT;
3200 }
3201 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
3202 if (skb_cloned(skb)) {
3203 struct sk_buff *nskb;
3204
3205 tcp_skb_tsorted_save(skb) {
3206 nskb = skb_copy(skb, GFP_ATOMIC);
3207 } tcp_skb_tsorted_restore(skb);
3208 if (!nskb)
3209 return -ENOMEM;
3210 INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
3211 tcp_rtx_queue_unlink_and_free(skb, sk);
3212 __skb_header_release(nskb);
3213 tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
3214 sk->sk_wmem_queued += nskb->truesize;
3215 sk_mem_charge(sk, nskb->truesize);
3216 skb = nskb;
3217 }
3218
3219 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
3220 tcp_ecn_send_synack(sk, skb);
3221 }
3222 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3223}
3224
3225/**
3226 * tcp_make_synack - Prepare a SYN-ACK.
3227 * sk: listener socket
3228 * dst: dst entry attached to the SYNACK
3229 * req: request_sock pointer
3230 * foc: cookie for tcp fast open
3231 * synack_type: Type of synack to prepare
3232 *
3233 * Allocates one skb and builds the SYN-ACK packet (without window). */
3234struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3235 struct request_sock *req,
3236 struct tcp_fastopen_cookie *foc,
3237 enum tcp_synack_type synack_type)
3238{
3239 struct inet_request_sock *ireq = inet_rsk(req);
3240 const struct tcp_sock *tp = tcp_sk(sk);
3241 struct tcp_md5sig_key *md5 = NULL;
3242 struct tcp_out_options opts;
3243 struct sk_buff *skb;
3244 int tcp_header_size;
3245 struct tcphdr *th;
3246 int mss;
3247 u64 now;
3248
3249 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
3250 if (unlikely(!skb)) {
3251 dst_release(dst);
3252 return NULL;
3253 }
3254
3255 skb_reserve(skb, MAX_TCP_HEADER);
3256
3257 switch (synack_type) {
3258 case TCP_SYNACK_NORMAL:
3259 skb_set_owner_w(skb, req_to_sk(req));
3260 break;
3261 case TCP_SYNACK_COOKIE:
3262 /* Under synflood, we do not attach the skb to a socket,
3263  * to avoid false sharing.
3264  */
3265 break;
3266 case TCP_SYNACK_FASTOPEN:
3267
3268
3269
3270
3271 skb_set_owner_w(skb, (struct sock *)sk);
3272 break;
3273 }
3274 skb_dst_set(skb, dst);
3275
3276 mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3277
3278 memset(&opts, 0, sizeof(opts));
3279 now = tcp_clock_ns();
3280#ifdef CONFIG_SYN_COOKIES
3281 if (unlikely(req->cookie_ts))
3282 skb->skb_mstamp_ns = cookie_init_timestamp(req);
3283 else
3284#endif
3285 {
3286 skb->skb_mstamp_ns = now;
3287 if (!tcp_rsk(req)->snt_synack)
3288 tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
3289 }
3290
3291#ifdef CONFIG_TCP_MD5SIG
3292 rcu_read_lock();
3293 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3294#endif
3295 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3296 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3297 foc) + sizeof(*th);
3298
3299 skb_push(skb, tcp_header_size);
3300 skb_reset_transport_header(skb);
3301
3302 th = (struct tcphdr *)skb->data;
3303 memset(th, 0, sizeof(struct tcphdr));
3304 th->syn = 1;
3305 th->ack = 1;
3306 tcp_ecn_make_synack(req, th);
3307 th->source = htons(ireq->ir_num);
3308 th->dest = ireq->ir_rmt_port;
3309 skb->mark = ireq->ir_mark;
3310 skb->ip_summed = CHECKSUM_PARTIAL;
3311 th->seq = htonl(tcp_rsk(req)->snt_isn);
3312
3313 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3314
3315
3316 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3317 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3318 th->doff = (tcp_header_size >> 2);
3319 __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
3320
3321#ifdef CONFIG_TCP_MD5SIG
3322
3323 if (md5)
3324 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
3325 md5, req_to_sk(req), skb);
3326 rcu_read_unlock();
3327#endif
3328
3329 skb->skb_mstamp_ns = now;
3330 tcp_add_tx_delay(skb, tp);
3331
3332 return skb;
3333}
3334EXPORT_SYMBOL(tcp_make_synack);
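
/* Worked example (illustration only) of the window clamp in tcp_make_synack().
 * The window field of a SYN-ACK is never scaled (RFC 7323), so an initial
 * receive window of 128 KB is reduced to the 16-bit maximum in the header:
 *
 *	u32 rsk_rcv_wnd = 131072;
 *	u16 hdr_window  = rsk_rcv_wnd < 65535U ? rsk_rcv_wnd : 65535U; // 65535
 *
 * The full window only becomes usable once both sides have agreed on a
 * window scale during the handshake.
 */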
3335
3336static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
3337{
3338 struct inet_connection_sock *icsk = inet_csk(sk);
3339 const struct tcp_congestion_ops *ca;
3340 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
3341
3342 if (ca_key == TCP_CA_UNSPEC)
3343 return;
3344
3345 rcu_read_lock();
3346 ca = tcp_ca_find_key(ca_key);
3347 if (likely(ca && try_module_get(ca->owner))) {
3348 module_put(icsk->icsk_ca_ops->owner);
3349 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
3350 icsk->icsk_ca_ops = ca;
3351 }
3352 rcu_read_unlock();
3353}
3354
3355/* Do all connect socket setups that can be done AF independent. */
3356static void tcp_connect_init(struct sock *sk)
3357{
3358 const struct dst_entry *dst = __sk_dst_get(sk);
3359 struct tcp_sock *tp = tcp_sk(sk);
3360 __u8 rcv_wscale;
3361 u32 rcv_wnd;
3362
3363
3364
3365
3366 tp->tcp_header_len = sizeof(struct tcphdr);
3367 if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
3368 tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
3369
3370#ifdef CONFIG_TCP_MD5SIG
3371 if (tp->af_specific->md5_lookup(sk, sk))
3372 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
3373#endif
3374
3375
3376 if (tp->rx_opt.user_mss)
3377 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3378 tp->max_window = 0;
3379 tcp_mtup_init(sk);
3380 tcp_sync_mss(sk, dst_mtu(dst));
3381
3382 tcp_ca_dst_init(sk, dst);
3383
3384 if (!tp->window_clamp)
3385 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
3386 tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3387
3388 tcp_initialize_rcv_mss(sk);
3389
3390
3391 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3392 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3393 tp->window_clamp = tcp_full_space(sk);
3394
3395 rcv_wnd = tcp_rwnd_init_bpf(sk);
3396 if (rcv_wnd == 0)
3397 rcv_wnd = dst_metric(dst, RTAX_INITRWND);
3398
3399 tcp_select_initial_window(sk, tcp_full_space(sk),
3400 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3401 &tp->rcv_wnd,
3402 &tp->window_clamp,
3403 sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
3404 &rcv_wscale,
3405 rcv_wnd);
3406
3407 tp->rx_opt.rcv_wscale = rcv_wscale;
3408 tp->rcv_ssthresh = tp->rcv_wnd;
3409
3410 sk->sk_err = 0;
3411 sock_reset_flag(sk, SOCK_DONE);
3412 tp->snd_wnd = 0;
3413 tcp_init_wl(tp, 0);
3414 tcp_write_queue_purge(sk);
3415 tp->snd_una = tp->write_seq;
3416 tp->snd_sml = tp->write_seq;
3417 tp->snd_up = tp->write_seq;
3418 tp->snd_nxt = tp->write_seq;
3419
3420 if (likely(!tp->repair))
3421 tp->rcv_nxt = 0;
3422 else
3423 tp->rcv_tstamp = tcp_jiffies32;
3424 tp->rcv_wup = tp->rcv_nxt;
3425 tp->copied_seq = tp->rcv_nxt;
3426
3427 inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
3428 inet_csk(sk)->icsk_retransmits = 0;
3429 tcp_clear_retrans(tp);
3430}
3431
3432static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3433{
3434 struct tcp_sock *tp = tcp_sk(sk);
3435 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3436
3437 tcb->end_seq += skb->len;
3438 __skb_header_release(skb);
3439 sk->sk_wmem_queued += skb->truesize;
3440 sk_mem_charge(sk, skb->truesize);
3441 tp->write_seq = tcb->end_seq;
3442 tp->packets_out += tcp_skb_pcount(skb);
3443}
3444
3445/* Build and send a SYN with data and (cached) Fast Open cookie. However,
3446 * queue a data-only packet after the regular SYN, such that regular SYNs
3447 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
3448 * only the SYN sequence, the data are retransmitted in the first ACK.
3449 * If the cookie is not cached or another error occurs, fall back to sending
3450 * a regular SYN with a Fast Open cookie request option.
3451 */
3452static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3453{
3454 struct tcp_sock *tp = tcp_sk(sk);
3455 struct tcp_fastopen_request *fo = tp->fastopen_req;
3456 int space, err = 0;
3457 struct sk_buff *syn_data;
3458
3459 tp->rx_opt.mss_clamp = tp->advmss;
3460 if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
3461 goto fallback;
3462
3463 /* MSS for SYN-data is based on the cached MSS and bounded by PMTU and
3464  * user-MSS. Reserve maximum option space for middleboxes that add
3465  * their own TCP options.
3466  */
3467 tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
3468
3469 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3470 MAX_TCP_OPTION_SPACE;
3471
3472 space = min_t(size_t, space, fo->size);
3473
3474
3475 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3476
3477 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
3478 if (!syn_data)
3479 goto fallback;
3480 syn_data->ip_summed = CHECKSUM_PARTIAL;
3481 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3482 if (space) {
3483 int copied = copy_from_iter(skb_put(syn_data, space), space,
3484 &fo->data->msg_iter);
3485 if (unlikely(!copied)) {
3486 tcp_skb_tsorted_anchor_cleanup(syn_data);
3487 kfree_skb(syn_data);
3488 goto fallback;
3489 }
3490 if (copied != space) {
3491 skb_trim(syn_data, copied);
3492 space = copied;
3493 }
3494 skb_zcopy_set(syn_data, fo->uarg, NULL);
3495 }
3496
3497 if (space == fo->size)
3498 fo->data = NULL;
3499 fo->copied = space;
3500
3501 tcp_connect_queue_skb(sk, syn_data);
3502 if (syn_data->len)
3503 tcp_chrono_start(sk, TCP_CHRONO_BUSY);
3504
3505 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3506
3507 syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
3508
3509 /* Whether or not the SYN+data went out, account for the payload as a
3510  * regular segment from now on: bump its sequence past the SYN and
3511  * replace the flags with ACK|PSH, so that on failure it can simply be
3512  * queued on the write queue and sent as ordinary data later.
3513  */
3514 TCP_SKB_CB(syn_data)->seq++;
3515 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3516 if (!err) {
3517 tp->syn_data = (fo->copied > 0);
3518 tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
3519 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3520 goto done;
3521 }
3522
3523
3524 __skb_queue_tail(&sk->sk_write_queue, syn_data);
3525 tp->packets_out -= tcp_skb_pcount(syn_data);
3526
3527fallback:
3528
3529 if (fo->cookie.len > 0)
3530 fo->cookie.len = 0;
3531 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3532 if (err)
3533 tp->syn_fastopen = 0;
3534done:
3535 fo->cookie.len = -1;
3536 return err;
3537}
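
/* Worked example (illustration only) of the SYN payload budget above,
 * assuming an effective MSS of 1460 derived from the cached path MTU:
 *
 *	int space = 1460 - MAX_TCP_OPTION_SPACE;	// 1460 - 40 == 1420
 *
 * The budget is further limited by fo->size (what the application actually
 * supplied) and by what fits linearly in the skb head, so middleboxes that
 * insert their own options still have room.
 */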
3538
3539
3540int tcp_connect(struct sock *sk)
3541{
3542 struct tcp_sock *tp = tcp_sk(sk);
3543 struct sk_buff *buff;
3544 int err;
3545
3546 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
3547
3548 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
3549 return -EHOSTUNREACH;
3550
3551 tcp_connect_init(sk);
3552
3553 if (unlikely(tp->repair)) {
3554 tcp_finish_connect(sk, NULL);
3555 return 0;
3556 }
3557
3558 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
3559 if (unlikely(!buff))
3560 return -ENOBUFS;
3561
3562 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3563 tcp_mstamp_refresh(tp);
3564 tp->retrans_stamp = tcp_time_stamp(tp);
3565 tcp_connect_queue_skb(sk, buff);
3566 tcp_ecn_send_syn(sk, buff);
3567 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
3568
3569
3570 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3571 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3572 if (err == -ECONNREFUSED)
3573 return err;
3574
3575 /* We change tp->snd_nxt after the tcp_transmit_skb() call
3576  * in order to make this packet get counted in tcpOutSegs.
3577  */
3578 tp->snd_nxt = tp->write_seq;
3579 tp->pushed_seq = tp->write_seq;
3580 buff = tcp_send_head(sk);
3581 if (unlikely(buff)) {
3582 tp->snd_nxt = TCP_SKB_CB(buff)->seq;
3583 tp->pushed_seq = TCP_SKB_CB(buff)->seq;
3584 }
3585 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3586
3587
3588 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3589 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3590 return 0;
3591}
3592EXPORT_SYMBOL(tcp_connect);
3593
3594/* Send out a delayed ack; the caller does the policy checking
3595 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
3596 * for details.
3597 */
3598void tcp_send_delayed_ack(struct sock *sk)
3599{
3600 struct inet_connection_sock *icsk = inet_csk(sk);
3601 int ato = icsk->icsk_ack.ato;
3602 unsigned long timeout;
3603
3604 if (ato > TCP_DELACK_MIN) {
3605 const struct tcp_sock *tp = tcp_sk(sk);
3606 int max_ato = HZ / 2;
3607
3608 if (inet_csk_in_pingpong_mode(sk) ||
3609 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3610 max_ato = TCP_DELACK_MAX;
3611
3612 /* Slow path, intersegment interval is "high". */
3613
3614 /* If some rtt estimate is known, use it to bound the delayed ack.
3615  * Do not use inet_csk(sk)->icsk_rto here; use the results of the rtt
3616  * measurements directly.
3617  */
3618 if (tp->srtt_us) {
3619 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3620 TCP_DELACK_MIN);
3621
3622 if (rtt < max_ato)
3623 max_ato = rtt;
3624 }
3625
3626 ato = min(ato, max_ato);
3627 }
3628
3629
3630 timeout = jiffies + ato;
3631
3632
3633 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3634 /* If the delack timer was blocked or is about to expire,
3635  * send the ACK now.
3636  */
3637 if (icsk->icsk_ack.blocked ||
3638 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3639 tcp_send_ack(sk);
3640 return;
3641 }
3642
3643 if (!time_before(timeout, icsk->icsk_ack.timeout))
3644 timeout = icsk->icsk_ack.timeout;
3645 }
3646 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3647 icsk->icsk_ack.timeout = timeout;
3648 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3649}
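
/* Worked example (illustration only) of the clamping above at HZ=1000, where
 * TCP_DELACK_MIN is HZ/25 (40 ms) and tp->srtt_us holds 8 * SRTT:
 *
 *	int rtt = max(usecs_to_jiffies(64000 >> 3), 40);	// max(8, 40) == 40
 *
 * so with SRTT = 8 ms an initial ato of 200 ms is cut down to 40 ms, while
 * the upper bound stays at HZ/2, or at the smaller TCP_DELACK_MAX when the
 * connection is in pingpong mode.
 */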
3650
3651/* This routine sends an ack and also updates the window. */
3652void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
3653{
3654 struct sk_buff *buff;
3655
3656
3657 if (sk->sk_state == TCP_CLOSE)
3658 return;
3659
3660 /* We are not putting this on the write queue, so
3661  * tcp_transmit_skb() will set the ownership to this
3662  * sock.
3663  */
3664 buff = alloc_skb(MAX_TCP_HEADER,
3665 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3666 if (unlikely(!buff)) {
3667 inet_csk_schedule_ack(sk);
3668 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3669 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3670 TCP_DELACK_MAX, TCP_RTO_MAX);
3671 return;
3672 }
3673
3674
3675 skb_reserve(buff, MAX_TCP_HEADER);
3676 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3677
3678 /* Mark this as a pure ACK (tiny truesize) so that it barely counts
3679  * against sk_wmem_alloc and therefore does not disturb TCP Small
3680  * Queues or fq/pacing accounting.
3681  */
3682 skb_set_tcp_pure_ack(buff);
3683
3684
3685 __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
3686}
3687EXPORT_SYMBOL_GPL(__tcp_send_ack);
3688
3689void tcp_send_ack(struct sock *sk)
3690{
3691 __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
3692}
3693
3694/* This routine sends a packet with an out of date sequence
3695 * number. It assumes the other end will try to ack it.
3696 *
3697 * Question: what should we make while in urgent mode?
3698 * 4.4BSD forces sending a single byte of data. We cannot send
3699 * out of window data, because we have SND.NXT==SND.MAX...
3700 *
3701 * Current solution: send TWO zero-length segments in urgent mode:
3702 * one with SEG.SEQ=SND.UNA to deliver the urgent pointer, another
3703 * out-of-date with SND.UNA-1 to probe the window.
3704 */
3705static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3706{
3707 struct tcp_sock *tp = tcp_sk(sk);
3708 struct sk_buff *skb;
3709
3710
3711 skb = alloc_skb(MAX_TCP_HEADER,
3712 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3713 if (!skb)
3714 return -1;
3715
3716
3717 skb_reserve(skb, MAX_TCP_HEADER);
3718
3719 /* Use a previous sequence.  This should cause the other
3720  * end to send an ack.  Don't queue or clone the SKB, just send it.
3721  */
3722 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3723 NET_INC_STATS(sock_net(sk), mib);
3724 return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
3725}
3726
3727
3728void tcp_send_window_probe(struct sock *sk)
3729{
3730 if (sk->sk_state == TCP_ESTABLISHED) {
3731 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3732 tcp_mstamp_refresh(tcp_sk(sk));
3733 tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
3734 }
3735}
3736
3737
3738int tcp_write_wakeup(struct sock *sk, int mib)
3739{
3740 struct tcp_sock *tp = tcp_sk(sk);
3741 struct sk_buff *skb;
3742
3743 if (sk->sk_state == TCP_CLOSE)
3744 return -1;
3745
3746 skb = tcp_send_head(sk);
3747 if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3748 int err;
3749 unsigned int mss = tcp_current_mss(sk);
3750 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3751
3752 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3753 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3754
3755 /* We are probing the opening of a window
3756  * but the window size is != 0;
3757  * this must be the result of sender-side SWS avoidance.
3758  */
3759 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3760 skb->len > mss) {
3761 seg_size = min(seg_size, mss);
3762 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3763 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
3764 skb, seg_size, mss, GFP_ATOMIC))
3765 return -1;
3766 } else if (!tcp_skb_pcount(skb))
3767 tcp_set_skb_tso_segs(skb, mss);
3768
3769 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3770 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3771 if (!err)
3772 tcp_event_new_data_sent(sk, skb);
3773 return err;
3774 } else {
3775 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3776 tcp_xmit_probe_skb(sk, 1, mib);
3777 return tcp_xmit_probe_skb(sk, 0, mib);
3778 }
3779}
3780
3781/* A window probe timeout has occurred.  If the window is not closed, send
3782 * a partial packet, else a zero probe.
3783 */
3784void tcp_send_probe0(struct sock *sk)
3785{
3786 struct inet_connection_sock *icsk = inet_csk(sk);
3787 struct tcp_sock *tp = tcp_sk(sk);
3788 struct net *net = sock_net(sk);
3789 unsigned long timeout;
3790 int err;
3791
3792 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3793
3794 if (tp->packets_out || tcp_write_queue_empty(sk)) {
3795
3796 icsk->icsk_probes_out = 0;
3797 icsk->icsk_backoff = 0;
3798 return;
3799 }
3800
3801 icsk->icsk_probes_out++;
3802 if (err <= 0) {
3803 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3804 icsk->icsk_backoff++;
3805 timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
3806 } else {
3807 /* If the packet was not sent due to local congestion,
3808  * let senders fight for local resources conservatively.
3809  */
3810 timeout = TCP_RESOURCE_PROBE_INTERVAL;
3811 }
3812 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
3813}
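
/* Illustration (not kernel code) of the zero-window probe backoff above,
 * assuming tcp_probe0_when() scales the base RTO by 2^icsk_backoff and caps
 * the result at TCP_RTO_MAX. With an RTO of 200 ms the successive probe
 * intervals are roughly 200, 400, 800, 1600, 3200, 6400 ms, ... up to the
 * 120 second ceiling, while a probe dropped due to local congestion
 * (err > 0) is simply retried after the fixed TCP_RESOURCE_PROBE_INTERVAL.
 */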
3814
3815int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3816{
3817 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3818 struct flowi fl;
3819 int res;
3820
3821 tcp_rsk(req)->txhash = net_tx_rndhash();
3822 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
3823 if (!res) {
3824 __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
3825 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3826 if (unlikely(tcp_passive_fastopen(sk)))
3827 tcp_sk(sk)->total_retrans++;
3828 trace_tcp_retransmit_synack(sk, req);
3829 }
3830 return res;
3831}
3832EXPORT_SYMBOL(tcp_rtx_synack);
3833