#define pr_fmt(fmt) "TCP: " fmt

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>

#include <trace/events/tcp.h>

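/* Refresh clocks of a TCP socket, ensuring monotonically increasing values:
 * tp->tcp_clock_cache caches tcp_clock_ns(), tp->tcp_mstamp holds the same
 * value in usec resolution.
 */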
void tcp_mstamp_refresh(struct tcp_sock *tp)
{
	u64 val = tcp_clock_ns();

	if (val > tp->tcp_clock_cache)
		tp->tcp_clock_cache = val;

	val = div_u64(val, NSEC_PER_USEC);
	if (val > tp->tcp_mstamp)
		tp->tcp_mstamp = val;
}

static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);

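/* Account for new data that has been sent to the network. */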
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int prior_packets = tp->packets_out;

	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

	__skb_unlink(skb, &sk->sk_write_queue);
	tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);

	tp->packets_out += tcp_skb_pcount(skb);
	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
		tcp_rearm_rto(sk);

	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
		      tcp_skb_pcount(skb));
}
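/* Pick a sequence number for an outgoing probe or bare ACK: use SND.NXT if
 * the window was not shrunk, or was shrunk by less than one window-scale
 * step of precision; otherwise fall back to the end of the offered window.
 */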
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
	    (tp->rx_opt.wscale_ok &&
	     ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
		return tp->snd_nxt;
	else
		return tcp_wnd_end(tp);
}
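/* Calculate the MSS to advertise in a SYN segment: start from the cached
 * tp->advmss and lower it if the route's advertised-MSS metric is smaller.
 */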
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst) {
		unsigned int metric = dst_metric_advmss(dst);

		if (metric < mss) {
			mss = metric;
			tp->advmss = mss;
		}
	}

	return (__u16)mss;
}
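/* RFC 2861 "slow start restart": after an idle period longer than the RTO,
 * halve the congestion window once per elapsed RTO, but not below the
 * restart window.
 */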
void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
	u32 cwnd = tp->snd_cwnd;

	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

	tp->snd_ssthresh = tcp_current_ssthresh(sk);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->snd_cwnd_used = 0;
}
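/* Congestion state accounting after a packet has been sent. */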
static void tcp_event_data_sent(struct tcp_sock *tp,
				struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 now = tcp_jiffies32;

	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	/* If this is the first data packet sent in response to the
	 * previously received data, and it is sent within the delayed-ACK
	 * timeout (ato) of that data, bump the pingpong count so the
	 * connection keeps being treated as interactive.
	 */
	if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
	    (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
		inet_csk_inc_pingpong_cnt(sk);

	tp->lsndtime = now;
}
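/* Account for an ACK we have just sent: flush pending compressed-ACK credit
 * into the MIB counters and, when the ACK carries the current rcv_nxt,
 * consume quick-ACK quota and stop the delayed-ACK timer.
 */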
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
				      u32 rcv_nxt)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
			      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
		tp->compressed_ack = TCP_FASTRETRANS_THRESH;
		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
			__sock_put(sk);
	}

	if (unlikely(rcv_nxt != tp->rcv_nxt))
		return;
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
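/* Determine a window scaling factor and the initial receive window to offer,
 * based on the amount of receive space available.  The results are stored
 * through the rcv_wnd, window_clamp and rcv_wscale pointers.
 */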
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	/* If no clamp is set, use the largest possible scaled window. */
	if (*window_clamp == 0)
		(*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
	space = min(*window_clamp, space);

	/* Quantize space offering to a multiple of mss if possible. */
	if (space > mss)
		space = rounddown(space, mss);

	/* NOTE: offering an initial window larger than 32767 will break some
	 * buggy TCP stacks that treat the window field as signed.  If the
	 * admin enables the workaround, truncate the initial offer to 32K-1
	 * unless the remote sent a window scaling option, which we take as a
	 * sign that it handles the field correctly.
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = min_t(u32, space, U16_MAX);

	if (init_rcv_wnd)
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);

	*rcv_wscale = 0;
	if (wscale_ok) {
		/* Set window scaling on the maximum possible window. */
		space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
		space = max_t(u32, space, sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		*rcv_wscale = clamp_t(int, ilog2(space) - 15,
				      0, TCP_MAX_WSCALE);
	}

	/* Set the clamp no higher than the maximum representable value. */
	(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
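/* Choose a new window to advertise, update state in tcp_sock for the socket,
 * and return the result with RFC 1323 scaling applied.  The return value can
 * be stuffed directly into th->window for an outgoing frame.
 */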
static u16 tcp_select_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 old_win = tp->rcv_wnd;
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	/* Never shrink the offered window. */
	if (new_win < cur_win) {
		/* If __tcp_select_window() suggested less than what we have
		 * already announced, keep advertising the current window
		 * (rounded up to the window scale), and count the event if
		 * we actually wanted to drop to zero.
		 */
		if (new_win == 0)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPWANTZEROWINDOWADV);
		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	/* Make sure we do not exceed the maximum possible
	 * scaled window.
	 */
	if (!tp->rx_opt.rcv_wscale &&
	    sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
		new_win = min(new_win, MAX_TCP_WINDOW);
	else
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

	/* RFC 1323 scaling applied. */
	new_win >>= tp->rx_opt.rcv_wscale;

	/* If we advertise a zero window, disable the fast path. */
	if (new_win == 0) {
		tp->pred_flags = 0;
		if (old_win)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPTOZEROWINDOWADV);
	} else if (old_win == 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
	}

	return new_win;
}
306
307
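/* Packet ECN state for a SYN-ACK. */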
308static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
309{
310 const struct tcp_sock *tp = tcp_sk(sk);
311
312 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
313 if (!(tp->ecn_flags & TCP_ECN_OK))
314 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
315 else if (tcp_ca_needs_ecn(sk) ||
316 tcp_bpf_ca_needs_ecn(sk))
317 INET_ECN_xmit(sk);
318}
319
320
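/* Packet ECN state for a SYN. */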
321static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
322{
323 struct tcp_sock *tp = tcp_sk(sk);
324 bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
325 bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
326 tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
327
328 if (!use_ecn) {
329 const struct dst_entry *dst = __sk_dst_get(sk);
330
331 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
332 use_ecn = true;
333 }
334
335 tp->ecn_flags = 0;
336
337 if (use_ecn) {
338 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
339 tp->ecn_flags = TCP_ECN_OK;
340 if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
341 INET_ECN_xmit(sk);
342 }
343}
344
345static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
346{
347 if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
348
349
350
351 TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
352}
353
354static void
355tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
356{
357 if (inet_rsk(req)->ecn_ok)
358 th->ece = 1;
359}
360
361
362
363
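/* Set up ECN state for a packet that is about to be sent on an
 * ESTABLISHED socket.
 */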
364static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
365 struct tcphdr *th, int tcp_header_len)
366{
367 struct tcp_sock *tp = tcp_sk(sk);
368
369 if (tp->ecn_flags & TCP_ECN_OK) {
370
371 if (skb->len != tcp_header_len &&
372 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
373 INET_ECN_xmit(sk);
374 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
375 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
376 th->cwr = 1;
377 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
378 }
379 } else if (!tcp_ca_needs_ecn(sk)) {
380
381 INET_ECN_dontxmit(sk);
382 }
383 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
384 th->ece = 1;
385 }
386}
387
388
389
390
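/* Construct the common control bits of a non-data skb.  If SYN/FIN is
 * present, auto increment the end seqno.
 */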
391static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
392{
393 skb->ip_summed = CHECKSUM_PARTIAL;
394
395 TCP_SKB_CB(skb)->tcp_flags = flags;
396 TCP_SKB_CB(skb)->sacked = 0;
397
398 tcp_skb_pcount_set(skb, 1);
399
400 TCP_SKB_CB(skb)->seq = seq;
401 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
402 seq++;
403 TCP_SKB_CB(skb)->end_seq = seq;
404}
405
406static inline bool tcp_urg_mode(const struct tcp_sock *tp)
407{
408 return tp->snd_una != tp->snd_up;
409}
410
411#define OPTION_SACK_ADVERTISE (1 << 0)
412#define OPTION_TS (1 << 1)
413#define OPTION_MD5 (1 << 2)
414#define OPTION_WSCALE (1 << 3)
415#define OPTION_FAST_OPEN_COOKIE (1 << 8)
416#define OPTION_SMC (1 << 9)
417
418static void smc_options_write(__be32 *ptr, u16 *options)
419{
420#if IS_ENABLED(CONFIG_SMC)
421 if (static_branch_unlikely(&tcp_have_smc)) {
422 if (unlikely(OPTION_SMC & *options)) {
423 *ptr++ = htonl((TCPOPT_NOP << 24) |
424 (TCPOPT_NOP << 16) |
425 (TCPOPT_EXP << 8) |
426 (TCPOLEN_EXP_SMC_BASE));
427 *ptr++ = htonl(TCPOPT_SMC_MAGIC);
428 }
429 }
430#endif
431}
432
433struct tcp_out_options {
434 u16 options;
435 u16 mss;
436 u8 ws;
437 u8 num_sack_blocks;
438 u8 hash_size;
439 __u8 *hash_location;
440 __u32 tsval, tsecr;
441 struct tcp_fastopen_cookie *fastopen_cookie;
442};
443
444
445
446
447
448
449
450
451
452
453
454
455
456
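/* Write previously computed TCP options to the packet.
 *
 * Beware: the ordering of options on the wire matters; some remote stacks
 * are known to misbehave if it changes (e.g. SACK_PERM as the first option),
 * so keep the established order below.
 */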
457static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
458 struct tcp_out_options *opts)
459{
460 u16 options = opts->options;
461
462 if (unlikely(OPTION_MD5 & options)) {
463 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
464 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
465
466 opts->hash_location = (__u8 *)ptr;
467 ptr += 4;
468 }
469
470 if (unlikely(opts->mss)) {
471 *ptr++ = htonl((TCPOPT_MSS << 24) |
472 (TCPOLEN_MSS << 16) |
473 opts->mss);
474 }
475
476 if (likely(OPTION_TS & options)) {
477 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
478 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
479 (TCPOLEN_SACK_PERM << 16) |
480 (TCPOPT_TIMESTAMP << 8) |
481 TCPOLEN_TIMESTAMP);
482 options &= ~OPTION_SACK_ADVERTISE;
483 } else {
484 *ptr++ = htonl((TCPOPT_NOP << 24) |
485 (TCPOPT_NOP << 16) |
486 (TCPOPT_TIMESTAMP << 8) |
487 TCPOLEN_TIMESTAMP);
488 }
489 *ptr++ = htonl(opts->tsval);
490 *ptr++ = htonl(opts->tsecr);
491 }
492
493 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
494 *ptr++ = htonl((TCPOPT_NOP << 24) |
495 (TCPOPT_NOP << 16) |
496 (TCPOPT_SACK_PERM << 8) |
497 TCPOLEN_SACK_PERM);
498 }
499
500 if (unlikely(OPTION_WSCALE & options)) {
501 *ptr++ = htonl((TCPOPT_NOP << 24) |
502 (TCPOPT_WINDOW << 16) |
503 (TCPOLEN_WINDOW << 8) |
504 opts->ws);
505 }
506
507 if (unlikely(opts->num_sack_blocks)) {
508 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
509 tp->duplicate_sack : tp->selective_acks;
510 int this_sack;
511
512 *ptr++ = htonl((TCPOPT_NOP << 24) |
513 (TCPOPT_NOP << 16) |
514 (TCPOPT_SACK << 8) |
515 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
516 TCPOLEN_SACK_PERBLOCK)));
517
518 for (this_sack = 0; this_sack < opts->num_sack_blocks;
519 ++this_sack) {
520 *ptr++ = htonl(sp[this_sack].start_seq);
521 *ptr++ = htonl(sp[this_sack].end_seq);
522 }
523
524 tp->rx_opt.dsack = 0;
525 }
526
527 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
528 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
529 u8 *p = (u8 *)ptr;
530 u32 len;
531
532 if (foc->exp) {
533 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
534 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
535 TCPOPT_FASTOPEN_MAGIC);
536 p += TCPOLEN_EXP_FASTOPEN_BASE;
537 } else {
538 len = TCPOLEN_FASTOPEN_BASE + foc->len;
539 *p++ = TCPOPT_FASTOPEN;
540 *p++ = len;
541 }
542
543 memcpy(p, foc->val, foc->len);
544 if ((len & 3) == 2) {
545 p[foc->len] = TCPOPT_NOP;
546 p[foc->len + 1] = TCPOPT_NOP;
547 }
548 ptr += (len + 3) >> 2;
549 }
550
551 smc_options_write(ptr, &options);
552}
553
554static void smc_set_option(const struct tcp_sock *tp,
555 struct tcp_out_options *opts,
556 unsigned int *remaining)
557{
558#if IS_ENABLED(CONFIG_SMC)
559 if (static_branch_unlikely(&tcp_have_smc)) {
560 if (tp->syn_smc) {
561 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
562 opts->options |= OPTION_SMC;
563 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
564 }
565 }
566 }
567#endif
568}
569
570static void smc_set_option_cond(const struct tcp_sock *tp,
571 const struct inet_request_sock *ireq,
572 struct tcp_out_options *opts,
573 unsigned int *remaining)
574{
575#if IS_ENABLED(CONFIG_SMC)
576 if (static_branch_unlikely(&tcp_have_smc)) {
577 if (tp->syn_smc && ireq->smc_ok) {
578 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
579 opts->options |= OPTION_SMC;
580 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
581 }
582 }
583 }
584#endif
585}
586
587
588
589
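/* Compute TCP options for SYN packets.  This is not the final answer,
 * it may depend on the SYN-ACK received from the peer.
 */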
590static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
591 struct tcp_out_options *opts,
592 struct tcp_md5sig_key **md5)
593{
594 struct tcp_sock *tp = tcp_sk(sk);
595 unsigned int remaining = MAX_TCP_OPTION_SPACE;
596 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
597
598 *md5 = NULL;
599#ifdef CONFIG_TCP_MD5SIG
600 if (static_branch_unlikely(&tcp_md5_needed) &&
601 rcu_access_pointer(tp->md5sig_info)) {
602 *md5 = tp->af_specific->md5_lookup(sk, sk);
603 if (*md5) {
604 opts->options |= OPTION_MD5;
605 remaining -= TCPOLEN_MD5SIG_ALIGNED;
606 }
607 }
608#endif
609
610
611
612
613
614
615
616
617
618
619 opts->mss = tcp_advertise_mss(sk);
620 remaining -= TCPOLEN_MSS_ALIGNED;
621
622 if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
623 opts->options |= OPTION_TS;
624 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
625 opts->tsecr = tp->rx_opt.ts_recent;
626 remaining -= TCPOLEN_TSTAMP_ALIGNED;
627 }
628 if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
629 opts->ws = tp->rx_opt.rcv_wscale;
630 opts->options |= OPTION_WSCALE;
631 remaining -= TCPOLEN_WSCALE_ALIGNED;
632 }
633 if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
634 opts->options |= OPTION_SACK_ADVERTISE;
635 if (unlikely(!(OPTION_TS & opts->options)))
636 remaining -= TCPOLEN_SACKPERM_ALIGNED;
637 }
638
639 if (fastopen && fastopen->cookie.len >= 0) {
640 u32 need = fastopen->cookie.len;
641
642 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
643 TCPOLEN_FASTOPEN_BASE;
644 need = (need + 3) & ~3U;
645 if (remaining >= need) {
646 opts->options |= OPTION_FAST_OPEN_COOKIE;
647 opts->fastopen_cookie = &fastopen->cookie;
648 remaining -= need;
649 tp->syn_fastopen = 1;
650 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
651 }
652 }
653
654 smc_set_option(tp, opts, &remaining);
655
656 return MAX_TCP_OPTION_SPACE - remaining;
657}
658
659
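/* Set up TCP options for SYN-ACKs. */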
660static unsigned int tcp_synack_options(const struct sock *sk,
661 struct request_sock *req,
662 unsigned int mss, struct sk_buff *skb,
663 struct tcp_out_options *opts,
664 const struct tcp_md5sig_key *md5,
665 struct tcp_fastopen_cookie *foc)
666{
667 struct inet_request_sock *ireq = inet_rsk(req);
668 unsigned int remaining = MAX_TCP_OPTION_SPACE;
669
670#ifdef CONFIG_TCP_MD5SIG
671 if (md5) {
672 opts->options |= OPTION_MD5;
673 remaining -= TCPOLEN_MD5SIG_ALIGNED;
674
675
676
677
678
679
680 ireq->tstamp_ok &= !ireq->sack_ok;
681 }
682#endif
683
684
685 opts->mss = mss;
686 remaining -= TCPOLEN_MSS_ALIGNED;
687
688 if (likely(ireq->wscale_ok)) {
689 opts->ws = ireq->rcv_wscale;
690 opts->options |= OPTION_WSCALE;
691 remaining -= TCPOLEN_WSCALE_ALIGNED;
692 }
693 if (likely(ireq->tstamp_ok)) {
694 opts->options |= OPTION_TS;
695 opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
696 opts->tsecr = req->ts_recent;
697 remaining -= TCPOLEN_TSTAMP_ALIGNED;
698 }
699 if (likely(ireq->sack_ok)) {
700 opts->options |= OPTION_SACK_ADVERTISE;
701 if (unlikely(!ireq->tstamp_ok))
702 remaining -= TCPOLEN_SACKPERM_ALIGNED;
703 }
704 if (foc != NULL && foc->len >= 0) {
705 u32 need = foc->len;
706
707 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
708 TCPOLEN_FASTOPEN_BASE;
709 need = (need + 3) & ~3U;
710 if (remaining >= need) {
711 opts->options |= OPTION_FAST_OPEN_COOKIE;
712 opts->fastopen_cookie = foc;
713 remaining -= need;
714 }
715 }
716
717 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
718
719 return MAX_TCP_OPTION_SPACE - remaining;
720}
721
722
723
724
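/* Compute TCP options for ESTABLISHED sockets: timestamps, SACK blocks
 * and, when configured, the MD5 signature option.
 */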
725static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
726 struct tcp_out_options *opts,
727 struct tcp_md5sig_key **md5)
728{
729 struct tcp_sock *tp = tcp_sk(sk);
730 unsigned int size = 0;
731 unsigned int eff_sacks;
732
733 opts->options = 0;
734
735 *md5 = NULL;
736#ifdef CONFIG_TCP_MD5SIG
737 if (static_branch_unlikely(&tcp_md5_needed) &&
738 rcu_access_pointer(tp->md5sig_info)) {
739 *md5 = tp->af_specific->md5_lookup(sk, sk);
740 if (*md5) {
741 opts->options |= OPTION_MD5;
742 size += TCPOLEN_MD5SIG_ALIGNED;
743 }
744 }
745#endif
746
747 if (likely(tp->rx_opt.tstamp_ok)) {
748 opts->options |= OPTION_TS;
749 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
750 opts->tsecr = tp->rx_opt.ts_recent;
751 size += TCPOLEN_TSTAMP_ALIGNED;
752 }
753
754 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
755 if (unlikely(eff_sacks)) {
756 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
757 opts->num_sack_blocks =
758 min_t(unsigned int, eff_sacks,
759 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
760 TCPOLEN_SACK_PERBLOCK);
761 size += TCPOLEN_SACK_BASE_ALIGNED +
762 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
763 }
764
765 return size;
766}
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
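/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ keeps only a small amount of data per flow queued below the stack
 * (in qdiscs and device queues) to reduce RTT inflation and bufferbloat.
 * Since transmitting from an skb destructor is forbidden, tcp_wfree()
 * defers the work to a per-cpu tasklet that services the listed sockets.
 */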
783struct tsq_tasklet {
784 struct tasklet_struct tasklet;
785 struct list_head head;
786};
787static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
788
789static void tcp_tsq_write(struct sock *sk)
790{
791 if ((1 << sk->sk_state) &
792 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
793 TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) {
794 struct tcp_sock *tp = tcp_sk(sk);
795
796 if (tp->lost_out > tp->retrans_out &&
797 tp->snd_cwnd > tcp_packets_in_flight(tp)) {
798 tcp_mstamp_refresh(tp);
799 tcp_xmit_retransmit_queue(sk);
800 }
801
802 tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
803 0, GFP_ATOMIC);
804 }
805}
806
807static void tcp_tsq_handler(struct sock *sk)
808{
809 bh_lock_sock(sk);
810 if (!sock_owned_by_user(sk))
811 tcp_tsq_write(sk);
812 else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
813 sock_hold(sk);
814 bh_unlock_sock(sk);
815}
816
817
818
819
820
821
822static void tcp_tasklet_func(unsigned long data)
823{
824 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
825 LIST_HEAD(list);
826 unsigned long flags;
827 struct list_head *q, *n;
828 struct tcp_sock *tp;
829 struct sock *sk;
830
831 local_irq_save(flags);
832 list_splice_init(&tsq->head, &list);
833 local_irq_restore(flags);
834
835 list_for_each_safe(q, n, &list) {
836 tp = list_entry(q, struct tcp_sock, tsq_node);
837 list_del(&tp->tsq_node);
838
839 sk = (struct sock *)tp;
840 smp_mb__before_atomic();
841 clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
842
843 tcp_tsq_handler(sk);
844 sk_free(sk);
845 }
846}
847
848#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
849 TCPF_WRITE_TIMER_DEFERRED | \
850 TCPF_DELACK_TIMER_DEFERRED | \
851 TCPF_MTU_REDUCED_DEFERRED)
852
853
854
855
856
857
858
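/* tcp_release_cb - called by release_sock() to perform the protocol work
 * (transmit, timers, MTU reduction) that was deferred while the socket was
 * owned by user context.
 */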
859void tcp_release_cb(struct sock *sk)
860{
861 unsigned long flags, nflags;
862
863
864 do {
865 flags = sk->sk_tsq_flags;
866 if (!(flags & TCP_DEFERRED_ALL))
867 return;
868 nflags = flags & ~TCP_DEFERRED_ALL;
869 } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
870
871 if (flags & TCPF_TSQ_DEFERRED) {
872 tcp_tsq_write(sk);
873 __sock_put(sk);
874 }
875
876
877
878
879
880
881
882
883
884 sock_release_ownership(sk);
885
886 if (flags & TCPF_WRITE_TIMER_DEFERRED) {
887 tcp_write_timer_handler(sk);
888 __sock_put(sk);
889 }
890 if (flags & TCPF_DELACK_TIMER_DEFERRED) {
891 tcp_delack_timer_handler(sk);
892 __sock_put(sk);
893 }
894 if (flags & TCPF_MTU_REDUCED_DEFERRED) {
895 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
896 __sock_put(sk);
897 }
898}
899EXPORT_SYMBOL(tcp_release_cb);
900
901void __init tcp_tasklet_init(void)
902{
903 int i;
904
905 for_each_possible_cpu(i) {
906 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
907
908 INIT_LIST_HEAD(&tsq->head);
909 tasklet_init(&tsq->tasklet,
910 tcp_tasklet_func,
911 (unsigned long)tsq);
912 }
913}
914
915
916
917
918
919
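/* Write buffer destructor automatically called from kfree_skb.
 * We can't xmit new skbs from this context, as we might already
 * hold the qdisc lock: defer to the per-cpu TSQ tasklet instead.
 */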
920void tcp_wfree(struct sk_buff *skb)
921{
922 struct sock *sk = skb->sk;
923 struct tcp_sock *tp = tcp_sk(sk);
924 unsigned long flags, nval, oval;
925
926
927
928
929 WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
930
931
932
933
934
935
936
937
938 if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
939 goto out;
940
941 for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
942 struct tsq_tasklet *tsq;
943 bool empty;
944
945 if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
946 goto out;
947
948 nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
949 nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
950 if (nval != oval)
951 continue;
952
953
954 local_irq_save(flags);
955 tsq = this_cpu_ptr(&tsq_tasklet);
956 empty = list_empty(&tsq->head);
957 list_add(&tp->tsq_node, &tsq->head);
958 if (empty)
959 tasklet_schedule(&tsq->tasklet);
960 local_irq_restore(flags);
961 return;
962 }
963out:
964 sk_free(sk);
965}
966
967
968
969
970enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
971{
972 struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
973 struct sock *sk = (struct sock *)tp;
974
975 tcp_tsq_handler(sk);
976 sock_put(sk);
977
978 return HRTIMER_NORESTART;
979}
980
981static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
982 u64 prior_wstamp)
983{
984 struct tcp_sock *tp = tcp_sk(sk);
985
986 if (sk->sk_pacing_status != SK_PACING_NONE) {
987 unsigned long rate = sk->sk_pacing_rate;
988
989
990
991
992
993 if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
994 u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
995 u64 credit = tp->tcp_wstamp_ns - prior_wstamp;
996
997
998 len_ns -= min_t(u64, len_ns / 2, credit);
999 tp->tcp_wstamp_ns += len_ns;
1000 }
1001 }
1002 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
1003}
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
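/* This routine actually transmits TCP packets queued in by tcp_sendmsg().
 * It is used by both the initial transmission and possible later
 * retransmissions.  All SKBs seen here are completely headerless: it is
 * our job to build the TCP header and pass the packet down to IP.
 */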
1016static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
1017 int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
1018{
1019 const struct inet_connection_sock *icsk = inet_csk(sk);
1020 struct inet_sock *inet;
1021 struct tcp_sock *tp;
1022 struct tcp_skb_cb *tcb;
1023 struct tcp_out_options opts;
1024 unsigned int tcp_options_size, tcp_header_size;
1025 struct sk_buff *oskb = NULL;
1026 struct tcp_md5sig_key *md5;
1027 struct tcphdr *th;
1028 u64 prior_wstamp;
1029 int err;
1030
1031 BUG_ON(!skb || !tcp_skb_pcount(skb));
1032 tp = tcp_sk(sk);
1033 prior_wstamp = tp->tcp_wstamp_ns;
1034 tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
1035 skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
1036 if (clone_it) {
1037 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
1038 - tp->snd_una;
1039 oskb = skb;
1040
1041 tcp_skb_tsorted_save(oskb) {
1042 if (unlikely(skb_cloned(oskb)))
1043 skb = pskb_copy(oskb, gfp_mask);
1044 else
1045 skb = skb_clone(oskb, gfp_mask);
1046 } tcp_skb_tsorted_restore(oskb);
1047
1048 if (unlikely(!skb))
1049 return -ENOBUFS;
1050 }
1051
1052 inet = inet_sk(sk);
1053 tcb = TCP_SKB_CB(skb);
1054 memset(&opts, 0, sizeof(opts));
1055
1056 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
1057 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
1058 else
1059 tcp_options_size = tcp_established_options(sk, skb, &opts,
1060 &md5);
1061 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
1062
1063
1064
1065
1066
1067
1068
1069
1070 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
1071
1072
1073
1074
1075
1076
1077 skb->pfmemalloc = 0;
1078
1079 skb_push(skb, tcp_header_size);
1080 skb_reset_transport_header(skb);
1081
1082 skb_orphan(skb);
1083 skb->sk = sk;
1084 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
1085 skb_set_hash_from_sk(skb, sk);
1086 refcount_add(skb->truesize, &sk->sk_wmem_alloc);
1087
1088 skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
1089
1090
1091 th = (struct tcphdr *)skb->data;
1092 th->source = inet->inet_sport;
1093 th->dest = inet->inet_dport;
1094 th->seq = htonl(tcb->seq);
1095 th->ack_seq = htonl(rcv_nxt);
1096 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
1097 tcb->tcp_flags);
1098
1099 th->check = 0;
1100 th->urg_ptr = 0;
1101
1102
1103 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
1104 if (before(tp->snd_up, tcb->seq + 0x10000)) {
1105 th->urg_ptr = htons(tp->snd_up - tcb->seq);
1106 th->urg = 1;
1107 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
1108 th->urg_ptr = htons(0xFFFF);
1109 th->urg = 1;
1110 }
1111 }
1112
1113 tcp_options_write((__be32 *)(th + 1), tp, &opts);
1114 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
1115 if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
1116 th->window = htons(tcp_select_window(sk));
1117 tcp_ecn_send(sk, skb, th, tcp_header_size);
1118 } else {
1119
1120
1121
1122 th->window = htons(min(tp->rcv_wnd, 65535U));
1123 }
1124#ifdef CONFIG_TCP_MD5SIG
1125
1126 if (md5) {
1127 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1128 tp->af_specific->calc_md5_hash(opts.hash_location,
1129 md5, sk, skb);
1130 }
1131#endif
1132
1133 icsk->icsk_af_ops->send_check(sk, skb);
1134
1135 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1136 tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
1137
1138 if (skb->len != tcp_header_size) {
1139 tcp_event_data_sent(tp, sk);
1140 tp->data_segs_out += tcp_skb_pcount(skb);
1141 tp->bytes_sent += skb->len - tcp_header_size;
1142 }
1143
1144 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1145 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1146 tcp_skb_pcount(skb));
1147
1148 tp->segs_out += tcp_skb_pcount(skb);
1149
1150 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
1151 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
1152
1153
1154
1155
1156 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
1157 sizeof(struct inet6_skb_parm)));
1158
1159 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1160
1161 if (unlikely(err > 0)) {
1162 tcp_enter_cwr(sk);
1163 err = net_xmit_eval(err);
1164 }
1165 if (!err && oskb) {
1166 tcp_update_skb_after_send(sk, oskb, prior_wstamp);
1167 tcp_rate_skb_sent(sk, oskb);
1168 }
1169 return err;
1170}
1171
1172static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1173 gfp_t gfp_mask)
1174{
1175 return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
1176 tcp_sk(sk)->rcv_nxt);
1177}
1178
1179
1180
1181
1182
1183
1184static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1185{
1186 struct tcp_sock *tp = tcp_sk(sk);
1187
1188
1189 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1190 __skb_header_release(skb);
1191 tcp_add_write_queue_tail(sk, skb);
1192 sk->sk_wmem_queued += skb->truesize;
1193 sk_mem_charge(sk, skb->truesize);
1194}
1195
1196
1197static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1198{
1199 if (skb->len <= mss_now) {
1200
1201
1202
1203 tcp_skb_pcount_set(skb, 1);
1204 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1205 } else {
1206 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1207 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
1208 }
1209}
1210
1211
1212
1213
1214static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1215{
1216 struct tcp_sock *tp = tcp_sk(sk);
1217
1218 tp->packets_out -= decr;
1219
1220 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1221 tp->sacked_out -= decr;
1222 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1223 tp->retrans_out -= decr;
1224 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1225 tp->lost_out -= decr;
1226
1227
1228 if (tcp_is_reno(tp) && decr > 0)
1229 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1230
1231 if (tp->lost_skb_hint &&
1232 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1233 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1234 tp->lost_cnt_hint -= decr;
1235
1236 tcp_verify_left_out(tp);
1237}
1238
1239static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
1240{
1241 return TCP_SKB_CB(skb)->txstamp_ack ||
1242 (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
1243}
1244
1245static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1246{
1247 struct skb_shared_info *shinfo = skb_shinfo(skb);
1248
1249 if (unlikely(tcp_has_tx_tstamp(skb)) &&
1250 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1251 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1252 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1253
1254 shinfo->tx_flags &= ~tsflags;
1255 shinfo2->tx_flags |= tsflags;
1256 swap(shinfo->tskey, shinfo2->tskey);
1257 TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack;
1258 TCP_SKB_CB(skb)->txstamp_ack = 0;
1259 }
1260}
1261
1262static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1263{
1264 TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
1265 TCP_SKB_CB(skb)->eor = 0;
1266}
1267
1268
1269static void tcp_insert_write_queue_after(struct sk_buff *skb,
1270 struct sk_buff *buff,
1271 struct sock *sk,
1272 enum tcp_queue tcp_queue)
1273{
1274 if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
1275 __skb_queue_after(&sk->sk_write_queue, skb, buff);
1276 else
1277 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
1278}
1279
1280
1281
1282
1283
1284
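/* Create two new TCP segments: shrink the given segment to the specified
 * size and put the remaining part of the data into a new segment inserted
 * right after it in the given queue.  The SKBs are still headerless at this
 * point.
 */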
1285int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1286 struct sk_buff *skb, u32 len,
1287 unsigned int mss_now, gfp_t gfp)
1288{
1289 struct tcp_sock *tp = tcp_sk(sk);
1290 struct sk_buff *buff;
1291 int nsize, old_factor;
1292 int nlen;
1293 u8 flags;
1294
1295 if (WARN_ON(len > skb->len))
1296 return -EINVAL;
1297
1298 nsize = skb_headlen(skb) - len;
1299 if (nsize < 0)
1300 nsize = 0;
1301
1302 if (skb_unclone(skb, gfp))
1303 return -ENOMEM;
1304
1305
1306 buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
1307 if (!buff)
1308 return -ENOMEM;
1309
1310 sk->sk_wmem_queued += buff->truesize;
1311 sk_mem_charge(sk, buff->truesize);
1312 nlen = skb->len - len - nsize;
1313 buff->truesize += nlen;
1314 skb->truesize -= nlen;
1315
1316
1317 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1318 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1319 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1320
1321
1322 flags = TCP_SKB_CB(skb)->tcp_flags;
1323 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1324 TCP_SKB_CB(buff)->tcp_flags = flags;
1325 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1326 tcp_skb_fragment_eor(skb, buff);
1327
1328 skb_split(skb, buff, len);
1329
1330 buff->ip_summed = CHECKSUM_PARTIAL;
1331
1332 buff->tstamp = skb->tstamp;
1333 tcp_fragment_tstamp(skb, buff);
1334
1335 old_factor = tcp_skb_pcount(skb);
1336
1337
1338 tcp_set_skb_tso_segs(skb, mss_now);
1339 tcp_set_skb_tso_segs(buff, mss_now);
1340
1341
1342 TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
1343
1344
1345
1346
1347 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1348 int diff = old_factor - tcp_skb_pcount(skb) -
1349 tcp_skb_pcount(buff);
1350
1351 if (diff)
1352 tcp_adjust_pcount(sk, skb, diff);
1353 }
1354
1355
1356 __skb_header_release(buff);
1357 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1358 if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
1359 list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
1360
1361 return 0;
1362}
1363
1364
1365
1366
1367static int __pskb_trim_head(struct sk_buff *skb, int len)
1368{
1369 struct skb_shared_info *shinfo;
1370 int i, k, eat;
1371
1372 eat = min_t(int, len, skb_headlen(skb));
1373 if (eat) {
1374 __skb_pull(skb, eat);
1375 len -= eat;
1376 if (!len)
1377 return 0;
1378 }
1379 eat = len;
1380 k = 0;
1381 shinfo = skb_shinfo(skb);
1382 for (i = 0; i < shinfo->nr_frags; i++) {
1383 int size = skb_frag_size(&shinfo->frags[i]);
1384
1385 if (size <= eat) {
1386 skb_frag_unref(skb, i);
1387 eat -= size;
1388 } else {
1389 shinfo->frags[k] = shinfo->frags[i];
1390 if (eat) {
1391 shinfo->frags[k].page_offset += eat;
1392 skb_frag_size_sub(&shinfo->frags[k], eat);
1393 eat = 0;
1394 }
1395 k++;
1396 }
1397 }
1398 shinfo->nr_frags = k;
1399
1400 skb->data_len -= len;
1401 skb->len = skb->data_len;
1402 return len;
1403}
1404
1405
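/* Remove acked data from a packet in the transmit queue. */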
1406int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1407{
1408 u32 delta_truesize;
1409
1410 if (skb_unclone(skb, GFP_ATOMIC))
1411 return -ENOMEM;
1412
1413 delta_truesize = __pskb_trim_head(skb, len);
1414
1415 TCP_SKB_CB(skb)->seq += len;
1416 skb->ip_summed = CHECKSUM_PARTIAL;
1417
1418 if (delta_truesize) {
1419 skb->truesize -= delta_truesize;
1420 sk->sk_wmem_queued -= delta_truesize;
1421 sk_mem_uncharge(sk, delta_truesize);
1422 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1423 }
1424
1425
1426 if (tcp_skb_pcount(skb) > 1)
1427 tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
1428
1429 return 0;
1430}
1431
1432
1433static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1434{
1435 const struct tcp_sock *tp = tcp_sk(sk);
1436 const struct inet_connection_sock *icsk = inet_csk(sk);
1437 int mss_now;
1438
1439
1440
1441
1442 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1443
1444
1445 if (icsk->icsk_af_ops->net_frag_header_len) {
1446 const struct dst_entry *dst = __sk_dst_get(sk);
1447
1448 if (dst && dst_allfrag(dst))
1449 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1450 }
1451
1452
1453 if (mss_now > tp->rx_opt.mss_clamp)
1454 mss_now = tp->rx_opt.mss_clamp;
1455
1456
1457 mss_now -= icsk->icsk_ext_hdr_len;
1458
1459
1460 if (mss_now < 48)
1461 mss_now = 48;
1462 return mss_now;
1463}
1464
1465
1466int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1467{
1468
1469 return __tcp_mtu_to_mss(sk, pmtu) -
1470 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1471}
1472
1473
1474int tcp_mss_to_mtu(struct sock *sk, int mss)
1475{
1476 const struct tcp_sock *tp = tcp_sk(sk);
1477 const struct inet_connection_sock *icsk = inet_csk(sk);
1478 int mtu;
1479
1480 mtu = mss +
1481 tp->tcp_header_len +
1482 icsk->icsk_ext_hdr_len +
1483 icsk->icsk_af_ops->net_header_len;
1484
1485
1486 if (icsk->icsk_af_ops->net_frag_header_len) {
1487 const struct dst_entry *dst = __sk_dst_get(sk);
1488
1489 if (dst && dst_allfrag(dst))
1490 mtu += icsk->icsk_af_ops->net_frag_header_len;
1491 }
1492 return mtu;
1493}
1494EXPORT_SYMBOL(tcp_mss_to_mtu);
1495
1496
1497void tcp_mtup_init(struct sock *sk)
1498{
1499 struct tcp_sock *tp = tcp_sk(sk);
1500 struct inet_connection_sock *icsk = inet_csk(sk);
1501 struct net *net = sock_net(sk);
1502
1503 icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1504 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1505 icsk->icsk_af_ops->net_header_len;
1506 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1507 icsk->icsk_mtup.probe_size = 0;
1508 if (icsk->icsk_mtup.enabled)
1509 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
1510}
1511EXPORT_SYMBOL(tcp_mtup_init);
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
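/* Synchronize the sending MSS (tp->mss_cache) with the current path MTU and
 * extension-header set, bounding it by the peer's advertised mss_clamp and
 * by any ongoing MTU probing state.  Returns the computed MSS.
 */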
1535unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1536{
1537 struct tcp_sock *tp = tcp_sk(sk);
1538 struct inet_connection_sock *icsk = inet_csk(sk);
1539 int mss_now;
1540
1541 if (icsk->icsk_mtup.search_high > pmtu)
1542 icsk->icsk_mtup.search_high = pmtu;
1543
1544 mss_now = tcp_mtu_to_mss(sk, pmtu);
1545 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1546
1547
1548 icsk->icsk_pmtu_cookie = pmtu;
1549 if (icsk->icsk_mtup.enabled)
1550 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1551 tp->mss_cache = mss_now;
1552
1553 return mss_now;
1554}
1555EXPORT_SYMBOL(tcp_sync_mss);
1556
1557
1558
1559
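/* Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 */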
1560unsigned int tcp_current_mss(struct sock *sk)
1561{
1562 const struct tcp_sock *tp = tcp_sk(sk);
1563 const struct dst_entry *dst = __sk_dst_get(sk);
1564 u32 mss_now;
1565 unsigned int header_len;
1566 struct tcp_out_options opts;
1567 struct tcp_md5sig_key *md5;
1568
1569 mss_now = tp->mss_cache;
1570
1571 if (dst) {
1572 u32 mtu = dst_mtu(dst);
1573 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1574 mss_now = tcp_sync_mss(sk, mtu);
1575 }
1576
1577 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1578 sizeof(struct tcphdr);
1579
1580
1581
1582
1583 if (header_len != tp->tcp_header_len) {
1584 int delta = (int) header_len - tp->tcp_header_len;
1585 mss_now -= delta;
1586 }
1587
1588 return mss_now;
1589}
1590
1591
1592
1593
1594
1595static void tcp_cwnd_application_limited(struct sock *sk)
1596{
1597 struct tcp_sock *tp = tcp_sk(sk);
1598
1599 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1600 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1601
1602 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1603 u32 win_used = max(tp->snd_cwnd_used, init_win);
1604 if (win_used < tp->snd_cwnd) {
1605 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1606 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1607 }
1608 tp->snd_cwnd_used = 0;
1609 }
1610 tp->snd_cwnd_stamp = tcp_jiffies32;
1611}
1612
1613static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1614{
1615 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1616 struct tcp_sock *tp = tcp_sk(sk);
1617
1618
1619
1620
1621 if (!before(tp->snd_una, tp->max_packets_seq) ||
1622 tp->packets_out > tp->max_packets_out) {
1623 tp->max_packets_out = tp->packets_out;
1624 tp->max_packets_seq = tp->snd_nxt;
1625 tp->is_cwnd_limited = is_cwnd_limited;
1626 }
1627
1628 if (tcp_is_cwnd_limited(sk)) {
1629
1630 tp->snd_cwnd_used = 0;
1631 tp->snd_cwnd_stamp = tcp_jiffies32;
1632 } else {
1633
1634 if (tp->packets_out > tp->snd_cwnd_used)
1635 tp->snd_cwnd_used = tp->packets_out;
1636
1637 if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
1638 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
1639 !ca_ops->cong_control)
1640 tcp_cwnd_application_limited(sk);
1641
1642
1643
1644
1645
1646
1647
1648
1649 if (tcp_write_queue_empty(sk) && sk->sk_socket &&
1650 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
1651 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1652 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
1653 }
1654}
1655
1656
1657static bool tcp_minshall_check(const struct tcp_sock *tp)
1658{
1659 return after(tp->snd_sml, tp->snd_una) &&
1660 !after(tp->snd_sml, tp->snd_nxt);
1661}
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1672 const struct sk_buff *skb)
1673{
1674 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1675 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1676}
1677
1678
1679
1680
1681
1682
1683
1684
1685static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1686 int nonagle)
1687{
1688 return partial &&
1689 ((nonagle & TCP_NAGLE_CORK) ||
1690 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1691}
1692
1693
1694
1695
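/* Return how many segments we would like per TSO packet, so that pacing
 * sends roughly one such packet per ms instead of one huge burst.
 */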
1696static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1697 int min_tso_segs)
1698{
1699 u32 bytes, segs;
1700
1701 bytes = min_t(unsigned long,
1702 sk->sk_pacing_rate >> sk->sk_pacing_shift,
1703 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1704
1705
1706
1707
1708
1709
1710 segs = max_t(u32, bytes / mss_now, min_tso_segs);
1711
1712 return segs;
1713}
1714
1715
1716
1717
1718static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1719{
1720 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1721 u32 min_tso, tso_segs;
1722
1723 min_tso = ca_ops->min_tso_segs ?
1724 ca_ops->min_tso_segs(sk) :
1725 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
1726
1727 tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
1728 return min_t(u32, tso_segs, sk->sk_gso_max_segs);
1729}
1730
1731
1732static unsigned int tcp_mss_split_point(const struct sock *sk,
1733 const struct sk_buff *skb,
1734 unsigned int mss_now,
1735 unsigned int max_segs,
1736 int nonagle)
1737{
1738 const struct tcp_sock *tp = tcp_sk(sk);
1739 u32 partial, needed, window, max_len;
1740
1741 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1742 max_len = mss_now * max_segs;
1743
1744 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1745 return max_len;
1746
1747 needed = min(skb->len, window);
1748
1749 if (max_len <= needed)
1750 return max_len;
1751
1752 partial = needed % mss_now;
1753
1754
1755
1756
1757 if (tcp_nagle_check(partial != 0, tp, nonagle))
1758 return needed - partial;
1759
1760 return needed;
1761}
1762
1763
1764
1765
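/* Can at least one segment of SKB be sent right now, according to the
 * congestion window rules?  If so, return how many segments are allowed.
 */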
1766static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1767 const struct sk_buff *skb)
1768{
1769 u32 in_flight, cwnd, halfcwnd;
1770
1771
1772 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1773 tcp_skb_pcount(skb) == 1)
1774 return 1;
1775
1776 in_flight = tcp_packets_in_flight(tp);
1777 cwnd = tp->snd_cwnd;
1778 if (in_flight >= cwnd)
1779 return 0;
1780
1781
1782
1783
1784 halfcwnd = max(cwnd >> 1, 1U);
1785 return min(halfcwnd, cwnd - in_flight);
1786}
1787
1788
1789
1790
1791
1792static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1793{
1794 int tso_segs = tcp_skb_pcount(skb);
1795
1796 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1797 tcp_set_skb_tso_segs(skb, mss_now);
1798 tso_segs = tcp_skb_pcount(skb);
1799 }
1800 return tso_segs;
1801}
1802
1803
1804
1805
1806
1807static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1808 unsigned int cur_mss, int nonagle)
1809{
1810
1811
1812
1813
1814
1815
1816 if (nonagle & TCP_NAGLE_PUSH)
1817 return true;
1818
1819
1820 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1821 return true;
1822
1823 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1824 return true;
1825
1826 return false;
1827}
1828
1829
1830static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1831 const struct sk_buff *skb,
1832 unsigned int cur_mss)
1833{
1834 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1835
1836 if (skb->len > cur_mss)
1837 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1838
1839 return !after(end_seq, tcp_wnd_end(tp));
1840}
1841
1842
1843
1844
1845
1846
1847
1848
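/* Trim a TSO skb to LEN bytes and put the remaining data into a new packet
 * queued right after it in the write queue.  Like tcp_fragment(), but may
 * assume the data is in page frags and the skb has never been cloned/sent.
 */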
1849static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1850 unsigned int mss_now, gfp_t gfp)
1851{
1852 int nlen = skb->len - len;
1853 struct sk_buff *buff;
1854 u8 flags;
1855
1856
1857 if (skb->len != skb->data_len)
1858 return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
1859 skb, len, mss_now, gfp);
1860
1861 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1862 if (unlikely(!buff))
1863 return -ENOMEM;
1864
1865 sk->sk_wmem_queued += buff->truesize;
1866 sk_mem_charge(sk, buff->truesize);
1867 buff->truesize += nlen;
1868 skb->truesize -= nlen;
1869
1870
1871 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1872 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1873 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1874
1875
1876 flags = TCP_SKB_CB(skb)->tcp_flags;
1877 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1878 TCP_SKB_CB(buff)->tcp_flags = flags;
1879
1880
1881 TCP_SKB_CB(buff)->sacked = 0;
1882
1883 tcp_skb_fragment_eor(skb, buff);
1884
1885 buff->ip_summed = CHECKSUM_PARTIAL;
1886 skb_split(skb, buff, len);
1887 tcp_fragment_tstamp(skb, buff);
1888
1889
1890 tcp_set_skb_tso_segs(skb, mss_now);
1891 tcp_set_skb_tso_segs(buff, mss_now);
1892
1893
1894 __skb_header_release(buff);
1895 tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE);
1896
1897 return 0;
1898}
1899
1900
1901
1902
1903
1904
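/* Try to defer sending, if possible, in order to minimize the amount
 * of TSO splitting we do.  View it as a kind of TSO Nagle test.
 */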
1905static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1906 bool *is_cwnd_limited,
1907 bool *is_rwnd_limited,
1908 u32 max_segs)
1909{
1910 const struct inet_connection_sock *icsk = inet_csk(sk);
1911 u32 send_win, cong_win, limit, in_flight;
1912 struct tcp_sock *tp = tcp_sk(sk);
1913 struct sk_buff *head;
1914 int win_divisor;
1915 s64 delta;
1916
1917 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
1918 goto send_now;
1919
1920
1921
1922
1923
1924
1925 delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
1926 if (delta > 0)
1927 goto send_now;
1928
1929 in_flight = tcp_packets_in_flight(tp);
1930
1931 BUG_ON(tcp_skb_pcount(skb) <= 1);
1932 BUG_ON(tp->snd_cwnd <= in_flight);
1933
1934 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1935
1936
1937 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1938
1939 limit = min(send_win, cong_win);
1940
1941
1942 if (limit >= max_segs * tp->mss_cache)
1943 goto send_now;
1944
1945
1946 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1947 goto send_now;
1948
1949 win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
1950 if (win_divisor) {
1951 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1952
1953
1954
1955
1956 chunk /= win_divisor;
1957 if (limit >= chunk)
1958 goto send_now;
1959 } else {
1960
1961
1962
1963
1964
1965 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1966 goto send_now;
1967 }
1968
1969
1970 head = tcp_rtx_queue_head(sk);
1971 if (!head)
1972 goto send_now;
1973 delta = tp->tcp_clock_cache - head->tstamp;
1974
1975 if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
1976 goto send_now;
1977
1978
1979
1980
1981
1982
1983
1984 if (cong_win < send_win) {
1985 if (cong_win <= skb->len) {
1986 *is_cwnd_limited = true;
1987 return true;
1988 }
1989 } else {
1990 if (send_win <= skb->len) {
1991 *is_rwnd_limited = true;
1992 return true;
1993 }
1994 }
1995
1996
1997 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
1998 TCP_SKB_CB(skb)->eor)
1999 goto send_now;
2000
2001 return true;
2002
2003send_now:
2004 return false;
2005}
2006
2007static inline void tcp_mtu_check_reprobe(struct sock *sk)
2008{
2009 struct inet_connection_sock *icsk = inet_csk(sk);
2010 struct tcp_sock *tp = tcp_sk(sk);
2011 struct net *net = sock_net(sk);
2012 u32 interval;
2013 s32 delta;
2014
2015 interval = net->ipv4.sysctl_tcp_probe_interval;
2016 delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
2017 if (unlikely(delta >= interval * HZ)) {
2018 int mss = tcp_current_mss(sk);
2019
2020
2021 icsk->icsk_mtup.probe_size = 0;
2022 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
2023 sizeof(struct tcphdr) +
2024 icsk->icsk_af_ops->net_header_len;
2025 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
2026
2027
2028 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
2029 }
2030}
2031
2032static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
2033{
2034 struct sk_buff *skb, *next;
2035
2036 skb = tcp_send_head(sk);
2037 tcp_for_write_queue_from_safe(skb, next, sk) {
2038 if (len <= skb->len)
2039 break;
2040
2041 if (unlikely(TCP_SKB_CB(skb)->eor))
2042 return false;
2043
2044 len -= skb->len;
2045 }
2046
2047 return true;
2048}
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
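/* Create a new MTU probe if we are ready: deliberately send a larger segment
 * to discover whether the path supports a bigger MTU.  Returns -1 when
 * probing is not appropriate, 0 when we should wait (window or cwnd), and
 * 1 when a probe has been sent.
 */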
2059static int tcp_mtu_probe(struct sock *sk)
2060{
2061 struct inet_connection_sock *icsk = inet_csk(sk);
2062 struct tcp_sock *tp = tcp_sk(sk);
2063 struct sk_buff *skb, *nskb, *next;
2064 struct net *net = sock_net(sk);
2065 int probe_size;
2066 int size_needed;
2067 int copy, len;
2068 int mss_now;
2069 int interval;
2070
2071
2072
2073
2074
2075
2076 if (likely(!icsk->icsk_mtup.enabled ||
2077 icsk->icsk_mtup.probe_size ||
2078 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
2079 tp->snd_cwnd < 11 ||
2080 tp->rx_opt.num_sacks || tp->rx_opt.dsack))
2081 return -1;
2082
2083
2084
2085
2086
2087 mss_now = tcp_current_mss(sk);
2088 probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
2089 icsk->icsk_mtup.search_low) >> 1);
2090 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
2091 interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
2092
2093
2094
2095
2096 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
2097 interval < net->ipv4.sysctl_tcp_probe_threshold) {
2098
2099
2100
2101 tcp_mtu_check_reprobe(sk);
2102 return -1;
2103 }
2104
2105
2106 if (tp->write_seq - tp->snd_nxt < size_needed)
2107 return -1;
2108
2109 if (tp->snd_wnd < size_needed)
2110 return -1;
2111 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
2112 return 0;
2113
2114
2115 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
2116 if (!tcp_packets_in_flight(tp))
2117 return -1;
2118 else
2119 return 0;
2120 }
2121
2122 if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
2123 return -1;
2124
2125
2126 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
2127 if (!nskb)
2128 return -1;
2129 sk->sk_wmem_queued += nskb->truesize;
2130 sk_mem_charge(sk, nskb->truesize);
2131
2132 skb = tcp_send_head(sk);
2133
2134 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
2135 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
2136 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
2137 TCP_SKB_CB(nskb)->sacked = 0;
2138 nskb->csum = 0;
2139 nskb->ip_summed = CHECKSUM_PARTIAL;
2140
2141 tcp_insert_write_queue_before(nskb, skb, sk);
2142 tcp_highest_sack_replace(sk, skb, nskb);
2143
2144 len = 0;
2145 tcp_for_write_queue_from_safe(skb, next, sk) {
2146 copy = min_t(int, skb->len, probe_size - len);
2147 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
2148
2149 if (skb->len <= copy) {
2150
2151
2152 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
2153
2154
2155
2156 TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
2157 tcp_unlink_write_queue(skb, sk);
2158 sk_wmem_free_skb(sk, skb);
2159 } else {
2160 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
2161 ~(TCPHDR_FIN|TCPHDR_PSH);
2162 if (!skb_shinfo(skb)->nr_frags) {
2163 skb_pull(skb, copy);
2164 } else {
2165 __pskb_trim_head(skb, copy);
2166 tcp_set_skb_tso_segs(skb, mss_now);
2167 }
2168 TCP_SKB_CB(skb)->seq += copy;
2169 }
2170
2171 len += copy;
2172
2173 if (len >= probe_size)
2174 break;
2175 }
2176 tcp_init_tso_segs(nskb, nskb->len);
2177
2178
2179
2180
2181 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
2182
2183
2184 tp->snd_cwnd--;
2185 tcp_event_new_data_sent(sk, nskb);
2186
2187 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
2188 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
2189 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
2190
2191 return 1;
2192 }
2193
2194 return -1;
2195}
2196
2197static bool tcp_pacing_check(struct sock *sk)
2198{
2199 struct tcp_sock *tp = tcp_sk(sk);
2200
2201 if (!tcp_needs_internal_pacing(sk))
2202 return false;
2203
2204 if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
2205 return false;
2206
2207 if (!hrtimer_is_queued(&tp->pacing_timer)) {
2208 hrtimer_start(&tp->pacing_timer,
2209 ns_to_ktime(tp->tcp_wstamp_ns),
2210 HRTIMER_MODE_ABS_PINNED_SOFT);
2211 sock_hold(sk);
2212 }
2213 return true;
2214}
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
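/* TCP Small Queues: limit the number of bytes queued below the stack
 * (qdisc + device) to about two packets or ~1 ms worth at the current
 * pacing rate, which keeps latency low without starving the link.
 */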
2227static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2228 unsigned int factor)
2229{
2230 unsigned long limit;
2231
2232 limit = max_t(unsigned long,
2233 2 * skb->truesize,
2234 sk->sk_pacing_rate >> sk->sk_pacing_shift);
2235 if (sk->sk_pacing_status == SK_PACING_NONE)
2236 limit = min_t(unsigned long, limit,
2237 sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2238 limit <<= factor;
2239
2240 if (refcount_read(&sk->sk_wmem_alloc) > limit) {
2241
2242
2243
2244
2245
2246 if (tcp_rtx_queue_empty(sk))
2247 return false;
2248
2249 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
2250
2251
2252
2253
2254 smp_mb__after_atomic();
2255 if (refcount_read(&sk->sk_wmem_alloc) > limit)
2256 return true;
2257 }
2258 return false;
2259}
2260
2261static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
2262{
2263 const u32 now = tcp_jiffies32;
2264 enum tcp_chrono old = tp->chrono_type;
2265
2266 if (old > TCP_CHRONO_UNSPEC)
2267 tp->chrono_stat[old - 1] += now - tp->chrono_start;
2268 tp->chrono_start = now;
2269 tp->chrono_type = new;
2270}
2271
2272void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
2273{
2274 struct tcp_sock *tp = tcp_sk(sk);
2275
2276
2277
2278
2279
2280
2281 if (type > tp->chrono_type)
2282 tcp_chrono_set(tp, type);
2283}
2284
2285void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
2286{
2287 struct tcp_sock *tp = tcp_sk(sk);
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297 if (tcp_rtx_and_write_queues_empty(sk))
2298 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
2299 else if (type == tp->chrono_type)
2300 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
2301}
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
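/* This routine writes packets to the network.  It advances the send_head
 * as incoming ACKs open up the remote window.  Send at most one packet when
 * push_one > 0, and temporarily ignore the cwnd limit when push_one == 2.
 * Returns true if no segments are in flight and we have queued segments,
 * but cannot send anything now because of SWS or another problem.
 */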
2317static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2318 int push_one, gfp_t gfp)
2319{
2320 struct tcp_sock *tp = tcp_sk(sk);
2321 struct sk_buff *skb;
2322 unsigned int tso_segs, sent_pkts;
2323 int cwnd_quota;
2324 int result;
2325 bool is_cwnd_limited = false, is_rwnd_limited = false;
2326 u32 max_segs;
2327
2328 sent_pkts = 0;
2329
2330 tcp_mstamp_refresh(tp);
2331 if (!push_one) {
2332
2333 result = tcp_mtu_probe(sk);
2334 if (!result) {
2335 return false;
2336 } else if (result > 0) {
2337 sent_pkts = 1;
2338 }
2339 }
2340
2341 max_segs = tcp_tso_segs(sk, mss_now);
2342 while ((skb = tcp_send_head(sk))) {
2343 unsigned int limit;
2344
2345 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2346
2347 skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
2348 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
2349 tcp_init_tso_segs(skb, mss_now);
2350 goto repair;
2351 }
2352
2353 if (tcp_pacing_check(sk))
2354 break;
2355
2356 tso_segs = tcp_init_tso_segs(skb, mss_now);
2357 BUG_ON(!tso_segs);
2358
2359 cwnd_quota = tcp_cwnd_test(tp, skb);
2360 if (!cwnd_quota) {
2361 if (push_one == 2)
2362
2363 cwnd_quota = 1;
2364 else
2365 break;
2366 }
2367
2368 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
2369 is_rwnd_limited = true;
2370 break;
2371 }
2372
2373 if (tso_segs == 1) {
2374 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2375 (tcp_skb_is_last(sk, skb) ?
2376 nonagle : TCP_NAGLE_PUSH))))
2377 break;
2378 } else {
2379 if (!push_one &&
2380 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2381 &is_rwnd_limited, max_segs))
2382 break;
2383 }
2384
2385 limit = mss_now;
2386 if (tso_segs > 1 && !tcp_urg_mode(tp))
2387 limit = tcp_mss_split_point(sk, skb, mss_now,
2388 min_t(unsigned int,
2389 cwnd_quota,
2390 max_segs),
2391 nonagle);
2392
2393 if (skb->len > limit &&
2394 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2395 break;
2396
2397 if (tcp_small_queue_check(sk, skb, 0))
2398 break;
2399
2400 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2401 break;
2402
2403repair:
2404
2405
2406
2407 tcp_event_new_data_sent(sk, skb);
2408
2409 tcp_minshall_update(tp, mss_now, skb);
2410 sent_pkts += tcp_skb_pcount(skb);
2411
2412 if (push_one)
2413 break;
2414 }
2415
2416 if (is_rwnd_limited)
2417 tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
2418 else
2419 tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
2420
2421 if (likely(sent_pkts)) {
2422 if (tcp_in_cwnd_reduction(sk))
2423 tp->prr_out += sent_pkts;
2424
2425
2426 if (push_one != 2)
2427 tcp_schedule_loss_probe(sk, false);
2428 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2429 tcp_cwnd_validate(sk, is_cwnd_limited);
2430 return false;
2431 }
2432 return !tp->packets_out && !tcp_write_queue_empty(sk);
2433}
2434
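/* Schedule a Tail Loss Probe (TLP): arm a timer of roughly 2*SRTT so that a
 * probe segment is sent if no ACK arrives, capped so it never fires after
 * the RTO would.
 */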
2435bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
2436{
2437 struct inet_connection_sock *icsk = inet_csk(sk);
2438 struct tcp_sock *tp = tcp_sk(sk);
2439 u32 timeout, rto_delta_us;
2440 int early_retrans;
2441
2442
2443
2444
2445 if (tp->fastopen_rsk)
2446 return false;
2447
2448 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2449
2450
2451
2452 if ((early_retrans != 3 && early_retrans != 4) ||
2453 !tp->packets_out || !tcp_is_sack(tp) ||
2454 (icsk->icsk_ca_state != TCP_CA_Open &&
2455 icsk->icsk_ca_state != TCP_CA_CWR))
2456 return false;
2457
2458
2459
2460
2461
2462 if (tp->srtt_us) {
2463 timeout = usecs_to_jiffies(tp->srtt_us >> 2);
2464 if (tp->packets_out == 1)
2465 timeout += TCP_RTO_MIN;
2466 else
2467 timeout += TCP_TIMEOUT_MIN;
2468 } else {
2469 timeout = TCP_TIMEOUT_INIT;
2470 }
2471
2472
2473 rto_delta_us = advancing_rto ?
2474 jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
2475 tcp_rto_delta_us(sk);
2476 if (rto_delta_us > 0)
2477 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
2478
2479 tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2480 TCP_RTO_MAX, NULL);
2481 return true;
2482}
2483
2484
2485
2486
2487
2488static bool skb_still_in_host_queue(const struct sock *sk,
2489 const struct sk_buff *skb)
2490{
2491 if (unlikely(skb_fclone_busy(sk, skb))) {
2492 NET_INC_STATS(sock_net(sk),
2493 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2494 return true;
2495 }
2496 return false;
2497}
2498
2499
2500
2501
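/* When the probe timeout (PTO) fires, send one new segment if the window
 * allows it; otherwise retransmit the last segment (the tail loss probe).
 */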
2502void tcp_send_loss_probe(struct sock *sk)
2503{
2504 struct tcp_sock *tp = tcp_sk(sk);
2505 struct sk_buff *skb;
2506 int pcount;
2507 int mss = tcp_current_mss(sk);
2508
2509 skb = tcp_send_head(sk);
2510 if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
2511 pcount = tp->packets_out;
2512 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2513 if (tp->packets_out > pcount)
2514 goto probe_sent;
2515 goto rearm_timer;
2516 }
2517 skb = skb_rb_last(&sk->tcp_rtx_queue);
2518 if (unlikely(!skb)) {
2519 WARN_ONCE(tp->packets_out,
2520 "invalid inflight: %u state %u cwnd %u mss %d\n",
2521 tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
2522 inet_csk(sk)->icsk_pending = 0;
2523 return;
2524 }
2525
2526 /* At most one outstanding TLP retransmission. */
2527 if (tp->tlp_high_seq)
2528 goto rearm_timer;
2529
2530 if (skb_still_in_host_queue(sk, skb))
2531 goto rearm_timer;
2532
2533 pcount = tcp_skb_pcount(skb);
2534 if (WARN_ON(!pcount))
2535 goto rearm_timer;
2536
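 /* Probe with at most one MSS of data: split the skb so only its tail is resent. */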
2537 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2538 if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2539 (pcount - 1) * mss, mss,
2540 GFP_ATOMIC)))
2541 goto rearm_timer;
2542 skb = skb_rb_next(skb);
2543 }
2544
2545 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2546 goto rearm_timer;
2547
2548 if (__tcp_retransmit_skb(sk, skb, 1))
2549 goto rearm_timer;
2550
2551 /* Record snd_nxt for loss detection. */
2552 tp->tlp_high_seq = tp->snd_nxt;
2553
2554probe_sent:
2555 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
2556 /* Reset so that tcp_rearm_rto() will restart the timer from now. */
2557 inet_csk(sk)->icsk_pending = 0;
2558rearm_timer:
2559 tcp_rearm_rto(sk);
2560}
2561
2562/* Push out any pending frames which were held back due to
2563 * TCP_CORK or an attempt at coalescing tiny packets.
2564 * The socket must be locked by the caller.
2565 */
2566void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2567 int nonagle)
2568{
2569 /* If we are closed, the bytes will have to remain here.
2570 * In time closedown will finish, we empty the write queue and
2571 * all will be happy.
2572 */
2573 if (unlikely(sk->sk_state == TCP_CLOSE))
2574 return;
2575
2576 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2577 sk_gfp_mask(sk, GFP_ATOMIC)))
2578 tcp_check_probe_timer(sk);
2579}
2580
2581/* Send the _single_ skb sitting at the send head. This function requires
2582 * true push pending frames to start probing mss.
2583 */
2584void tcp_push_one(struct sock *sk, unsigned int mss_now)
2585{
2586 struct sk_buff *skb = tcp_send_head(sk);
2587
2588 BUG_ON(!skb || skb->len < mss_now);
2589
2590 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2591}
2592
2593/* Compute the receive window to advertise.
2594 *
2595 * The advertised window may never shrink once offered (RFC 793/1122),
2596 * and we try to avoid Silly Window Syndrome by not offering tiny
2597 * windows.  The window is therefore clamped to the receive-buffer
2598 * space, reduced under memory pressure, kept a multiple of the MSS
2599 * when window scaling is off, and aligned to the scale factor when
2600 * window scaling is on.
2601 */
2645u32 __tcp_select_window(struct sock *sk)
2646{
2647 struct inet_connection_sock *icsk = inet_csk(sk);
2648 struct tcp_sock *tp = tcp_sk(sk);
2649
2650 /* MSS for the peer's data: the receive-side MSS estimate (rcv_mss),
2651 * derived from the sizes of segments we have received, rather than
2652 * mss_clamp.
2653 */
2655 int mss = icsk->icsk_ack.rcv_mss;
2656 int free_space = tcp_space(sk);
2657 int allowed_space = tcp_full_space(sk);
2658 int full_space = min_t(int, tp->window_clamp, allowed_space);
2659 int window;
2660
2661 if (unlikely(mss > full_space)) {
2662 mss = full_space;
2663 if (mss <= 0)
2664 return 0;
2665 }
2666 if (free_space < (full_space >> 1)) {
2667 icsk->icsk_ack.quick = 0;
2668
2669 if (tcp_under_memory_pressure(sk))
2670 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2671 4U * tp->advmss);
2672
2673 /* free_space might become our new window, make sure we don't
2674 * increase it due to wscale.
2675 */
2676 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2677
2678 /* If free space is below the mss estimate, or below 1/16th of
2679 * the maximum allowed space, advertise a zero window so the peer
2680 * backs off.
2681 */
2685 if (free_space < (allowed_space >> 4) || free_space < mss)
2686 return 0;
2687 }
2688
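 /* Never advertise more than our current receive-window threshold (rcv_ssthresh). */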
2689 if (free_space > tp->rcv_ssthresh)
2690 free_space = tp->rcv_ssthresh;
2691
2692 /* Don't do rounding if we are using window scaling, since the
2693 * scaled window will not line up with the MSS boundary anyway.
2694 */
2695 if (tp->rx_opt.rcv_wscale) {
2696 window = free_space;
2697 /* Advertise enough space so that it won't get scaled away.
2698 * Important case: prevent a zero window announcement if
2699 * 1<<rcv_wscale > mss.
2700 */
2702 window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
2703 } else {
2704 window = tp->rcv_wnd;
2705 /* Get the largest window that is a nice multiple of mss.
2706 * Window clamp already applied above.
2707 * If our current window offering is within 1 mss of the
2708 * free space we just keep it. This prevents the divide
2709 * and multiply from happening most of the time.
2710 * We also don't do any window rounding when the free space
2711 * is too small.
2712 */
2713 if (window <= free_space - mss || window > free_space)
2714 window = rounddown(free_space, mss);
2715 else if (mss == full_space &&
2716 free_space > window + (full_space >> 1))
2717 window = free_space;
2718 }
2719
2720 return window;
2721}
2722
2723void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2724 const struct sk_buff *next_skb)
2725{
2726 if (unlikely(tcp_has_tx_tstamp(next_skb))) {
2727 const struct skb_shared_info *next_shinfo =
2728 skb_shinfo(next_skb);
2729 struct skb_shared_info *shinfo = skb_shinfo(skb);
2730
2731 shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
2732 shinfo->tskey = next_shinfo->tskey;
2733 TCP_SKB_CB(skb)->txstamp_ack |=
2734 TCP_SKB_CB(next_skb)->txstamp_ack;
2735 }
2736}
2737
2738/* Collapses two adjacent SKBs during retransmission. */
2739static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2740{
2741 struct tcp_sock *tp = tcp_sk(sk);
2742 struct sk_buff *next_skb = skb_rb_next(skb);
2743 int next_skb_size;
2744
2745 next_skb_size = next_skb->len;
2746
2747 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2748
2749 if (next_skb_size) {
2750 if (next_skb_size <= skb_availroom(skb))
2751 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2752 next_skb_size);
2753 else if (!skb_shift(skb, next_skb, next_skb_size))
2754 return false;
2755 }
2756 tcp_highest_sack_replace(sk, next_skb, skb);
2757
2758 /* Update the sequence range on the original skb. */
2759 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2760
2761 /* Merge over control information. This moves PSH/FIN etc. over. */
2762 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2763
2764 /* All done, get rid of the second SKB and account for it so
2765 * packet counting does not break.
2766 */
2767 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2768 TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
2769
2770 /* We changed the transmit queue under us, so clear hints. */
2771 tcp_clear_retrans_hints_partial(tp);
2772 if (next_skb == tp->retransmit_skb_hint)
2773 tp->retransmit_skb_hint = skb;
2774
2775 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2776
2777 tcp_skb_collapse_tstamp(skb, next_skb);
2778
2779 tcp_rtx_queue_unlink_and_free(next_skb, sk);
2780 return true;
2781}
2782
2783/* Check if coalescing SKBs is legal. */
2784static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2785{
2786 if (tcp_skb_pcount(skb) > 1)
2787 return false;
2788 if (skb_cloned(skb))
2789 return false;
2790 /* Do not collapse over SACKed data. */
2791 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2792 return false;
2793
2794 return true;
2795}
2796
2797/* Collapse packets in the retransmit queue so that fewer, larger packets
2798 * go out on the wire. This is only done on retransmission.
2799 */
2800static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2801 int space)
2802{
2803 struct tcp_sock *tp = tcp_sk(sk);
2804 struct sk_buff *skb = to, *tmp;
2805 bool first = true;
2806
2807 if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
2808 return;
2809 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2810 return;
2811
2812 skb_rbtree_walk_from_safe(skb, tmp) {
2813 if (!tcp_can_collapse(sk, skb))
2814 break;
2815
2816 if (!tcp_skb_can_collapse_to(to))
2817 break;
2818
2819 space -= skb->len;
2820
2821 if (first) {
2822 first = false;
2823 continue;
2824 }
2825
2826 if (space < 0)
2827 break;
2828
2829 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2830 break;
2831
2832 if (!tcp_collapse_retrans(sk, to))
2833 break;
2834 }
2835}
2836
2837/* This retransmits one SKB.  Policy decisions and retransmit queue
2838 * state updates are done by the caller.  Returns non-zero if an
2839 * error occurred which prevented the send.
2840 */
2841int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2842{
2843 struct inet_connection_sock *icsk = inet_csk(sk);
2844 struct tcp_sock *tp = tcp_sk(sk);
2845 unsigned int cur_mss;
2846 int diff, len, err;
2847
2848 /* Inconclusive MTU probe */
2850 if (icsk->icsk_mtup.probe_size)
2851 icsk->icsk_mtup.probe_size = 0;
2852
2853 /* Do not send more than we have queued; 1/4 is reserved for possible
2854 * copying overhead: fragmentation, tunneling, mangling etc.
2855 */
2856 if (refcount_read(&sk->sk_wmem_alloc) >
2857 min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
2858 sk->sk_sndbuf))
2859 return -EAGAIN;
2860
2861 if (skb_still_in_host_queue(sk, skb))
2862 return -EBUSY;
2863
2864 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2865 if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
2866 WARN_ON_ONCE(1);
2867 return -EINVAL;
2868 }
2869 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2870 return -ENOMEM;
2871 }
2872
2873 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2874 return -EHOSTUNREACH;
2875
2876 cur_mss = tcp_current_mss(sk);
2877
2878 /* If the receiver has shrunk its window and this skb is now out of
2879 * the new window, do not retransmit it.  The exception is when the
2880 * window shrank to zero: then our retransmit serves as a zero
2881 * window probe.
2882 */
2883 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2884 TCP_SKB_CB(skb)->seq != tp->snd_una)
2885 return -EAGAIN;
2886
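 /* The retransmit may cover at most 'segs' segments of the current MSS. */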
2887 len = cur_mss * segs;
2888 if (skb->len > len) {
2889 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
2890 cur_mss, GFP_ATOMIC))
2891 return -ENOMEM;
2892 } else {
2893 if (skb_unclone(skb, GFP_ATOMIC))
2894 return -ENOMEM;
2895
2896 diff = tcp_skb_pcount(skb);
2897 tcp_set_skb_tso_segs(skb, cur_mss);
2898 diff -= tcp_skb_pcount(skb);
2899 if (diff)
2900 tcp_adjust_pcount(sk, skb, diff);
2901 if (skb->len < cur_mss)
2902 tcp_retrans_try_collapse(sk, skb, cur_mss);
2903 }
2904
2905 /* RFC 3168, section 6.1.1.1: ECN fallback */
2906 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
2907 tcp_ecn_clear_syn(sk, skb);
2908
2909 /* Update global and per-socket TCP statistics. */
2910 segs = tcp_skb_pcount(skb);
2911 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
2912 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2913 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2914 tp->total_retrans += segs;
2915 tp->bytes_retrans += skb->len;
2916
2917 /* Make sure skb->data is aligned on arches that require it
2918 * and check if ack-trimming & collapsing extended the headroom
2919 * beyond what csum_start can cover.
2920 */
2921 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2922 skb_headroom(skb) >= 0xFFFF)) {
2923 struct sk_buff *nskb;
2924
2925 tcp_skb_tsorted_save(skb) {
2926 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2927 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2928 -ENOBUFS;
2929 } tcp_skb_tsorted_restore(skb);
2930
2931 if (!err) {
2932 tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
2933 tcp_rate_skb_sent(sk, skb);
2934 }
2935 } else {
2936 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2937 }
2938
2939 /* To avoid taking spuriously low RTT samples based on a timestamp
2940 * for a transmit that never happened, always mark EVER_RETRANS.
2941 */
2942 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2943
2944 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
2945 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
2946 TCP_SKB_CB(skb)->seq, segs, err);
2947
2948 if (likely(!err)) {
2949 trace_tcp_retransmit_skb(sk, skb);
2950 } else if (err != -EBUSY) {
2951 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
2952 }
2953 return err;
2954}
2955
2956int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2957{
2958 struct tcp_sock *tp = tcp_sk(sk);
2959 int err = __tcp_retransmit_skb(sk, skb, segs);
2960
2961 if (err == 0) {
2962#if FASTRETRANS_DEBUG > 0
2963 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2964 net_dbg_ratelimited("retrans_out leaked\n");
2965 }
2966#endif
2967 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2968 tp->retrans_out += tcp_skb_pcount(skb);
2969 }
2970
2971 /* Save the stamp of the first (attempted) retransmit. */
2972 if (!tp->retrans_stamp)
2973 tp->retrans_stamp = tcp_skb_timestamp(skb);
2974
2975 if (tp->undo_retrans < 0)
2976 tp->undo_retrans = 0;
2977 tp->undo_retrans += tcp_skb_pcount(skb);
2978 return err;
2979}
2980
2981/* This gets called after a retransmit timeout, and the initially
2982 * retransmitted data is acknowledged.  It tries to continue
2983 * resending the rest of the retransmit queue, until either
2984 * we've sent it all or the congestion window limit is reached.
2985 */
2986void tcp_xmit_retransmit_queue(struct sock *sk)
2987{
2988 const struct inet_connection_sock *icsk = inet_csk(sk);
2989 struct sk_buff *skb, *rtx_head, *hole = NULL;
2990 struct tcp_sock *tp = tcp_sk(sk);
2991 u32 max_segs;
2992 int mib_idx;
2993
2994 if (!tp->packets_out)
2995 return;
2996
2997 rtx_head = tcp_rtx_queue_head(sk);
2998 skb = tp->retransmit_skb_hint ?: rtx_head;
2999 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
3000 skb_rbtree_walk_from(skb) {
3001 __u8 sacked;
3002 int segs;
3003
3004 if (tcp_pacing_check(sk))
3005 break;
3006
3007 /* We could do better than to assign the hint on every pass. */
3008 if (!hole)
3009 tp->retransmit_skb_hint = skb;
3010
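 /* The remaining congestion-window budget caps how many segments we may send now. */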
3011 segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
3012 if (segs <= 0)
3013 return;
3014 sacked = TCP_SKB_CB(skb)->sacked;
3015 /* In case tcp_shift_skb_data() has aggregated large skbs,
3016 * make sure we do not send overly big TSO packets.
3017 */
3018 segs = min_t(int, segs, max_segs);
3019
3020 if (tp->retrans_out >= tp->lost_out) {
3021 break;
3022 } else if (!(sacked & TCPCB_LOST)) {
3023 if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
3024 hole = skb;
3025 continue;
3026
3027 } else {
3028 if (icsk->icsk_ca_state != TCP_CA_Loss)
3029 mib_idx = LINUX_MIB_TCPFASTRETRANS;
3030 else
3031 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
3032 }
3033
3034 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
3035 continue;
3036
3037 if (tcp_small_queue_check(sk, skb, 1))
3038 return;
3039
3040 if (tcp_retransmit_skb(sk, skb, segs))
3041 return;
3042
3043 NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
3044
3045 if (tcp_in_cwnd_reduction(sk))
3046 tp->prr_out += tcp_skb_pcount(skb);
3047
3048 if (skb == rtx_head &&
3049 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
3050 tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3051 inet_csk(sk)->icsk_rto,
3052 TCP_RTO_MAX,
3053 skb);
3054 }
3055}
3056
3057/* We allow exceeding memory limits for FIN packets to expedite
3058 * connection tear down and (memory) recovery.
3059 * Otherwise tcp_send_fin() could be tempted to either delay the FIN
3060 * or even be forced to close the flow without any FIN.
3061 * In general, we want to allow one skb per socket to avoid hangs
3062 * with edge-triggered epoll().
3063 */
3064void sk_forced_mem_schedule(struct sock *sk, int size)
3065{
3066 int amt;
3067
3068 if (size <= sk->sk_forward_alloc)
3069 return;
3070 amt = sk_mem_pages(size);
3071 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
3072 sk_memory_allocated_add(sk, amt);
3073
3074 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
3075 mem_cgroup_charge_skmem(sk->sk_memcg, amt);
3076}
3077
3078/* Send a FIN. The caller locks the socket for us.
3079 * We should try to send a FIN packet really hard, but eventually give up.
3080 */
3081void tcp_send_fin(struct sock *sk)
3082{
3083 struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
3084 struct tcp_sock *tp = tcp_sk(sk);
3085
3086 /* Optimization: tack the FIN onto the tail skb if we have one in the
3087 * write queue that was not yet sent, or if we are under memory pressure.
3088 * Note: in the latter case, the FIN will be sent after a timeout,
3089 * as the TCP stack thinks it has already been transmitted.
3090 */
3091 if (!tskb && tcp_under_memory_pressure(sk))
3092 tskb = skb_rb_last(&sk->tcp_rtx_queue);
3093
3094 if (tskb) {
3095coalesce:
3096 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
3097 TCP_SKB_CB(tskb)->end_seq++;
3098 tp->write_seq++;
3099 if (tcp_write_queue_empty(sk)) {
3100 /* This means tskb was already sent.
3101 * Pretend we included the FIN on the previous transmit.
3102 * We need to set tp->snd_nxt to the value it would have
3103 * if the FIN had been sent, because the retransmit path
3104 * does not change tp->snd_nxt.
3105 */
3106 tp->snd_nxt++;
3107 return;
3108 }
3109 } else {
3110 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
3111 if (unlikely(!skb)) {
3112 if (tskb)
3113 goto coalesce;
3114 return;
3115 }
3116 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
3117 skb_reserve(skb, MAX_TCP_HEADER);
3118 sk_forced_mem_schedule(sk, skb->truesize);
3119 /* FIN eats a sequence byte; write_seq is advanced by tcp_queue_skb(). */
3120 tcp_init_nondata_skb(skb, tp->write_seq,
3121 TCPHDR_ACK | TCPHDR_FIN);
3122 tcp_queue_skb(sk, skb);
3123 }
3124 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
3125}
3126
3127/* We get here when a process closes a file descriptor (either due to
3128 * an explicit close() or as a byproduct of exit()'ing) and there
3129 * was unread data in the receive queue.  This behavior is recommended
3130 * by RFC 2525, section 2.17.
3131 */
3132void tcp_send_active_reset(struct sock *sk, gfp_t priority)
3133{
3134 struct sk_buff *skb;
3135
3136 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
3137
3138 /* NOTE: No TCP options attached and we never retransmit this. */
3139 skb = alloc_skb(MAX_TCP_HEADER, priority);
3140 if (!skb) {
3141 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3142 return;
3143 }
3144
3145 /* Reserve space for headers and prepare control bits. */
3146 skb_reserve(skb, MAX_TCP_HEADER);
3147 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
3148 TCPHDR_ACK | TCPHDR_RST);
3149 tcp_mstamp_refresh(tcp_sk(sk));
3150 /* Send it off. */
3151 if (tcp_transmit_skb(sk, skb, 0, priority))
3152 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3153
3154 /* trace_tcp_send_reset() normally carries the skb that caused the RST;
3155 * the skb here is different from the troublesome one, so pass NULL.
3156 */
3157 trace_tcp_send_reset(sk, NULL);
3158}
3159
3160/* Send a crossed SYN-ACK during socket establishment.
3161 * WARNING: This routine must only be called when we have already sent
3162 * a SYN packet that crossed the incoming SYN that caused this routine
3163 * to get called. If this assumption fails then the initial rcv_wnd
3164 * and rcv_wscale values will not be correct.
3165 */
3166int tcp_send_synack(struct sock *sk)
3167{
3168 struct sk_buff *skb;
3169
3170 skb = tcp_rtx_queue_head(sk);
3171 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
3172 pr_err("%s: wrong queue state\n", __func__);
3173 return -EFAULT;
3174 }
3175 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
3176 if (skb_cloned(skb)) {
3177 struct sk_buff *nskb;
3178
3179 tcp_skb_tsorted_save(skb) {
3180 nskb = skb_copy(skb, GFP_ATOMIC);
3181 } tcp_skb_tsorted_restore(skb);
3182 if (!nskb)
3183 return -ENOMEM;
3184 INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
3185 tcp_rtx_queue_unlink_and_free(skb, sk);
3186 __skb_header_release(nskb);
3187 tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
3188 sk->sk_wmem_queued += nskb->truesize;
3189 sk_mem_charge(sk, nskb->truesize);
3190 skb = nskb;
3191 }
3192
3193 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
3194 tcp_ecn_send_synack(sk, skb);
3195 }
3196 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3197}
3198
3199/**
3200 * tcp_make_synack - Allocate one skb and build a SYNACK packet.
3201 * @sk: listener socket
3202 * @dst: dst entry attached to the SYNACK; it is consumed and the caller
3203 *       should not use it again
3204 * @req: request_sock pointer
3205 * @foc: cookie for TCP Fast Open
3206 * @synack_type: type of synack to prepare
3207 */
3208struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3209 struct request_sock *req,
3210 struct tcp_fastopen_cookie *foc,
3211 enum tcp_synack_type synack_type)
3212{
3213 struct inet_request_sock *ireq = inet_rsk(req);
3214 const struct tcp_sock *tp = tcp_sk(sk);
3215 struct tcp_md5sig_key *md5 = NULL;
3216 struct tcp_out_options opts;
3217 struct sk_buff *skb;
3218 int tcp_header_size;
3219 struct tcphdr *th;
3220 int mss;
3221
3222 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
3223 if (unlikely(!skb)) {
3224 dst_release(dst);
3225 return NULL;
3226 }
3227
3228 skb_reserve(skb, MAX_TCP_HEADER);
3229
3230 switch (synack_type) {
3231 case TCP_SYNACK_NORMAL:
3232 skb_set_owner_w(skb, req_to_sk(req));
3233 break;
3234 case TCP_SYNACK_COOKIE:
3235 /* Under synflood, we do not attach the skb to a socket,
3236 * to avoid false sharing.
3237 */
3238 break;
3239 case TCP_SYNACK_FASTOPEN:
3240 /* sk is a const pointer, because we want to express that
3241 * multiple cpus might call us concurrently.
3242 * sk->sk_wmem_alloc is an atomic, so we can promote to rw.
3243 */
3244 skb_set_owner_w(skb, (struct sock *)sk);
3245 break;
3246 }
3247 skb_dst_set(skb, dst);
3248
3249 mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3250
3251 memset(&opts, 0, sizeof(opts));
3252#ifdef CONFIG_SYN_COOKIES
3253 if (unlikely(req->cookie_ts))
3254 skb->skb_mstamp_ns = cookie_init_timestamp(req);
3255 else
3256#endif
3257 skb->skb_mstamp_ns = tcp_clock_ns();
3258
3259#ifdef CONFIG_TCP_MD5SIG
3260 rcu_read_lock();
3261 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3262#endif
3263 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3264 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3265 foc) + sizeof(*th);
3266
3267 skb_push(skb, tcp_header_size);
3268 skb_reset_transport_header(skb);
3269
3270 th = (struct tcphdr *)skb->data;
3271 memset(th, 0, sizeof(struct tcphdr));
3272 th->syn = 1;
3273 th->ack = 1;
3274 tcp_ecn_make_synack(req, th);
3275 th->source = htons(ireq->ir_num);
3276 th->dest = ireq->ir_rmt_port;
3277 skb->mark = ireq->ir_mark;
3278 skb->ip_summed = CHECKSUM_PARTIAL;
3279 th->seq = htonl(tcp_rsk(req)->snt_isn);
3280 /* XXX data is queued and acked as is. No buffer/window check! */
3281 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3282
3283 /* RFC 1323: the window in SYN & SYN/ACK segments is never scaled. */
3284 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3285 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3286 th->doff = (tcp_header_size >> 2);
3287 __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
3288
3289#ifdef CONFIG_TCP_MD5SIG
3290 /* Okay, we have all we need - do the md5 hash if needed. */
3291 if (md5)
3292 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
3293 md5, req_to_sk(req), skb);
3294 rcu_read_unlock();
3295#endif
3296
3297 /* Do not fool tcpdump (if any), clean our debris. */
3298 skb->tstamp = 0;
3299 return skb;
3300}
3301EXPORT_SYMBOL(tcp_make_synack);
3302
3303static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
3304{
3305 struct inet_connection_sock *icsk = inet_csk(sk);
3306 const struct tcp_congestion_ops *ca;
3307 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
3308
3309 if (ca_key == TCP_CA_UNSPEC)
3310 return;
3311
3312 rcu_read_lock();
3313 ca = tcp_ca_find_key(ca_key);
3314 if (likely(ca && try_module_get(ca->owner))) {
3315 module_put(icsk->icsk_ca_ops->owner);
3316 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
3317 icsk->icsk_ca_ops = ca;
3318 }
3319 rcu_read_unlock();
3320}
3321
3322/* Do all connect socket setups that can be done AF independent. */
3323static void tcp_connect_init(struct sock *sk)
3324{
3325 const struct dst_entry *dst = __sk_dst_get(sk);
3326 struct tcp_sock *tp = tcp_sk(sk);
3327 __u8 rcv_wscale;
3328 u32 rcv_wnd;
3329
3330 /* We'll fix this up when we get a response from the other end.
3331 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
3332 */
3333 tp->tcp_header_len = sizeof(struct tcphdr);
3334 if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
3335 tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
3336
3337#ifdef CONFIG_TCP_MD5SIG
3338 if (tp->af_specific->md5_lookup(sk, sk))
3339 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
3340#endif
3341
3342 /* If the user gave a TCP_MAXSEG, record it to clamp. */
3343 if (tp->rx_opt.user_mss)
3344 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3345 tp->max_window = 0;
3346 tcp_mtup_init(sk);
3347 tcp_sync_mss(sk, dst_mtu(dst));
3348
3349 tcp_ca_dst_init(sk, dst);
3350
3351 if (!tp->window_clamp)
3352 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
3353 tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
3354
3355 tcp_initialize_rcv_mss(sk);
3356
3357 /* Limit the window selection if the user enforces a smaller rx buffer. */
3358 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3359 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3360 tp->window_clamp = tcp_full_space(sk);
3361
3362 rcv_wnd = tcp_rwnd_init_bpf(sk);
3363 if (rcv_wnd == 0)
3364 rcv_wnd = dst_metric(dst, RTAX_INITRWND);
3365
3366 tcp_select_initial_window(sk, tcp_full_space(sk),
3367 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3368 &tp->rcv_wnd,
3369 &tp->window_clamp,
3370 sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
3371 &rcv_wscale,
3372 rcv_wnd);
3373
3374 tp->rx_opt.rcv_wscale = rcv_wscale;
3375 tp->rcv_ssthresh = tp->rcv_wnd;
3376
3377 sk->sk_err = 0;
3378 sock_reset_flag(sk, SOCK_DONE);
3379 tp->snd_wnd = 0;
3380 tcp_init_wl(tp, 0);
3381 tcp_write_queue_purge(sk);
3382 tp->snd_una = tp->write_seq;
3383 tp->snd_sml = tp->write_seq;
3384 tp->snd_up = tp->write_seq;
3385 tp->snd_nxt = tp->write_seq;
3386
3387 if (likely(!tp->repair))
3388 tp->rcv_nxt = 0;
3389 else
3390 tp->rcv_tstamp = tcp_jiffies32;
3391 tp->rcv_wup = tp->rcv_nxt;
3392 tp->copied_seq = tp->rcv_nxt;
3393
3394 inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
3395 inet_csk(sk)->icsk_retransmits = 0;
3396 tcp_clear_retrans(tp);
3397}
3398
3399static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3400{
3401 struct tcp_sock *tp = tcp_sk(sk);
3402 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3403
3404 tcb->end_seq += skb->len;
3405 __skb_header_release(skb);
3406 sk->sk_wmem_queued += skb->truesize;
3407 sk_mem_charge(sk, skb->truesize);
3408 tp->write_seq = tcb->end_seq;
3409 tp->packets_out += tcp_skb_pcount(skb);
3410}
3411
3412/* Build and send a SYN with data and a (cached) Fast Open cookie. However,
3413 * queue a data-only packet after the regular SYN, such that regular SYNs
3414 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
3415 * only the SYN sequence, the data are retransmitted in the first ACK.
3416 * If the cookie is not cached or another error occurs, fall back to sending
3417 * a regular SYN with a Fast Open cookie request option.
3418 */
3419static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3420{
3421 struct tcp_sock *tp = tcp_sk(sk);
3422 struct tcp_fastopen_request *fo = tp->fastopen_req;
3423 int space, err = 0;
3424 struct sk_buff *syn_data;
3425
3426 tp->rx_opt.mss_clamp = tp->advmss;
3427 if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
3428 goto fallback;
3429
3430 /* MSS for SYN-data is based on the cached MSS and bounded by PMTU and
3431 * user-MSS. Reserve maximum option space for middleboxes that add
3432 * private TCP options. The cost is reduced data space in the SYN :(
3433 */
3434 tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
3435
3436 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3437 MAX_TCP_OPTION_SPACE;
3438
3439 space = min_t(size_t, space, fo->size);
3440
3441 /* Limit to order-0 allocations. */
3442 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3443
3444 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
3445 if (!syn_data)
3446 goto fallback;
3447 syn_data->ip_summed = CHECKSUM_PARTIAL;
3448 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3449 if (space) {
3450 int copied = copy_from_iter(skb_put(syn_data, space), space,
3451 &fo->data->msg_iter);
3452 if (unlikely(!copied)) {
3453 tcp_skb_tsorted_anchor_cleanup(syn_data);
3454 kfree_skb(syn_data);
3455 goto fallback;
3456 }
3457 if (copied != space) {
3458 skb_trim(syn_data, copied);
3459 space = copied;
3460 }
3461 skb_zcopy_set(syn_data, fo->uarg, NULL);
3462 }
3463 /* No more data pending in inet_wait_for_connect(). */
3464 if (space == fo->size)
3465 fo->data = NULL;
3466 fo->copied = space;
3467
3468 tcp_connect_queue_skb(sk, syn_data);
3469 if (syn_data->len)
3470 tcp_chrono_start(sk, TCP_CHRONO_BUSY);
3471
3472 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3473
3474 syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
3475
3476 /* Now the full SYN+DATA was cloned and sent (or not);
3477 * remove the SYN from the original skb (syn_data)
3478 * we keep in the write queue in case of a retransmit, as we
3479 * also have the SYN packet (with no data) in the same queue.
3480 */
3481 TCP_SKB_CB(syn_data)->seq++;
3482 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3483 if (!err) {
3484 tp->syn_data = (fo->copied > 0);
3485 tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
3486 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3487 goto done;
3488 }
3489
3490 /* Data was not sent, put it in the write queue. */
3491 __skb_queue_tail(&sk->sk_write_queue, syn_data);
3492 tp->packets_out -= tcp_skb_pcount(syn_data);
3493
3494fallback:
3495 /* Send a regular SYN with a Fast Open cookie request option. */
3496 if (fo->cookie.len > 0)
3497 fo->cookie.len = 0;
3498 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3499 if (err)
3500 tp->syn_fastopen = 0;
3501done:
3502 fo->cookie.len = -1;
3503 return err;
3504}
3505
3506/* Build a SYN and send it off. */
3507int tcp_connect(struct sock *sk)
3508{
3509 struct tcp_sock *tp = tcp_sk(sk);
3510 struct sk_buff *buff;
3511 int err;
3512
3513 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
3514
3515 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
3516 return -EHOSTUNREACH;
3517
3518 tcp_connect_init(sk);
3519
3520 if (unlikely(tp->repair)) {
3521 tcp_finish_connect(sk, NULL);
3522 return 0;
3523 }
3524
3525 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
3526 if (unlikely(!buff))
3527 return -ENOBUFS;
3528
3529 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3530 tcp_mstamp_refresh(tp);
3531 tp->retrans_stamp = tcp_time_stamp(tp);
3532 tcp_connect_queue_skb(sk, buff);
3533 tcp_ecn_send_syn(sk, buff);
3534 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
3535
3536 /* Send off the SYN; include data in Fast Open. */
3537 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3538 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3539 if (err == -ECONNREFUSED)
3540 return err;
3541
3542 /* We change tp->snd_nxt after the tcp_transmit_skb() call
3543 * in order to make this packet get counted in tcpOutSegs.
3544 */
3545 tp->snd_nxt = tp->write_seq;
3546 tp->pushed_seq = tp->write_seq;
3547 buff = tcp_send_head(sk);
3548 if (unlikely(buff)) {
3549 tp->snd_nxt = TCP_SKB_CB(buff)->seq;
3550 tp->pushed_seq = TCP_SKB_CB(buff)->seq;
3551 }
3552 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3553
3554 /* Timer for repeating the SYN until an answer. */
3555 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3556 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3557 return 0;
3558}
3559EXPORT_SYMBOL(tcp_connect);
3560
3561/* Send out a delayed ack; the caller does the policy checking
3562 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
3563 * for details.
3564 */
3565void tcp_send_delayed_ack(struct sock *sk)
3566{
3567 struct inet_connection_sock *icsk = inet_csk(sk);
3568 int ato = icsk->icsk_ack.ato;
3569 unsigned long timeout;
3570
3571 if (ato > TCP_DELACK_MIN) {
3572 const struct tcp_sock *tp = tcp_sk(sk);
3573 int max_ato = HZ / 2;
3574
3575 if (inet_csk_in_pingpong_mode(sk) ||
3576 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3577 max_ato = TCP_DELACK_MAX;
3578
3579 /* Slow path, intersegment interval is "high". */
3580
3581 /* If some rtt estimate is known, use it to bound the delayed ack.
3582 * Do not use inet_csk(sk)->icsk_rto here; use the results of rtt
3583 * measurements directly.
3584 */
3585 if (tp->srtt_us) {
3586 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3587 TCP_DELACK_MIN);
3588
3589 if (rtt < max_ato)
3590 max_ato = rtt;
3591 }
3592
3593 ato = min(ato, max_ato);
3594 }
3595
3596 /* Stay within the limit we were given. */
3597 timeout = jiffies + ato;
3598
3599 /* Use the new timeout only if there wasn't an older one earlier. */
3600 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3601 /* If the delack timer was blocked or is about to expire,
3602 * send the ACK now.
3603 */
3604 if (icsk->icsk_ack.blocked ||
3605 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3606 tcp_send_ack(sk);
3607 return;
3608 }
3609
3610 if (!time_before(timeout, icsk->icsk_ack.timeout))
3611 timeout = icsk->icsk_ack.timeout;
3612 }
3613 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3614 icsk->icsk_ack.timeout = timeout;
3615 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3616}
3617
3618/* This routine sends an ack and also updates the window. */
3619void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
3620{
3621 struct sk_buff *buff;
3622
3623 /* If we have been reset, we may not send again. */
3624 if (sk->sk_state == TCP_CLOSE)
3625 return;
3626
3627 /* We are not putting this on the write queue, so
3628 * tcp_transmit_skb() will set the ownership to this
3629 * sock.
3630 */
3631 buff = alloc_skb(MAX_TCP_HEADER,
3632 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3633 if (unlikely(!buff)) {
3634 inet_csk_schedule_ack(sk);
3635 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3636 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3637 TCP_DELACK_MAX, TCP_RTO_MAX);
3638 return;
3639 }
3640
3641 /* Reserve space for headers and prepare control bits. */
3642 skb_reserve(buff, MAX_TCP_HEADER);
3643 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3644
3645 /* We do not want pure acks influencing TCP Small Queues or fq/pacing
3646 * too much.
3647 */
3649 skb_set_tcp_pure_ack(buff);
3650
3651 /* Send it off; this clears delayed acks for us. */
3652 __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
3653}
3654EXPORT_SYMBOL_GPL(__tcp_send_ack);
3655
3656void tcp_send_ack(struct sock *sk)
3657{
3658 __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
3659}
3660
3661/* This routine sends a packet with an out of date sequence
3662 * number. It assumes the other end will try to ack it.
3663 *
3664 * Question: what should we do in urgent mode?
3665 * 4.4BSD forces sending a single byte of data. We cannot send
3666 * out of window data, because we have SND.NXT==SND.MAX...
3667 *
3668 * Current solution: send TWO zero-length segments in urgent mode:
3669 * one with SEG.SEQ=SND.UNA to deliver the urgent pointer, another
3670 * out-of-date with SND.UNA-1 to probe the window.
3671 */
3672static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3673{
3674 struct tcp_sock *tp = tcp_sk(sk);
3675 struct sk_buff *skb;
3676
3677 /* We don't queue it; tcp_transmit_skb() sets ownership. */
3678 skb = alloc_skb(MAX_TCP_HEADER,
3679 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3680 if (!skb)
3681 return -1;
3682
3683 /* Reserve space for headers and set control bits. */
3684 skb_reserve(skb, MAX_TCP_HEADER);
3685 /* Use a previous sequence.  This should cause the other
3686 * end to send an ack.  Don't queue or clone the SKB, just
3687 * send it.
3688 */
3689 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3690 NET_INC_STATS(sock_net(sk), mib);
3691 return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
3692}
3693
3694/* Called from setsockopt(... TCP_REPAIR). */
3695void tcp_send_window_probe(struct sock *sk)
3696{
3697 if (sk->sk_state == TCP_ESTABLISHED) {
3698 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3699 tcp_mstamp_refresh(tcp_sk(sk));
3700 tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
3701 }
3702}
3703
3704/* Initiate keepalive or window probe from timer. */
3705int tcp_write_wakeup(struct sock *sk, int mib)
3706{
3707 struct tcp_sock *tp = tcp_sk(sk);
3708 struct sk_buff *skb;
3709
3710 if (sk->sk_state == TCP_CLOSE)
3711 return -1;
3712
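 /* Prefer sending new data that fits in the window over a bare probe. */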
3713 skb = tcp_send_head(sk);
3714 if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3715 int err;
3716 unsigned int mss = tcp_current_mss(sk);
3717 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3718
3719 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3720 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3721
3722 /* We are probing the opening of a window
3723 * but the window size is != 0;
3724 * this must have been the result of sender-side SWS avoidance.
3725 */
3726 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3727 skb->len > mss) {
3728 seg_size = min(seg_size, mss);
3729 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3730 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
3731 skb, seg_size, mss, GFP_ATOMIC))
3732 return -1;
3733 } else if (!tcp_skb_pcount(skb))
3734 tcp_set_skb_tso_segs(skb, mss);
3735
3736 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3737 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3738 if (!err)
3739 tcp_event_new_data_sent(sk, skb);
3740 return err;
3741 } else {
3742 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3743 tcp_xmit_probe_skb(sk, 1, mib);
3744 return tcp_xmit_probe_skb(sk, 0, mib);
3745 }
3746}
3747
3748/* A window probe timeout has occurred.  If the window is not closed,
3749 * send a partial packet, else send a zero probe.
3750 */
3751void tcp_send_probe0(struct sock *sk)
3752{
3753 struct inet_connection_sock *icsk = inet_csk(sk);
3754 struct tcp_sock *tp = tcp_sk(sk);
3755 struct net *net = sock_net(sk);
3756 unsigned long timeout;
3757 int err;
3758
3759 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3760
3761 if (tp->packets_out || tcp_write_queue_empty(sk)) {
3762 /* Cancel the probe timer, if it is not required. */
3763 icsk->icsk_probes_out = 0;
3764 icsk->icsk_backoff = 0;
3765 return;
3766 }
3767
3768 icsk->icsk_probes_out++;
3769 if (err <= 0) {
3770 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3771 icsk->icsk_backoff++;
3772 timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
3773 } else {
3774 /* If the packet was not sent due to local congestion,
3775 * let senders fight for local resources conservatively.
3776 */
3777 timeout = TCP_RESOURCE_PROBE_INTERVAL;
3778 }
3779 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
3780}
3781
3782int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3783{
3784 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3785 struct flowi fl;
3786 int res;
3787
3788 tcp_rsk(req)->txhash = net_tx_rndhash();
3789 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
3790 if (!res) {
3791 __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
3792 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3793 if (unlikely(tcp_passive_fastopen(sk)))
3794 tcp_sk(sk)->total_retrans++;
3795 trace_tcp_retransmit_synack(sk, req);
3796 }
3797 return res;
3798}
3799EXPORT_SYMBOL(tcp_rtx_synack);
3800