/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP):
 *		the output engine (segment construction, option writing,
 *		window selection, TSO/pacing and the retransmission path).
 */

37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/gfp.h>
43#include <linux/module.h>
44
45
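/* People can turn this off for buggy TCP's found in printers etc. */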
46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47
48
49
50
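/* People can turn this on to work with those rare, broken TCPs that
 * interpret the window field as a signed quantity.
 */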
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52
53
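/* Default TSQ limit of four TSO segments */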
54int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
55
56
57
58
59
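/* This limits the percentage of the congestion window which we
 * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */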
60int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61
62
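/* By default, RFC2861 behavior. */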
63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64
65unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
66EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
67
68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
69 int push_one, gfp_t gfp);
70
71
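/* Account for new data that has been sent to the network. */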
72static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
73{
74 struct inet_connection_sock *icsk = inet_csk(sk);
75 struct tcp_sock *tp = tcp_sk(sk);
76 unsigned int prior_packets = tp->packets_out;
77
78 tcp_advance_send_head(sk, skb);
79 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
80
81 tp->packets_out += tcp_skb_pcount(skb);
82 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
83 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
84 tcp_rearm_rto(sk);
85 }
86
87 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
88 tcp_skb_pcount(skb));
89}
90
91
92
93
94
95
96
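/* SND.NXT, if window was not shrunk.
 * If window has been shrunk, what should we make? It is not clear at all.
 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
 * invalid.  So, pick SND.NXT while it is still acceptable.
 */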
97static inline __u32 tcp_acceptable_seq(const struct sock *sk)
98{
99 const struct tcp_sock *tp = tcp_sk(sk);
100
101 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
102 return tp->snd_nxt;
103 else
104 return tcp_wnd_end(tp);
105}
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
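/* Calculate mss to advertise in SYN segment.  RFC1122, RFC1063 and
 * draft-ietf-tcpimpl-pmtud-01 state that it is independent of path mtu
 * and should ideally be the maximal possible segment size.  We advertise
 * the MSS derived from the route metric, clamped by tp->advmss.
 */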
121static __u16 tcp_advertise_mss(struct sock *sk)
122{
123 struct tcp_sock *tp = tcp_sk(sk);
124 const struct dst_entry *dst = __sk_dst_get(sk);
125 int mss = tp->advmss;
126
127 if (dst) {
128 unsigned int metric = dst_metric_advmss(dst);
129
130 if (metric < mss) {
131 mss = metric;
132 tp->advmss = mss;
133 }
134 }
135
136 return (__u16)mss;
137}
138
139
140
141
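/* RFC2861. Reset CWND after idle period longer than RTO to "restart window".
 * This is the first part of the cwnd validation mechanism.
 */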
142void tcp_cwnd_restart(struct sock *sk, s32 delta)
143{
144 struct tcp_sock *tp = tcp_sk(sk);
145 u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
146 u32 cwnd = tp->snd_cwnd;
147
148 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
149
150 tp->snd_ssthresh = tcp_current_ssthresh(sk);
151 restart_cwnd = min(restart_cwnd, cwnd);
152
153 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
154 cwnd >>= 1;
155 tp->snd_cwnd = max(cwnd, restart_cwnd);
156 tp->snd_cwnd_stamp = tcp_time_stamp;
157 tp->snd_cwnd_used = 0;
158}
159
160
161static void tcp_event_data_sent(struct tcp_sock *tp,
162 struct sock *sk)
163{
164 struct inet_connection_sock *icsk = inet_csk(sk);
165 const u32 now = tcp_time_stamp;
166
167 if (tcp_packets_in_flight(tp) == 0)
168 tcp_ca_event(sk, CA_EVENT_TX_START);
169
170 tp->lsndtime = now;
171
172
173
174
175 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
176 icsk->icsk_ack.pingpong = 1;
177}
178
179
180static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
181{
182 tcp_dec_quickack_mode(sk, pkts);
183 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
184}
185
186
187u32 tcp_default_init_rwnd(u32 mss)
188{
189
190
191
192
193
194 u32 init_rwnd = TCP_INIT_CWND * 2;
195
196 if (mss > 1460)
197 init_rwnd = max((1460 * init_rwnd) / mss, 2U);
198 return init_rwnd;
199}
200
201
202
203
204
205
206
207
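/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered. Store the results in the tp structure.
 * NOTE: for smooth operation the initial space offering should
 * be a multiple of mss if possible. We assume here that mss >= 1.
 * This MUST be enforced by all callers.
 */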
208void tcp_select_initial_window(int __space, __u32 mss,
209 __u32 *rcv_wnd, __u32 *window_clamp,
210 int wscale_ok, __u8 *rcv_wscale,
211 __u32 init_rcv_wnd)
212{
213 unsigned int space = (__space < 0 ? 0 : __space);
214
215
216 if (*window_clamp == 0)
217 (*window_clamp) = (65535 << 14);
218 space = min(*window_clamp, space);
219
220
221 if (space > mss)
222 space = (space / mss) * mss;
223
224
225
226
227
228
229
230
231
232 if (sysctl_tcp_workaround_signed_windows)
233 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
234 else
235 (*rcv_wnd) = space;
236
237 (*rcv_wscale) = 0;
238 if (wscale_ok) {
239
240
241
242 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
243 space = min_t(u32, space, *window_clamp);
244 while (space > 65535 && (*rcv_wscale) < 14) {
245 space >>= 1;
246 (*rcv_wscale)++;
247 }
248 }
249
250 if (mss > (1 << *rcv_wscale)) {
251 if (!init_rcv_wnd)
252 init_rcv_wnd = tcp_default_init_rwnd(mss);
253 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
254 }
255
256
257 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
258}
259EXPORT_SYMBOL(tcp_select_initial_window);
260
261
262
263
264
265
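/* Chose a new window to advertise, update state in tcp_sock for the
 * socket, and return result with RFC1323 scaling applied.  The return
 * value can be stuffed directly into th->window for an outgoing frame.
 */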
266static u16 tcp_select_window(struct sock *sk)
267{
268 struct tcp_sock *tp = tcp_sk(sk);
269 u32 old_win = tp->rcv_wnd;
270 u32 cur_win = tcp_receive_window(tp);
271 u32 new_win = __tcp_select_window(sk);
272
273
274 if (new_win < cur_win) {
275
276
277
278
279
280
281
282 if (new_win == 0)
283 NET_INC_STATS(sock_net(sk),
284 LINUX_MIB_TCPWANTZEROWINDOWADV);
285 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
286 }
287 tp->rcv_wnd = new_win;
288 tp->rcv_wup = tp->rcv_nxt;
289
290
291
292
293 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
294 new_win = min(new_win, MAX_TCP_WINDOW);
295 else
296 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
297
298
299 new_win >>= tp->rx_opt.rcv_wscale;
300
301
302 if (new_win == 0) {
303 tp->pred_flags = 0;
304 if (old_win)
305 NET_INC_STATS(sock_net(sk),
306 LINUX_MIB_TCPTOZEROWINDOWADV);
307 } else if (old_win == 0) {
308 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
309 }
310
311 return new_win;
312}
313
314
315static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
316{
317 const struct tcp_sock *tp = tcp_sk(sk);
318
319 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
320 if (!(tp->ecn_flags & TCP_ECN_OK))
321 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
322 else if (tcp_ca_needs_ecn(sk))
323 INET_ECN_xmit(sk);
324}
325
326
327static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
328{
329 struct tcp_sock *tp = tcp_sk(sk);
330 bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
331 tcp_ca_needs_ecn(sk);
332
333 if (!use_ecn) {
334 const struct dst_entry *dst = __sk_dst_get(sk);
335
336 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
337 use_ecn = true;
338 }
339
340 tp->ecn_flags = 0;
341
342 if (use_ecn) {
343 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
344 tp->ecn_flags = TCP_ECN_OK;
345 if (tcp_ca_needs_ecn(sk))
346 INET_ECN_xmit(sk);
347 }
348}
349
350static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
351{
352 if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
353
354
355
356 TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
357}
358
359static void
360tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
361{
362 if (inet_rsk(req)->ecn_ok)
363 th->ece = 1;
364}
365
366
367
368
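/* Set up ECN state for a packet on an ESTABLISHED socket that is about to
 * be sent.
 */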
369static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
370 int tcp_header_len)
371{
372 struct tcp_sock *tp = tcp_sk(sk);
373
374 if (tp->ecn_flags & TCP_ECN_OK) {
375
376 if (skb->len != tcp_header_len &&
377 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
378 INET_ECN_xmit(sk);
379 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
380 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
381 tcp_hdr(skb)->cwr = 1;
382 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
383 }
384 } else if (!tcp_ca_needs_ecn(sk)) {
385
386 INET_ECN_dontxmit(sk);
387 }
388 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
389 tcp_hdr(skb)->ece = 1;
390 }
391}
392
393
394
395
396static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
397{
398 skb->ip_summed = CHECKSUM_PARTIAL;
399 skb->csum = 0;
400
401 TCP_SKB_CB(skb)->tcp_flags = flags;
402 TCP_SKB_CB(skb)->sacked = 0;
403
404 tcp_skb_pcount_set(skb, 1);
405
406 TCP_SKB_CB(skb)->seq = seq;
407 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
408 seq++;
409 TCP_SKB_CB(skb)->end_seq = seq;
410}
411
412static inline bool tcp_urg_mode(const struct tcp_sock *tp)
413{
414 return tp->snd_una != tp->snd_up;
415}
416
417#define OPTION_SACK_ADVERTISE (1 << 0)
418#define OPTION_TS (1 << 1)
419#define OPTION_MD5 (1 << 2)
420#define OPTION_WSCALE (1 << 3)
421#define OPTION_FAST_OPEN_COOKIE (1 << 8)
422
423struct tcp_out_options {
424 u16 options;
425 u16 mss;
426 u8 ws;
427 u8 num_sack_blocks;
428 u8 hash_size;
429 __u8 *hash_location;
430 __u32 tsval, tsecr;
431 struct tcp_fastopen_cookie *fastopen_cookie;
432};
433
434
435
436
437
438
439
440
441
442
443
444
445
446
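/* Write previously computed TCP options to the packet.
 *
 * Beware: Something in the Internet is very sensitive to the ordering
 * of TCP options; keep the SACK-permitted/timestamp packing below as-is
 * for inter-operability with broken stacks.
 */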
447static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
448 struct tcp_out_options *opts)
449{
450 u16 options = opts->options;
451
452 if (unlikely(OPTION_MD5 & options)) {
453 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
454 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
455
456 opts->hash_location = (__u8 *)ptr;
457 ptr += 4;
458 }
459
460 if (unlikely(opts->mss)) {
461 *ptr++ = htonl((TCPOPT_MSS << 24) |
462 (TCPOLEN_MSS << 16) |
463 opts->mss);
464 }
465
466 if (likely(OPTION_TS & options)) {
467 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
468 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
469 (TCPOLEN_SACK_PERM << 16) |
470 (TCPOPT_TIMESTAMP << 8) |
471 TCPOLEN_TIMESTAMP);
472 options &= ~OPTION_SACK_ADVERTISE;
473 } else {
474 *ptr++ = htonl((TCPOPT_NOP << 24) |
475 (TCPOPT_NOP << 16) |
476 (TCPOPT_TIMESTAMP << 8) |
477 TCPOLEN_TIMESTAMP);
478 }
479 *ptr++ = htonl(opts->tsval);
480 *ptr++ = htonl(opts->tsecr);
481 }
482
483 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
484 *ptr++ = htonl((TCPOPT_NOP << 24) |
485 (TCPOPT_NOP << 16) |
486 (TCPOPT_SACK_PERM << 8) |
487 TCPOLEN_SACK_PERM);
488 }
489
490 if (unlikely(OPTION_WSCALE & options)) {
491 *ptr++ = htonl((TCPOPT_NOP << 24) |
492 (TCPOPT_WINDOW << 16) |
493 (TCPOLEN_WINDOW << 8) |
494 opts->ws);
495 }
496
497 if (unlikely(opts->num_sack_blocks)) {
498 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
499 tp->duplicate_sack : tp->selective_acks;
500 int this_sack;
501
502 *ptr++ = htonl((TCPOPT_NOP << 24) |
503 (TCPOPT_NOP << 16) |
504 (TCPOPT_SACK << 8) |
505 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
506 TCPOLEN_SACK_PERBLOCK)));
507
508 for (this_sack = 0; this_sack < opts->num_sack_blocks;
509 ++this_sack) {
510 *ptr++ = htonl(sp[this_sack].start_seq);
511 *ptr++ = htonl(sp[this_sack].end_seq);
512 }
513
514 tp->rx_opt.dsack = 0;
515 }
516
517 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
518 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
519 u8 *p = (u8 *)ptr;
520 u32 len;
521
522 if (foc->exp) {
523 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
524 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
525 TCPOPT_FASTOPEN_MAGIC);
526 p += TCPOLEN_EXP_FASTOPEN_BASE;
527 } else {
528 len = TCPOLEN_FASTOPEN_BASE + foc->len;
529 *p++ = TCPOPT_FASTOPEN;
530 *p++ = len;
531 }
532
533 memcpy(p, foc->val, foc->len);
534 if ((len & 3) == 2) {
535 p[foc->len] = TCPOPT_NOP;
536 p[foc->len + 1] = TCPOPT_NOP;
537 }
538 ptr += (len + 3) >> 2;
539 }
540}
541
542
543
544
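/* Compute TCP options for SYN packets. This is not the final
 * network wire format yet.
 */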
545static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
546 struct tcp_out_options *opts,
547 struct tcp_md5sig_key **md5)
548{
549 struct tcp_sock *tp = tcp_sk(sk);
550 unsigned int remaining = MAX_TCP_OPTION_SPACE;
551 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
552
553#ifdef CONFIG_TCP_MD5SIG
554 *md5 = tp->af_specific->md5_lookup(sk, sk);
555 if (*md5) {
556 opts->options |= OPTION_MD5;
557 remaining -= TCPOLEN_MD5SIG_ALIGNED;
558 }
559#else
560 *md5 = NULL;
561#endif
562
563
564
565
566
567
568
569
570
571
572 opts->mss = tcp_advertise_mss(sk);
573 remaining -= TCPOLEN_MSS_ALIGNED;
574
575 if (likely(sysctl_tcp_timestamps && !*md5)) {
576 opts->options |= OPTION_TS;
577 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
578 opts->tsecr = tp->rx_opt.ts_recent;
579 remaining -= TCPOLEN_TSTAMP_ALIGNED;
580 }
581 if (likely(sysctl_tcp_window_scaling)) {
582 opts->ws = tp->rx_opt.rcv_wscale;
583 opts->options |= OPTION_WSCALE;
584 remaining -= TCPOLEN_WSCALE_ALIGNED;
585 }
586 if (likely(sysctl_tcp_sack)) {
587 opts->options |= OPTION_SACK_ADVERTISE;
588 if (unlikely(!(OPTION_TS & opts->options)))
589 remaining -= TCPOLEN_SACKPERM_ALIGNED;
590 }
591
592 if (fastopen && fastopen->cookie.len >= 0) {
593 u32 need = fastopen->cookie.len;
594
595 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
596 TCPOLEN_FASTOPEN_BASE;
597 need = (need + 3) & ~3U;
598 if (remaining >= need) {
599 opts->options |= OPTION_FAST_OPEN_COOKIE;
600 opts->fastopen_cookie = &fastopen->cookie;
601 remaining -= need;
602 tp->syn_fastopen = 1;
603 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
604 }
605 }
606
607 return MAX_TCP_OPTION_SPACE - remaining;
608}
609
610
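/* Set up TCP options for SYN-ACKs. */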
611static unsigned int tcp_synack_options(struct request_sock *req,
612 unsigned int mss, struct sk_buff *skb,
613 struct tcp_out_options *opts,
614 const struct tcp_md5sig_key *md5,
615 struct tcp_fastopen_cookie *foc)
616{
617 struct inet_request_sock *ireq = inet_rsk(req);
618 unsigned int remaining = MAX_TCP_OPTION_SPACE;
619
620#ifdef CONFIG_TCP_MD5SIG
621 if (md5) {
622 opts->options |= OPTION_MD5;
623 remaining -= TCPOLEN_MD5SIG_ALIGNED;
624
625
626
627
628
629
630 ireq->tstamp_ok &= !ireq->sack_ok;
631 }
632#endif
633
634
635 opts->mss = mss;
636 remaining -= TCPOLEN_MSS_ALIGNED;
637
638 if (likely(ireq->wscale_ok)) {
639 opts->ws = ireq->rcv_wscale;
640 opts->options |= OPTION_WSCALE;
641 remaining -= TCPOLEN_WSCALE_ALIGNED;
642 }
643 if (likely(ireq->tstamp_ok)) {
644 opts->options |= OPTION_TS;
645 opts->tsval = tcp_skb_timestamp(skb);
646 opts->tsecr = req->ts_recent;
647 remaining -= TCPOLEN_TSTAMP_ALIGNED;
648 }
649 if (likely(ireq->sack_ok)) {
650 opts->options |= OPTION_SACK_ADVERTISE;
651 if (unlikely(!ireq->tstamp_ok))
652 remaining -= TCPOLEN_SACKPERM_ALIGNED;
653 }
654 if (foc != NULL && foc->len >= 0) {
655 u32 need = foc->len;
656
657 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
658 TCPOLEN_FASTOPEN_BASE;
659 need = (need + 3) & ~3U;
660 if (remaining >= need) {
661 opts->options |= OPTION_FAST_OPEN_COOKIE;
662 opts->fastopen_cookie = foc;
663 remaining -= need;
664 }
665 }
666
667 return MAX_TCP_OPTION_SPACE - remaining;
668}
669
670
671
672
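/* Compute TCP options for ESTABLISHED sockets. This is not the
 * final wire format yet.
 */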
673static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
674 struct tcp_out_options *opts,
675 struct tcp_md5sig_key **md5)
676{
677 struct tcp_sock *tp = tcp_sk(sk);
678 unsigned int size = 0;
679 unsigned int eff_sacks;
680
681 opts->options = 0;
682
683#ifdef CONFIG_TCP_MD5SIG
684 *md5 = tp->af_specific->md5_lookup(sk, sk);
685 if (unlikely(*md5)) {
686 opts->options |= OPTION_MD5;
687 size += TCPOLEN_MD5SIG_ALIGNED;
688 }
689#else
690 *md5 = NULL;
691#endif
692
693 if (likely(tp->rx_opt.tstamp_ok)) {
694 opts->options |= OPTION_TS;
695 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
696 opts->tsecr = tp->rx_opt.ts_recent;
697 size += TCPOLEN_TSTAMP_ALIGNED;
698 }
699
700 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
701 if (unlikely(eff_sacks)) {
702 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
703 opts->num_sack_blocks =
704 min_t(unsigned int, eff_sacks,
705 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
706 TCPOLEN_SACK_PERBLOCK);
707 size += TCPOLEN_SACK_BASE_ALIGNED +
708 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
709 }
710
711 return size;
712}
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
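/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ goal is to keep a small amount of skbs per tcp flow in tx queues
 * (qdisc + device) to reduce RTT and bufferbloat.  We do this using a
 * special skb destructor (tcp_wfree).  Since transmitting from an skb
 * destructor is forbidden, a per-cpu tasklet processes all sockets that
 * eventually need to send more skbs.
 */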
729struct tsq_tasklet {
730 struct tasklet_struct tasklet;
731 struct list_head head;
732};
733static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
734
735static void tcp_tsq_handler(struct sock *sk)
736{
737 if ((1 << sk->sk_state) &
738 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
739 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
740 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
741 0, GFP_ATOMIC);
742}
743
744
745
746
747
748
749static void tcp_tasklet_func(unsigned long data)
750{
751 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
752 LIST_HEAD(list);
753 unsigned long flags;
754 struct list_head *q, *n;
755 struct tcp_sock *tp;
756 struct sock *sk;
757
758 local_irq_save(flags);
759 list_splice_init(&tsq->head, &list);
760 local_irq_restore(flags);
761
762 list_for_each_safe(q, n, &list) {
763 tp = list_entry(q, struct tcp_sock, tsq_node);
764 list_del(&tp->tsq_node);
765
766 sk = (struct sock *)tp;
767 bh_lock_sock(sk);
768
769 if (!sock_owned_by_user(sk)) {
770 tcp_tsq_handler(sk);
771 } else {
772
773 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
774 }
775 bh_unlock_sock(sk);
776
777 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
778 sk_free(sk);
779 }
780}
781
782#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
783 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
784 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
785 (1UL << TCP_MTU_REDUCED_DEFERRED))
786
787
788
789
790
791
792
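/**
 * tcp_release_cb - tcp release_sock() callback
 * @sk: socket
 *
 * Called from release_sock() to perform protocol dependent
 * actions that were deferred while the socket was owned by user.
 */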
793void tcp_release_cb(struct sock *sk)
794{
795 struct tcp_sock *tp = tcp_sk(sk);
796 unsigned long flags, nflags;
797
798
799 do {
800 flags = tp->tsq_flags;
801 if (!(flags & TCP_DEFERRED_ALL))
802 return;
803 nflags = flags & ~TCP_DEFERRED_ALL;
804 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
805
806 if (flags & (1UL << TCP_TSQ_DEFERRED))
807 tcp_tsq_handler(sk);
808
809
810
811
812
813
814
815
816
817
818 sock_release_ownership(sk);
819
820 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
821 tcp_write_timer_handler(sk);
822 __sock_put(sk);
823 }
824 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
825 tcp_delack_timer_handler(sk);
826 __sock_put(sk);
827 }
828 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
829 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
830 __sock_put(sk);
831 }
832}
833EXPORT_SYMBOL(tcp_release_cb);
834
835void __init tcp_tasklet_init(void)
836{
837 int i;
838
839 for_each_possible_cpu(i) {
840 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
841
842 INIT_LIST_HEAD(&tsq->head);
843 tasklet_init(&tsq->tasklet,
844 tcp_tasklet_func,
845 (unsigned long)tsq);
846 }
847}
848
849
850
851
852
853
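/* Write buffer destructor automatically called from kfree_skb.
 * We can't xmit new skbs from this context, as we might already
 * hold qdisc lock.
 */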
854void tcp_wfree(struct sk_buff *skb)
855{
856 struct sock *sk = skb->sk;
857 struct tcp_sock *tp = tcp_sk(sk);
858 int wmem;
859
860
861
862
863 wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
864
865
866
867
868
869
870
871
872 if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
873 goto out;
874
875 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
876 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
877 unsigned long flags;
878 struct tsq_tasklet *tsq;
879
880
881 local_irq_save(flags);
882 tsq = this_cpu_ptr(&tsq_tasklet);
883 list_add(&tp->tsq_node, &tsq->head);
884 tasklet_schedule(&tsq->tasklet);
885 local_irq_restore(flags);
886 return;
887 }
888out:
889 sk_free(sk);
890}
891
892
893
894
895
896
897
898
899
900
901
902
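/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg().  This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless.  It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */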
903static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
904 gfp_t gfp_mask)
905{
906 const struct inet_connection_sock *icsk = inet_csk(sk);
907 struct inet_sock *inet;
908 struct tcp_sock *tp;
909 struct tcp_skb_cb *tcb;
910 struct tcp_out_options opts;
911 unsigned int tcp_options_size, tcp_header_size;
912 struct tcp_md5sig_key *md5;
913 struct tcphdr *th;
914 int err;
915
916 BUG_ON(!skb || !tcp_skb_pcount(skb));
917
918 if (clone_it) {
919 skb_mstamp_get(&skb->skb_mstamp);
920
921 if (unlikely(skb_cloned(skb)))
922 skb = pskb_copy(skb, gfp_mask);
923 else
924 skb = skb_clone(skb, gfp_mask);
925 if (unlikely(!skb))
926 return -ENOBUFS;
927 }
928
929 inet = inet_sk(sk);
930 tp = tcp_sk(sk);
931 tcb = TCP_SKB_CB(skb);
932 memset(&opts, 0, sizeof(opts));
933
934 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
935 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
936 else
937 tcp_options_size = tcp_established_options(sk, skb, &opts,
938 &md5);
939 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
940
941
942
943
944
945
946
947
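	/* If no packet is in qdisc/device queue, then allow XPS to select
	 * another queue. We can be called from tcp_tsq_handler()
	 * which holds one reference to sk_wmem_alloc.
	 */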
948 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
949
950 skb_push(skb, tcp_header_size);
951 skb_reset_transport_header(skb);
952
953 skb_orphan(skb);
954 skb->sk = sk;
955 skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
956 skb_set_hash_from_sk(skb, sk);
957 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
958
959
960 th = tcp_hdr(skb);
961 th->source = inet->inet_sport;
962 th->dest = inet->inet_dport;
963 th->seq = htonl(tcb->seq);
964 th->ack_seq = htonl(tp->rcv_nxt);
965 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
966 tcb->tcp_flags);
967
968 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
969
970
971
972 th->window = htons(min(tp->rcv_wnd, 65535U));
973 } else {
974 th->window = htons(tcp_select_window(sk));
975 }
976 th->check = 0;
977 th->urg_ptr = 0;
978
979
980 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
981 if (before(tp->snd_up, tcb->seq + 0x10000)) {
982 th->urg_ptr = htons(tp->snd_up - tcb->seq);
983 th->urg = 1;
984 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
985 th->urg_ptr = htons(0xFFFF);
986 th->urg = 1;
987 }
988 }
989
990 tcp_options_write((__be32 *)(th + 1), tp, &opts);
991 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
992 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
993 tcp_ecn_send(sk, skb, tcp_header_size);
994
995#ifdef CONFIG_TCP_MD5SIG
996
997 if (md5) {
998 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
999 tp->af_specific->calc_md5_hash(opts.hash_location,
1000 md5, sk, skb);
1001 }
1002#endif
1003
1004 icsk->icsk_af_ops->send_check(sk, skb);
1005
1006 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1007 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1008
1009 if (skb->len != tcp_header_size)
1010 tcp_event_data_sent(tp, sk);
1011
1012 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1013 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1014 tcp_skb_pcount(skb));
1015
1016 tp->segs_out += tcp_skb_pcount(skb);
1017
1018 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
1019 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
1020
1021
1022 skb->tstamp.tv64 = 0;
1023
1024
1025 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
1026 sizeof(struct inet6_skb_parm)));
1027
1028 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1029
1030 if (likely(err <= 0))
1031 return err;
1032
1033 tcp_enter_cwr(sk);
1034
1035 return net_xmit_eval(err);
1036}
1037
1038
1039
1040
1041
1042
1043static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1044{
1045 struct tcp_sock *tp = tcp_sk(sk);
1046
1047
1048 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1049 __skb_header_release(skb);
1050 tcp_add_write_queue_tail(sk, skb);
1051 sk->sk_wmem_queued += skb->truesize;
1052 sk_mem_charge(sk, skb->truesize);
1053}
1054
1055
1056static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1057{
1058 if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
1059
1060
1061
1062 tcp_skb_pcount_set(skb, 1);
1063 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1064 } else {
1065 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1066 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
1067 }
1068}
1069
1070
1071
1072
1073static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1074 int decr)
1075{
1076 struct tcp_sock *tp = tcp_sk(sk);
1077
1078 if (!tp->sacked_out || tcp_is_reno(tp))
1079 return;
1080
1081 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1082 tp->fackets_out -= decr;
1083}
1084
1085
1086
1087
1088static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1089{
1090 struct tcp_sock *tp = tcp_sk(sk);
1091
1092 tp->packets_out -= decr;
1093
1094 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1095 tp->sacked_out -= decr;
1096 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1097 tp->retrans_out -= decr;
1098 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1099 tp->lost_out -= decr;
1100
1101
1102 if (tcp_is_reno(tp) && decr > 0)
1103 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1104
1105 tcp_adjust_fackets_out(sk, skb, decr);
1106
1107 if (tp->lost_skb_hint &&
1108 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1109 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1110 tp->lost_cnt_hint -= decr;
1111
1112 tcp_verify_left_out(tp);
1113}
1114
1115static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1116{
1117 struct skb_shared_info *shinfo = skb_shinfo(skb);
1118
1119 if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
1120 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1121 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1122 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1123
1124 shinfo->tx_flags &= ~tsflags;
1125 shinfo2->tx_flags |= tsflags;
1126 swap(shinfo->tskey, shinfo2->tskey);
1127 }
1128}
1129
1130
1131
1132
1133
1134
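/* Function to create two new TCP segments.  Shrinks the given segment
 * to the specified size and puts the rest of the data onto a new segment
 * which is placed in the write queue right after the original packet.
 * Returns non-zero if an error occurred which prevented the split.
 */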
1135int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1136 unsigned int mss_now, gfp_t gfp)
1137{
1138 struct tcp_sock *tp = tcp_sk(sk);
1139 struct sk_buff *buff;
1140 int nsize, old_factor;
1141 int nlen;
1142 u8 flags;
1143
1144 if (WARN_ON(len > skb->len))
1145 return -EINVAL;
1146
1147 nsize = skb_headlen(skb) - len;
1148 if (nsize < 0)
1149 nsize = 0;
1150
1151 if (skb_unclone(skb, gfp))
1152 return -ENOMEM;
1153
1154
1155 buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
1156 if (!buff)
1157 return -ENOMEM;
1158
1159 sk->sk_wmem_queued += buff->truesize;
1160 sk_mem_charge(sk, buff->truesize);
1161 nlen = skb->len - len - nsize;
1162 buff->truesize += nlen;
1163 skb->truesize -= nlen;
1164
1165
1166 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1167 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1168 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1169
1170
1171 flags = TCP_SKB_CB(skb)->tcp_flags;
1172 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1173 TCP_SKB_CB(buff)->tcp_flags = flags;
1174 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1175
1176 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1177
1178 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1179 skb_put(buff, nsize),
1180 nsize, 0);
1181
1182 skb_trim(skb, len);
1183
1184 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1185 } else {
1186 skb->ip_summed = CHECKSUM_PARTIAL;
1187 skb_split(skb, buff, len);
1188 }
1189
1190 buff->ip_summed = skb->ip_summed;
1191
1192 buff->tstamp = skb->tstamp;
1193 tcp_fragment_tstamp(skb, buff);
1194
1195 old_factor = tcp_skb_pcount(skb);
1196
1197
1198 tcp_set_skb_tso_segs(skb, mss_now);
1199 tcp_set_skb_tso_segs(buff, mss_now);
1200
1201
1202
1203
1204 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1205 int diff = old_factor - tcp_skb_pcount(skb) -
1206 tcp_skb_pcount(buff);
1207
1208 if (diff)
1209 tcp_adjust_pcount(sk, skb, diff);
1210 }
1211
1212
1213 __skb_header_release(buff);
1214 tcp_insert_write_queue_after(skb, buff, sk);
1215
1216 return 0;
1217}
1218
1219
1220
1221
1222
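/* This is similar to __pskb_pull_tail(). The difference is that pulled
 * data is not copied, but immediately discarded.
 */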
1223static void __pskb_trim_head(struct sk_buff *skb, int len)
1224{
1225 struct skb_shared_info *shinfo;
1226 int i, k, eat;
1227
1228 eat = min_t(int, len, skb_headlen(skb));
1229 if (eat) {
1230 __skb_pull(skb, eat);
1231 len -= eat;
1232 if (!len)
1233 return;
1234 }
1235 eat = len;
1236 k = 0;
1237 shinfo = skb_shinfo(skb);
1238 for (i = 0; i < shinfo->nr_frags; i++) {
1239 int size = skb_frag_size(&shinfo->frags[i]);
1240
1241 if (size <= eat) {
1242 skb_frag_unref(skb, i);
1243 eat -= size;
1244 } else {
1245 shinfo->frags[k] = shinfo->frags[i];
1246 if (eat) {
1247 shinfo->frags[k].page_offset += eat;
1248 skb_frag_size_sub(&shinfo->frags[k], eat);
1249 eat = 0;
1250 }
1251 k++;
1252 }
1253 }
1254 shinfo->nr_frags = k;
1255
1256 skb_reset_tail_pointer(skb);
1257 skb->data_len -= len;
1258 skb->len = skb->data_len;
1259}
1260
1261
1262int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1263{
1264 if (skb_unclone(skb, GFP_ATOMIC))
1265 return -ENOMEM;
1266
1267 __pskb_trim_head(skb, len);
1268
1269 TCP_SKB_CB(skb)->seq += len;
1270 skb->ip_summed = CHECKSUM_PARTIAL;
1271
1272 skb->truesize -= len;
1273 sk->sk_wmem_queued -= len;
1274 sk_mem_uncharge(sk, len);
1275 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1276
1277
1278 if (tcp_skb_pcount(skb) > 1)
1279 tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
1280
1281 return 0;
1282}
1283
1284
1285static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1286{
1287 const struct tcp_sock *tp = tcp_sk(sk);
1288 const struct inet_connection_sock *icsk = inet_csk(sk);
1289 int mss_now;
1290
1291
1292
1293
1294 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1295
1296
1297 if (icsk->icsk_af_ops->net_frag_header_len) {
1298 const struct dst_entry *dst = __sk_dst_get(sk);
1299
1300 if (dst && dst_allfrag(dst))
1301 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1302 }
1303
1304
1305 if (mss_now > tp->rx_opt.mss_clamp)
1306 mss_now = tp->rx_opt.mss_clamp;
1307
1308
1309 mss_now -= icsk->icsk_ext_hdr_len;
1310
1311
1312 if (mss_now < 48)
1313 mss_now = 48;
1314 return mss_now;
1315}
1316
1317
1318int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1319{
1320
1321 return __tcp_mtu_to_mss(sk, pmtu) -
1322 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1323}
1324
1325
1326int tcp_mss_to_mtu(struct sock *sk, int mss)
1327{
1328 const struct tcp_sock *tp = tcp_sk(sk);
1329 const struct inet_connection_sock *icsk = inet_csk(sk);
1330 int mtu;
1331
1332 mtu = mss +
1333 tp->tcp_header_len +
1334 icsk->icsk_ext_hdr_len +
1335 icsk->icsk_af_ops->net_header_len;
1336
1337
1338 if (icsk->icsk_af_ops->net_frag_header_len) {
1339 const struct dst_entry *dst = __sk_dst_get(sk);
1340
1341 if (dst && dst_allfrag(dst))
1342 mtu += icsk->icsk_af_ops->net_frag_header_len;
1343 }
1344 return mtu;
1345}
1346
1347
1348void tcp_mtup_init(struct sock *sk)
1349{
1350 struct tcp_sock *tp = tcp_sk(sk);
1351 struct inet_connection_sock *icsk = inet_csk(sk);
1352 struct net *net = sock_net(sk);
1353
1354 icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1355 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1356 icsk->icsk_af_ops->net_header_len;
1357 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1358 icsk->icsk_mtup.probe_size = 0;
1359 if (icsk->icsk_mtup.enabled)
1360 icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
1361}
1362EXPORT_SYMBOL(tcp_mtup_init);
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
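/* This function synchronizes snd mss to current pmtu/exthdr set.
 *
 * tp->rx_opt.mss_clamp is the mss negotiated at connection setup; it
 * does not include TCP options.  icsk_pmtu_cookie is the last pmtu seen
 * by this function.  tp->mss_cache is the current effective sending
 * mss, including all tcp options except SACK; it never exceeds
 * tp->rx_opt.mss_clamp.
 */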
1386unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1387{
1388 struct tcp_sock *tp = tcp_sk(sk);
1389 struct inet_connection_sock *icsk = inet_csk(sk);
1390 int mss_now;
1391
1392 if (icsk->icsk_mtup.search_high > pmtu)
1393 icsk->icsk_mtup.search_high = pmtu;
1394
1395 mss_now = tcp_mtu_to_mss(sk, pmtu);
1396 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1397
1398
1399 icsk->icsk_pmtu_cookie = pmtu;
1400 if (icsk->icsk_mtup.enabled)
1401 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1402 tp->mss_cache = mss_now;
1403
1404 return mss_now;
1405}
1406EXPORT_SYMBOL(tcp_sync_mss);
1407
1408
1409
1410
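/* Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 */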
1411unsigned int tcp_current_mss(struct sock *sk)
1412{
1413 const struct tcp_sock *tp = tcp_sk(sk);
1414 const struct dst_entry *dst = __sk_dst_get(sk);
1415 u32 mss_now;
1416 unsigned int header_len;
1417 struct tcp_out_options opts;
1418 struct tcp_md5sig_key *md5;
1419
1420 mss_now = tp->mss_cache;
1421
1422 if (dst) {
1423 u32 mtu = dst_mtu(dst);
1424 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1425 mss_now = tcp_sync_mss(sk, mtu);
1426 }
1427
1428 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1429 sizeof(struct tcphdr);
1430
1431
1432
1433
1434 if (header_len != tp->tcp_header_len) {
1435 int delta = (int) header_len - tp->tcp_header_len;
1436 mss_now -= delta;
1437 }
1438
1439 return mss_now;
1440}
1441
1442
1443
1444
1445
1446static void tcp_cwnd_application_limited(struct sock *sk)
1447{
1448 struct tcp_sock *tp = tcp_sk(sk);
1449
1450 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1451 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1452
1453 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1454 u32 win_used = max(tp->snd_cwnd_used, init_win);
1455 if (win_used < tp->snd_cwnd) {
1456 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1457 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1458 }
1459 tp->snd_cwnd_used = 0;
1460 }
1461 tp->snd_cwnd_stamp = tcp_time_stamp;
1462}
1463
1464static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1465{
1466 struct tcp_sock *tp = tcp_sk(sk);
1467
1468
1469
1470
1471 if (!before(tp->snd_una, tp->max_packets_seq) ||
1472 tp->packets_out > tp->max_packets_out) {
1473 tp->max_packets_out = tp->packets_out;
1474 tp->max_packets_seq = tp->snd_nxt;
1475 tp->is_cwnd_limited = is_cwnd_limited;
1476 }
1477
1478 if (tcp_is_cwnd_limited(sk)) {
1479
1480 tp->snd_cwnd_used = 0;
1481 tp->snd_cwnd_stamp = tcp_time_stamp;
1482 } else {
1483
1484 if (tp->packets_out > tp->snd_cwnd_used)
1485 tp->snd_cwnd_used = tp->packets_out;
1486
1487 if (sysctl_tcp_slow_start_after_idle &&
1488 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1489 tcp_cwnd_application_limited(sk);
1490 }
1491}
1492
1493
1494static bool tcp_minshall_check(const struct tcp_sock *tp)
1495{
1496 return after(tp->snd_sml, tp->snd_una) &&
1497 !after(tp->snd_sml, tp->snd_nxt);
1498}
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1509 const struct sk_buff *skb)
1510{
1511 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1512 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1513}
1514
1515
1516
1517
1518
1519
1520
1521
1522static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1523 int nonagle)
1524{
1525 return partial &&
1526 ((nonagle & TCP_NAGLE_CORK) ||
1527 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1528}
1529
1530
1531
1532
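/* Return how many segs we'd like on a TSO packet,
 * to send one TSO packet per ms.
 */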
1533static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
1534{
1535 u32 bytes, segs;
1536
1537 bytes = min(sk->sk_pacing_rate >> 10,
1538 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1539
1540
1541
1542
1543
1544
1545 segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
1546
1547 return min_t(u32, segs, sk->sk_gso_max_segs);
1548}
1549
1550
1551static unsigned int tcp_mss_split_point(const struct sock *sk,
1552 const struct sk_buff *skb,
1553 unsigned int mss_now,
1554 unsigned int max_segs,
1555 int nonagle)
1556{
1557 const struct tcp_sock *tp = tcp_sk(sk);
1558 u32 partial, needed, window, max_len;
1559
1560 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1561 max_len = mss_now * max_segs;
1562
1563 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1564 return max_len;
1565
1566 needed = min(skb->len, window);
1567
1568 if (max_len <= needed)
1569 return max_len;
1570
1571 partial = needed % mss_now;
1572
1573
1574
1575
1576 if (tcp_nagle_check(partial != 0, tp, nonagle))
1577 return needed - partial;
1578
1579 return needed;
1580}
1581
1582
1583
1584
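/* Can at least one segment of SKB be sent right now, according to the
 * congestion window rules?  If so, return how many segments are allowed.
 */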
1585static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1586 const struct sk_buff *skb)
1587{
1588 u32 in_flight, cwnd, halfcwnd;
1589
1590
1591 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1592 tcp_skb_pcount(skb) == 1)
1593 return 1;
1594
1595 in_flight = tcp_packets_in_flight(tp);
1596 cwnd = tp->snd_cwnd;
1597 if (in_flight >= cwnd)
1598 return 0;
1599
1600
1601
1602
1603 halfcwnd = max(cwnd >> 1, 1U);
1604 return min(halfcwnd, cwnd - in_flight);
1605}
1606
1607
1608
1609
1610
1611static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1612{
1613 int tso_segs = tcp_skb_pcount(skb);
1614
1615 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1616 tcp_set_skb_tso_segs(skb, mss_now);
1617 tso_segs = tcp_skb_pcount(skb);
1618 }
1619 return tso_segs;
1620}
1621
1622
1623
1624
1625
1626static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1627 unsigned int cur_mss, int nonagle)
1628{
1629
1630
1631
1632
1633
1634
1635 if (nonagle & TCP_NAGLE_PUSH)
1636 return true;
1637
1638
1639 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1640 return true;
1641
1642 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1643 return true;
1644
1645 return false;
1646}
1647
1648
1649static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1650 const struct sk_buff *skb,
1651 unsigned int cur_mss)
1652{
1653 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1654
1655 if (skb->len > cur_mss)
1656 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1657
1658 return !after(end_seq, tcp_wnd_end(tp));
1659}
1660
1661
1662
1663
1664
1665static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1666 unsigned int cur_mss, int nonagle)
1667{
1668 const struct tcp_sock *tp = tcp_sk(sk);
1669 unsigned int cwnd_quota;
1670
1671 tcp_init_tso_segs(skb, cur_mss);
1672
1673 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1674 return 0;
1675
1676 cwnd_quota = tcp_cwnd_test(tp, skb);
1677 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1678 cwnd_quota = 0;
1679
1680 return cwnd_quota;
1681}
1682
1683
1684bool tcp_may_send_now(struct sock *sk)
1685{
1686 const struct tcp_sock *tp = tcp_sk(sk);
1687 struct sk_buff *skb = tcp_send_head(sk);
1688
1689 return skb &&
1690 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1691 (tcp_skb_is_last(sk, skb) ?
1692 tp->nonagle : TCP_NAGLE_PUSH));
1693}
1694
1695
1696
1697
1698
1699
1700
1701
1702static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1703 unsigned int mss_now, gfp_t gfp)
1704{
1705 struct sk_buff *buff;
1706 int nlen = skb->len - len;
1707 u8 flags;
1708
1709
1710 if (skb->len != skb->data_len)
1711 return tcp_fragment(sk, skb, len, mss_now, gfp);
1712
1713 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1714 if (unlikely(!buff))
1715 return -ENOMEM;
1716
1717 sk->sk_wmem_queued += buff->truesize;
1718 sk_mem_charge(sk, buff->truesize);
1719 buff->truesize += nlen;
1720 skb->truesize -= nlen;
1721
1722
1723 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1724 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1725 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1726
1727
1728 flags = TCP_SKB_CB(skb)->tcp_flags;
1729 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1730 TCP_SKB_CB(buff)->tcp_flags = flags;
1731
1732
1733 TCP_SKB_CB(buff)->sacked = 0;
1734
1735 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1736 skb_split(skb, buff, len);
1737 tcp_fragment_tstamp(skb, buff);
1738
1739
1740 tcp_set_skb_tso_segs(skb, mss_now);
1741 tcp_set_skb_tso_segs(buff, mss_now);
1742
1743
1744 __skb_header_release(buff);
1745 tcp_insert_write_queue_after(skb, buff, sk);
1746
1747 return 0;
1748}
1749
1750
1751
1752
1753
1754
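/* Try to defer sending, if possible, in order to minimize the amount
 * of TSO splitting we do.  View it as a kind of TSO Nagle test.
 */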
1755static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1756 bool *is_cwnd_limited, u32 max_segs)
1757{
1758 const struct inet_connection_sock *icsk = inet_csk(sk);
1759 u32 age, send_win, cong_win, limit, in_flight;
1760 struct tcp_sock *tp = tcp_sk(sk);
1761 struct skb_mstamp now;
1762 struct sk_buff *head;
1763 int win_divisor;
1764
1765 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1766 goto send_now;
1767
1768 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
1769 goto send_now;
1770
1771
1772
1773
1774 if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
1775 goto send_now;
1776
1777 in_flight = tcp_packets_in_flight(tp);
1778
1779 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1780
1781 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1782
1783
1784 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1785
1786 limit = min(send_win, cong_win);
1787
1788
1789 if (limit >= max_segs * tp->mss_cache)
1790 goto send_now;
1791
1792
1793 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1794 goto send_now;
1795
1796 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1797 if (win_divisor) {
1798 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1799
1800
1801
1802
1803 chunk /= win_divisor;
1804 if (limit >= chunk)
1805 goto send_now;
1806 } else {
1807
1808
1809
1810
1811
1812 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1813 goto send_now;
1814 }
1815
1816 head = tcp_write_queue_head(sk);
1817 skb_mstamp_get(&now);
1818 age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
1819
1820 if (age < (tp->srtt_us >> 4))
1821 goto send_now;
1822
1823
1824
1825 if (cong_win < send_win && cong_win <= skb->len)
1826 *is_cwnd_limited = true;
1827
1828 return true;
1829
1830send_now:
1831 return false;
1832}
1833
1834static inline void tcp_mtu_check_reprobe(struct sock *sk)
1835{
1836 struct inet_connection_sock *icsk = inet_csk(sk);
1837 struct tcp_sock *tp = tcp_sk(sk);
1838 struct net *net = sock_net(sk);
1839 u32 interval;
1840 s32 delta;
1841
1842 interval = net->ipv4.sysctl_tcp_probe_interval;
1843 delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
1844 if (unlikely(delta >= interval * HZ)) {
1845 int mss = tcp_current_mss(sk);
1846
1847
1848 icsk->icsk_mtup.probe_size = 0;
1849 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
1850 sizeof(struct tcphdr) +
1851 icsk->icsk_af_ops->net_header_len;
1852 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
1853
1854
1855 icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
1856 }
1857}
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
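/* Create a new MTU probe if we are ready.
 * MTU probing regularly attempts to increase the path MTU by
 * deliberately sending larger packets.  This discovers routing
 * changes resulting in larger path MTUs.
 *
 * Returns 0 if we should wait to probe (no cwnd available),
 *         1 if a probe was sent,
 *         -1 otherwise
 */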
1868static int tcp_mtu_probe(struct sock *sk)
1869{
1870 struct tcp_sock *tp = tcp_sk(sk);
1871 struct inet_connection_sock *icsk = inet_csk(sk);
1872 struct sk_buff *skb, *nskb, *next;
1873 struct net *net = sock_net(sk);
1874 int len;
1875 int probe_size;
1876 int size_needed;
1877 int copy;
1878 int mss_now;
1879 int interval;
1880
1881
1882
1883
1884
1885 if (!icsk->icsk_mtup.enabled ||
1886 icsk->icsk_mtup.probe_size ||
1887 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1888 tp->snd_cwnd < 11 ||
1889 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1890 return -1;
1891
1892
1893
1894
1895
1896 mss_now = tcp_current_mss(sk);
1897 probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
1898 icsk->icsk_mtup.search_low) >> 1);
1899 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1900 interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
1901
1902
1903
1904
1905 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
1906 interval < net->ipv4.sysctl_tcp_probe_threshold) {
1907
1908
1909
1910 tcp_mtu_check_reprobe(sk);
1911 return -1;
1912 }
1913
1914
1915 if (tp->write_seq - tp->snd_nxt < size_needed)
1916 return -1;
1917
1918 if (tp->snd_wnd < size_needed)
1919 return -1;
1920 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1921 return 0;
1922
1923
1924 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1925 if (!tcp_packets_in_flight(tp))
1926 return -1;
1927 else
1928 return 0;
1929 }
1930
1931
1932 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
1933 if (!nskb)
1934 return -1;
1935 sk->sk_wmem_queued += nskb->truesize;
1936 sk_mem_charge(sk, nskb->truesize);
1937
1938 skb = tcp_send_head(sk);
1939
1940 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1941 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1942 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1943 TCP_SKB_CB(nskb)->sacked = 0;
1944 nskb->csum = 0;
1945 nskb->ip_summed = skb->ip_summed;
1946
1947 tcp_insert_write_queue_before(nskb, skb, sk);
1948
1949 len = 0;
1950 tcp_for_write_queue_from_safe(skb, next, sk) {
1951 copy = min_t(int, skb->len, probe_size - len);
1952 if (nskb->ip_summed)
1953 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1954 else
1955 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1956 skb_put(nskb, copy),
1957 copy, nskb->csum);
1958
1959 if (skb->len <= copy) {
1960
1961
1962 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1963 tcp_unlink_write_queue(skb, sk);
1964 sk_wmem_free_skb(sk, skb);
1965 } else {
1966 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1967 ~(TCPHDR_FIN|TCPHDR_PSH);
1968 if (!skb_shinfo(skb)->nr_frags) {
1969 skb_pull(skb, copy);
1970 if (skb->ip_summed != CHECKSUM_PARTIAL)
1971 skb->csum = csum_partial(skb->data,
1972 skb->len, 0);
1973 } else {
1974 __pskb_trim_head(skb, copy);
1975 tcp_set_skb_tso_segs(skb, mss_now);
1976 }
1977 TCP_SKB_CB(skb)->seq += copy;
1978 }
1979
1980 len += copy;
1981
1982 if (len >= probe_size)
1983 break;
1984 }
1985 tcp_init_tso_segs(nskb, nskb->len);
1986
1987
1988
1989
1990 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1991
1992
1993 tp->snd_cwnd--;
1994 tcp_event_new_data_sent(sk, nskb);
1995
1996 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1997 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1998 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1999
2000 return 1;
2001 }
2002
2003 return -1;
2004}
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
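/* This routine writes packets to the network.  It advances the
 * send_head.  This happens as incoming acks open up the remote
 * window for us.
 *
 * Send at most one packet when push_one > 0. Temporarily ignore
 * cwnd limit to force at most one packet out when push_one == 2.
 *
 * Returns true, if no segments are in flight and we have queued segments,
 * but cannot send anything now because of SWS or another problem.
 */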
2020static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2021 int push_one, gfp_t gfp)
2022{
2023 struct tcp_sock *tp = tcp_sk(sk);
2024 struct sk_buff *skb;
2025 unsigned int tso_segs, sent_pkts;
2026 int cwnd_quota;
2027 int result;
2028 bool is_cwnd_limited = false;
2029 u32 max_segs;
2030
2031 sent_pkts = 0;
2032
2033 if (!push_one) {
2034
2035 result = tcp_mtu_probe(sk);
2036 if (!result) {
2037 return false;
2038 } else if (result > 0) {
2039 sent_pkts = 1;
2040 }
2041 }
2042
2043 max_segs = tcp_tso_autosize(sk, mss_now);
2044 while ((skb = tcp_send_head(sk))) {
2045 unsigned int limit;
2046
2047 tso_segs = tcp_init_tso_segs(skb, mss_now);
2048 BUG_ON(!tso_segs);
2049
2050 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2051
2052 skb_mstamp_get(&skb->skb_mstamp);
2053 goto repair;
2054 }
2055
2056 cwnd_quota = tcp_cwnd_test(tp, skb);
2057 if (!cwnd_quota) {
2058 if (push_one == 2)
2059
2060 cwnd_quota = 1;
2061 else
2062 break;
2063 }
2064
2065 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
2066 break;
2067
2068 if (tso_segs == 1) {
2069 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2070 (tcp_skb_is_last(sk, skb) ?
2071 nonagle : TCP_NAGLE_PUSH))))
2072 break;
2073 } else {
2074 if (!push_one &&
2075 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2076 max_segs))
2077 break;
2078 }
2079
2080 limit = mss_now;
2081 if (tso_segs > 1 && !tcp_urg_mode(tp))
2082 limit = tcp_mss_split_point(sk, skb, mss_now,
2083 min_t(unsigned int,
2084 cwnd_quota,
2085 max_segs),
2086 nonagle);
2087
2088 if (skb->len > limit &&
2089 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2090 break;
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
2103 limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
2104
2105 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
2106 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
2107
2108
2109
2110
2111 smp_mb__after_atomic();
2112 if (atomic_read(&sk->sk_wmem_alloc) > limit)
2113 break;
2114 }
2115
2116 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2117 break;
2118
2119repair:
2120
2121
2122
2123 tcp_event_new_data_sent(sk, skb);
2124
2125 tcp_minshall_update(tp, mss_now, skb);
2126 sent_pkts += tcp_skb_pcount(skb);
2127
2128 if (push_one)
2129 break;
2130 }
2131
2132 if (likely(sent_pkts)) {
2133 if (tcp_in_cwnd_reduction(sk))
2134 tp->prr_out += sent_pkts;
2135
2136
2137 if (push_one != 2)
2138 tcp_schedule_loss_probe(sk);
2139 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2140 tcp_cwnd_validate(sk, is_cwnd_limited);
2141 return false;
2142 }
2143 return !tp->packets_out && tcp_send_head(sk);
2144}
2145
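/* Arm the Tail Loss Probe (TLP) timer in place of the RTO timer when a
 * probe makes sense: a SACK-enabled connection in Open state that is
 * cwnd- or application-limited.  Returns true if the probe was armed.
 */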
2146bool tcp_schedule_loss_probe(struct sock *sk)
2147{
2148 struct inet_connection_sock *icsk = inet_csk(sk);
2149 struct tcp_sock *tp = tcp_sk(sk);
2150 u32 timeout, tlp_time_stamp, rto_time_stamp;
2151 u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
2152
2153 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
2154 return false;
2155
2156 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
2157 tcp_rearm_rto(sk);
2158 return false;
2159 }
2160
2161
2162
2163 if (tp->fastopen_rsk)
2164 return false;
2165
2166
2167 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
2168 return false;
2169
2170
2171
2172
2173 if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
2174 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2175 return false;
2176
2177 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2178 tcp_send_head(sk))
2179 return false;
2180
2181
2182
2183
2184
2185 timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
2186 if (tp->packets_out == 1)
2187 timeout = max_t(u32, timeout,
2188 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
2189 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
2190
2191
2192 tlp_time_stamp = tcp_time_stamp + timeout;
2193 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
2194 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
2195 s32 delta = rto_time_stamp - tcp_time_stamp;
2196 if (delta > 0)
2197 timeout = delta;
2198 }
2199
2200 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2201 TCP_RTO_MAX);
2202 return true;
2203}
2204
2205
2206
2207
2208
2209
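/* Thanks to skb fast clones, we can detect if a prior transmit of
 * a packet is still in a qdisc or driver queue.
 * In this case, there is very little point doing a retransmit !
 */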
2210static bool skb_still_in_host_queue(const struct sock *sk,
2211 const struct sk_buff *skb)
2212{
2213 if (unlikely(skb_fclone_busy(sk, skb))) {
2214 NET_INC_STATS_BH(sock_net(sk),
2215 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2216 return true;
2217 }
2218 return false;
2219}
2220
2221
2222
2223
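/* When probe timeout (PTO) fires, try send a new segment if any,
 * otherwise retransmit the last segment.
 */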
2224void tcp_send_loss_probe(struct sock *sk)
2225{
2226 struct tcp_sock *tp = tcp_sk(sk);
2227 struct sk_buff *skb;
2228 int pcount;
2229 int mss = tcp_current_mss(sk);
2230
2231 skb = tcp_send_head(sk);
2232 if (skb) {
2233 if (tcp_snd_wnd_test(tp, skb, mss)) {
2234 pcount = tp->packets_out;
2235 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2236 if (tp->packets_out > pcount)
2237 goto probe_sent;
2238 goto rearm_timer;
2239 }
2240 skb = tcp_write_queue_prev(sk, skb);
2241 } else {
2242 skb = tcp_write_queue_tail(sk);
2243 }
2244
2245
2246 if (tp->tlp_high_seq)
2247 goto rearm_timer;
2248
2249
2250 if (WARN_ON(!skb))
2251 goto rearm_timer;
2252
2253 if (skb_still_in_host_queue(sk, skb))
2254 goto rearm_timer;
2255
2256 pcount = tcp_skb_pcount(skb);
2257 if (WARN_ON(!pcount))
2258 goto rearm_timer;
2259
2260 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2261 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
2262 GFP_ATOMIC)))
2263 goto rearm_timer;
2264 skb = tcp_write_queue_next(sk, skb);
2265 }
2266
2267 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2268 goto rearm_timer;
2269
2270 if (__tcp_retransmit_skb(sk, skb))
2271 goto rearm_timer;
2272
2273
2274 tp->tlp_high_seq = tp->snd_nxt;
2275
2276probe_sent:
2277 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
2278
2279 inet_csk(sk)->icsk_pending = 0;
2280rearm_timer:
2281 tcp_rearm_rto(sk);
2282}
2283
2284
2285
2286
2287
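/* Push out any pending frames which were held back due to
 * TCP_CORK or attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 */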
2288void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2289 int nonagle)
2290{
2291
2292
2293
2294
2295 if (unlikely(sk->sk_state == TCP_CLOSE))
2296 return;
2297
2298 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2299 sk_gfp_atomic(sk, GFP_ATOMIC)))
2300 tcp_check_probe_timer(sk);
2301}
2302
2303
2304
2305
2306void tcp_push_one(struct sock *sk, unsigned int mss_now)
2307{
2308 struct sk_buff *skb = tcp_send_head(sk);
2309
2310 BUG_ON(!skb || skb->len < mss_now);
2311
2312 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2313}
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
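/* This function returns the amount of memory we may advertise as the
 * receive window.  The offered window is kept aligned to the receive
 * window scale, rounded to a multiple of mss where possible, and is
 * collapsed to zero rather than shrunk below one full-sized segment,
 * to avoid silly window syndrome.
 */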
2367u32 __tcp_select_window(struct sock *sk)
2368{
2369 struct inet_connection_sock *icsk = inet_csk(sk);
2370 struct tcp_sock *tp = tcp_sk(sk);
2371
2372
2373
2374
2375
2376
2377 int mss = icsk->icsk_ack.rcv_mss;
2378 int free_space = tcp_space(sk);
2379 int allowed_space = tcp_full_space(sk);
2380 int full_space = min_t(int, tp->window_clamp, allowed_space);
2381 int window;
2382
2383 if (mss > full_space)
2384 mss = full_space;
2385
2386 if (free_space < (full_space >> 1)) {
2387 icsk->icsk_ack.quick = 0;
2388
2389 if (tcp_under_memory_pressure(sk))
2390 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2391 4U * tp->advmss);
2392
2393
2394
2395
2396 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2397
2398
2399
2400
2401
2402
2403
2404
2405 if (free_space < (allowed_space >> 4) || free_space < mss)
2406 return 0;
2407 }
2408
2409 if (free_space > tp->rcv_ssthresh)
2410 free_space = tp->rcv_ssthresh;
2411
2412
2413
2414
2415 window = tp->rcv_wnd;
2416 if (tp->rx_opt.rcv_wscale) {
2417 window = free_space;
2418
2419
2420
2421
2422
2423 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2424 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2425 << tp->rx_opt.rcv_wscale);
2426 } else {
2427
2428
2429
2430
2431
2432
2433
2434
2435 if (window <= free_space - mss || window > free_space)
2436 window = (free_space / mss) * mss;
2437 else if (mss == full_space &&
2438 free_space > window + (full_space >> 1))
2439 window = free_space;
2440 }
2441
2442 return window;
2443}
2444
2445
2446static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2447{
2448 struct tcp_sock *tp = tcp_sk(sk);
2449 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2450 int skb_size, next_skb_size;
2451
2452 skb_size = skb->len;
2453 next_skb_size = next_skb->len;
2454
2455 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2456
2457 tcp_highest_sack_combine(sk, next_skb, skb);
2458
2459 tcp_unlink_write_queue(next_skb, sk);
2460
2461 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2462 next_skb_size);
2463
2464 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2465 skb->ip_summed = CHECKSUM_PARTIAL;
2466
2467 if (skb->ip_summed != CHECKSUM_PARTIAL)
2468 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2469
2470
2471 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2472
2473
2474 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2475
2476
2477
2478
2479 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2480
2481
2482 tcp_clear_retrans_hints_partial(tp);
2483 if (next_skb == tp->retransmit_skb_hint)
2484 tp->retransmit_skb_hint = skb;
2485
2486 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2487
2488 sk_wmem_free_skb(sk, next_skb);
2489}
2490
2491
2492static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2493{
2494 if (tcp_skb_pcount(skb) > 1)
2495 return false;
2496
2497 if (skb_shinfo(skb)->nr_frags != 0)
2498 return false;
2499 if (skb_cloned(skb))
2500 return false;
2501 if (skb == tcp_send_head(sk))
2502 return false;
2503
2504 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2505 return false;
2506
2507 return true;
2508}
2509
2510
2511
2512
2513static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2514 int space)
2515{
2516 struct tcp_sock *tp = tcp_sk(sk);
2517 struct sk_buff *skb = to, *tmp;
2518 bool first = true;
2519
2520 if (!sysctl_tcp_retrans_collapse)
2521 return;
2522 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2523 return;
2524
2525 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2526 if (!tcp_can_collapse(sk, skb))
2527 break;
2528
2529 space -= skb->len;
2530
2531 if (first) {
2532 first = false;
2533 continue;
2534 }
2535
2536 if (space < 0)
2537 break;
2538
2539
2540
2541 if (skb->len > skb_availroom(to))
2542 break;
2543
2544 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2545 break;
2546
2547 tcp_collapse_retrans(sk, to);
2548 }
2549}
2550
2551
2552
2553
2554
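/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */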
2555int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2556{
2557 struct tcp_sock *tp = tcp_sk(sk);
2558 struct inet_connection_sock *icsk = inet_csk(sk);
2559 unsigned int cur_mss;
2560 int err;
2561
2562
2563 if (icsk->icsk_mtup.probe_size) {
2564 icsk->icsk_mtup.probe_size = 0;
2565 }
2566
2567
2568
2569
2570 if (atomic_read(&sk->sk_wmem_alloc) >
2571 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2572 return -EAGAIN;
2573
2574 if (skb_still_in_host_queue(sk, skb))
2575 return -EBUSY;
2576
2577 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2578 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2579 BUG();
2580 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2581 return -ENOMEM;
2582 }
2583
2584 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2585 return -EHOSTUNREACH;
2586
2587 cur_mss = tcp_current_mss(sk);
2588
2589
2590
2591
2592
2593
2594 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2595 TCP_SKB_CB(skb)->seq != tp->snd_una)
2596 return -EAGAIN;
2597
2598 if (skb->len > cur_mss) {
2599 if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
2600 return -ENOMEM;
2601 } else {
2602 int oldpcount = tcp_skb_pcount(skb);
2603
2604 if (unlikely(oldpcount > 1)) {
2605 if (skb_unclone(skb, GFP_ATOMIC))
2606 return -ENOMEM;
2607 tcp_init_tso_segs(skb, cur_mss);
2608 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2609 }
2610 }
2611
2612
2613 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
2614 tcp_ecn_clear_syn(sk, skb);
2615
2616 tcp_retrans_try_collapse(sk, skb, cur_mss);
2617
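 /* Make sure skb->data is aligned on arches that require it, and check
  * that ACK trimming or collapsing has not pushed the headroom beyond
  * what csum_start can cover; if so, transmit a fresh copy instead of
  * a clone of the original skb.
  */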
2626 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2627 skb_headroom(skb) >= 0xFFFF)) {
2628 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2629 GFP_ATOMIC);
2630 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2631 -ENOBUFS;
2632 } else {
2633 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2634 }
2635
2636 if (likely(!err)) {
2637 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2638
2639 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2640 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2641 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2642 tp->total_retrans++;
2643 }
2644 return err;
2645}
2646
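/* Retransmit an skb and, on success, update the bookkeeping the loss-recovery
 * machinery relies on: mark the skb as retransmitted, bump retrans_out and
 * undo_retrans, and record the timestamp of the first retransmit.
 */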
2647int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2648{
2649 struct tcp_sock *tp = tcp_sk(sk);
2650 int err = __tcp_retransmit_skb(sk, skb);
2651
2652 if (err == 0) {
2653#if FASTRETRANS_DEBUG > 0
2654 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2655 net_dbg_ratelimited("retrans_out leaked\n");
2656 }
2657#endif
2658 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2659 tp->retrans_out += tcp_skb_pcount(skb);
2660
2661
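 /* Save stamp of the first retransmit. */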
2662 if (!tp->retrans_stamp)
2663 tp->retrans_stamp = tcp_skb_timestamp(skb);
2664
2665 } else if (err != -EBUSY) {
2666 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2667 }
2668
2669 if (tp->undo_retrans < 0)
2670 tp->undo_retrans = 0;
2671 tp->undo_retrans += tcp_skb_pcount(skb);
2672 return err;
2673}
2674
2675
2676
2677
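/* Check whether forward retransmissions (segments never marked lost, beyond
 * retransmit_high) may be sent: only during SACK-based recovery, and only if
 * there is no new data that could be sent instead.
 */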
2678static bool tcp_can_forward_retransmit(struct sock *sk)
2679{
2680 const struct inet_connection_sock *icsk = inet_csk(sk);
2681 const struct tcp_sock *tp = tcp_sk(sk);
2682
2683
2684 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2685 return false;
2686
2687
2688 if (tcp_is_reno(tp))
2689 return false;
2690
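 /* Prefer sending new data over forward retransmits; otherwise this
  * follows rule 3 of NextSeg() in RFC 3517.
  */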
2699 if (tcp_may_send_now(sk))
2700 return false;
2701
2702 return true;
2703}
2704
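/* Walk the retransmit queue and resend everything marked lost, starting from
 * the cached retransmit hint (or the head of the queue), then move on to
 * forward retransmits when allowed.  Keeps going until the queue is drained,
 * the congestion window is full, or a transmit fails.
 */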
2713void tcp_xmit_retransmit_queue(struct sock *sk)
2714{
2715 const struct inet_connection_sock *icsk = inet_csk(sk);
2716 struct tcp_sock *tp = tcp_sk(sk);
2717 struct sk_buff *skb;
2718 struct sk_buff *hole = NULL;
2719 u32 last_lost;
2720 int mib_idx;
2721 int fwd_rexmitting = 0;
2722
2723 if (!tp->packets_out)
2724 return;
2725
2726 if (!tp->lost_out)
2727 tp->retransmit_high = tp->snd_una;
2728
2729 if (tp->retransmit_skb_hint) {
2730 skb = tp->retransmit_skb_hint;
2731 last_lost = TCP_SKB_CB(skb)->end_seq;
2732 if (after(last_lost, tp->retransmit_high))
2733 last_lost = tp->retransmit_high;
2734 } else {
2735 skb = tcp_write_queue_head(sk);
2736 last_lost = tp->snd_una;
2737 }
2738
2739 tcp_for_write_queue_from(skb, sk) {
2740 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2741
2742 if (skb == tcp_send_head(sk))
2743 break;
2744
2745 if (!hole)
2746 tp->retransmit_skb_hint = skb;
2747
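 /* Stop once the congestion window is full.  Each retransmit is assumed
  * to cost one packet of cwnd; tcp_retransmit_skb() chops the skb into
  * MSS-sized pieces, so the packet counting works out.
  */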
2755 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2756 return;
2757
2758 if (fwd_rexmitting) {
2759begin_fwd:
2760 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2761 break;
2762 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2763
2764 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2765 tp->retransmit_high = last_lost;
2766 if (!tcp_can_forward_retransmit(sk))
2767 break;
2768
2769 if (hole) {
2770 skb = hole;
2771 hole = NULL;
2772 }
2773 fwd_rexmitting = 1;
2774 goto begin_fwd;
2775
2776 } else if (!(sacked & TCPCB_LOST)) {
2777 if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2778 hole = skb;
2779 continue;
2780
2781 } else {
2782 last_lost = TCP_SKB_CB(skb)->end_seq;
2783 if (icsk->icsk_ca_state != TCP_CA_Loss)
2784 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2785 else
2786 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2787 }
2788
2789 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2790 continue;
2791
2792 if (tcp_retransmit_skb(sk, skb))
2793 return;
2794
2795 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2796
2797 if (tcp_in_cwnd_reduction(sk))
2798 tp->prr_out += tcp_skb_pcount(skb);
2799
2800 if (skb == tcp_write_queue_head(sk))
2801 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2802 inet_csk(sk)->icsk_rto,
2803 TCP_RTO_MAX);
2804 }
2805}
2806
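/* Unconditionally charge @size bytes of forward allocation to the socket,
 * bypassing the usual memory-pressure checks.  Used when a packet (such as
 * a FIN) must be sent even though the socket is over its memory limits.
 */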
2814void sk_forced_mem_schedule(struct sock *sk, int size)
2815{
2816 int amt, status;
2817
2818 if (size <= sk->sk_forward_alloc)
2819 return;
2820 amt = sk_mem_pages(size);
2821 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2822 sk_memory_allocated_add(sk, amt, &status);
2823}
2824
2825
2826
2827
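/* Send a FIN.  The caller holds the socket lock.  Where possible the FIN flag
 * is piggy-backed onto the last queued skb; otherwise a new skb is allocated,
 * forcing memory accounting if necessary, so the close can make progress.
 */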
2828void tcp_send_fin(struct sock *sk)
2829{
2830 struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
2831 struct tcp_sock *tp = tcp_sk(sk);
2832
2833
2834
2835
2836
2837
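 /* Piggy-back the FIN if there is an unsent skb at the tail of the write
  * queue, or if we are under memory pressure.  In the memory-pressure case
  * the FIN rides on an already-sent skb and only goes out on a later
  * (re)transmit.
  */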
2838 if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
2839coalesce:
2840 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
2841 TCP_SKB_CB(tskb)->end_seq++;
2842 tp->write_seq++;
2843 if (!tcp_send_head(sk)) {
2844
2845
2846
2847
2848
2849
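 /* The tail skb was already transmitted.  Pretend the FIN was included
  * in it by advancing snd_nxt here, since the retransmit path does not
  * change snd_nxt.
  */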
2850 tp->snd_nxt++;
2851 return;
2852 }
2853 } else {
2854 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
2855 if (unlikely(!skb)) {
2856 if (tskb)
2857 goto coalesce;
2858 return;
2859 }
2860 skb_reserve(skb, MAX_TCP_HEADER);
2861 sk_forced_mem_schedule(sk, skb->truesize);
2862
2863 tcp_init_nondata_skb(skb, tp->write_seq,
2864 TCPHDR_ACK | TCPHDR_FIN);
2865 tcp_queue_skb(sk, skb);
2866 }
2867 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
2868}
2869
2870
2871
2872
2873
2874
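/* Send an active RST to abort the connection, e.g. when a socket is closed
 * with unread data in the receive queue (RFC 2525, section 2.17).  Allocation
 * and transmit failures are only counted, not retried.
 */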
2875void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2876{
2877 struct sk_buff *skb;
2878
2879
2880 skb = alloc_skb(MAX_TCP_HEADER, priority);
2881 if (!skb) {
2882 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2883 return;
2884 }
2885
2886
2887 skb_reserve(skb, MAX_TCP_HEADER);
2888 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2889 TCPHDR_ACK | TCPHDR_RST);
2890 skb_mstamp_get(&skb->skb_mstamp);
2891
2892 if (tcp_transmit_skb(sk, skb, 0, priority))
2893 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2894
2895 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2896}
2897
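/* Send a crossed SYN-ACK during simultaneous open: the queued SYN at the head
 * of the write queue gets the ACK flag (after un-cloning it if necessary) and
 * is transmitted as a SYN-ACK.
 */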
2904int tcp_send_synack(struct sock *sk)
2905{
2906 struct sk_buff *skb;
2907
2908 skb = tcp_write_queue_head(sk);
2909 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2910 pr_debug("%s: wrong queue state\n", __func__);
2911 return -EFAULT;
2912 }
2913 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2914 if (skb_cloned(skb)) {
2915 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2916 if (!nskb)
2917 return -ENOMEM;
2918 tcp_unlink_write_queue(skb, sk);
2919 __skb_header_release(nskb);
2920 __tcp_add_write_queue_head(sk, nskb);
2921 sk_wmem_free_skb(sk, skb);
2922 sk->sk_wmem_queued += nskb->truesize;
2923 sk_mem_charge(sk, nskb->truesize);
2924 skb = nskb;
2925 }
2926
2927 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2928 tcp_ecn_send_synack(sk, skb);
2929 }
2930 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2931}
2932
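/* Build a SYN-ACK for @req on behalf of the listener @sk.  Allocates an skb,
 * attaches @dst, fills in the TCP header and options (including MD5 and Fast
 * Open when applicable) and returns it; the caller transmits it.  Releases
 * @dst on allocation failure.
 */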
2942struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
2943 struct request_sock *req,
2944 struct tcp_fastopen_cookie *foc,
2945 bool attach_req)
2946{
2947 struct inet_request_sock *ireq = inet_rsk(req);
2948 const struct tcp_sock *tp = tcp_sk(sk);
2949 struct tcp_md5sig_key *md5 = NULL;
2950 struct tcp_out_options opts;
2951 struct sk_buff *skb;
2952 int tcp_header_size;
2953 struct tcphdr *th;
2954 u16 user_mss;
2955 int mss;
2956
2957 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2958 if (unlikely(!skb)) {
2959 dst_release(dst);
2960 return NULL;
2961 }
2962
2963 skb_reserve(skb, MAX_TCP_HEADER);
2964
2965 if (attach_req) {
2966 skb_set_owner_w(skb, req_to_sk(req));
2967 } else {
2968
2969
2970
2971
2972 skb_set_owner_w(skb, (struct sock *)sk);
2973 }
2974 skb_dst_set(skb, dst);
2975
2976 mss = dst_metric_advmss(dst);
2977 user_mss = READ_ONCE(tp->rx_opt.user_mss);
2978 if (user_mss && user_mss < mss)
2979 mss = user_mss;
2980
2981 memset(&opts, 0, sizeof(opts));
2982#ifdef CONFIG_SYN_COOKIES
2983 if (unlikely(req->cookie_ts))
2984 skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
2985 else
2986#endif
2987 skb_mstamp_get(&skb->skb_mstamp);
2988
2989#ifdef CONFIG_TCP_MD5SIG
2990 rcu_read_lock();
2991 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
2992#endif
2993 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
2994 tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
2995 sizeof(*th);
2996
2997 skb_push(skb, tcp_header_size);
2998 skb_reset_transport_header(skb);
2999
3000 th = tcp_hdr(skb);
3001 memset(th, 0, sizeof(struct tcphdr));
3002 th->syn = 1;
3003 th->ack = 1;
3004 tcp_ecn_make_synack(req, th);
3005 th->source = htons(ireq->ir_num);
3006 th->dest = ireq->ir_rmt_port;
3007
3008
3009
3010 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
3011 TCPHDR_SYN | TCPHDR_ACK);
3012
3013 th->seq = htonl(TCP_SKB_CB(skb)->seq);
3014
3015 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3016
3017
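 /* RFC 1323: the window in SYN and SYN-ACK segments is never scaled. */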
3018 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3019 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3020 th->doff = (tcp_header_size >> 2);
3021 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
3022
3023#ifdef CONFIG_TCP_MD5SIG
3024
3025 if (md5)
3026 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
3027 md5, req_to_sk(req), skb);
3028 rcu_read_unlock();
3029#endif
3030
3031
3032 skb->tstamp.tv64 = 0;
3033 return skb;
3034}
3035EXPORT_SYMBOL(tcp_make_synack);
3036
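/* If the route carries a congestion-control hint (RTAX_CC_ALGO), switch the
 * socket to that algorithm, taking a module reference on the new ops,
 * dropping the reference on the old ones, and recording whether the route
 * locks the choice.
 */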
3037static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
3038{
3039 struct inet_connection_sock *icsk = inet_csk(sk);
3040 const struct tcp_congestion_ops *ca;
3041 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
3042
3043 if (ca_key == TCP_CA_UNSPEC)
3044 return;
3045
3046 rcu_read_lock();
3047 ca = tcp_ca_find_key(ca_key);
3048 if (likely(ca && try_module_get(ca->owner))) {
3049 module_put(icsk->icsk_ca_ops->owner);
3050 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
3051 icsk->icsk_ca_ops = ca;
3052 }
3053 rcu_read_unlock();
3054}
3055
3056
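/* Do all connect socket setups that can be done address-family independent:
 * header size, MTU/MSS, congestion control from the route, initial window
 * and sequence state.
 */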
3057static void tcp_connect_init(struct sock *sk)
3058{
3059 const struct dst_entry *dst = __sk_dst_get(sk);
3060 struct tcp_sock *tp = tcp_sk(sk);
3061 __u8 rcv_wscale;
3062
3063
3064
3065
3066 tp->tcp_header_len = sizeof(struct tcphdr) +
3067 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
3068
3069#ifdef CONFIG_TCP_MD5SIG
3070 if (tp->af_specific->md5_lookup(sk, sk))
3071 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
3072#endif
3073
3074
3075 if (tp->rx_opt.user_mss)
3076 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3077 tp->max_window = 0;
3078 tcp_mtup_init(sk);
3079 tcp_sync_mss(sk, dst_mtu(dst));
3080
3081 tcp_ca_dst_init(sk, dst);
3082
3083 if (!tp->window_clamp)
3084 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
3085 tp->advmss = dst_metric_advmss(dst);
3086 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
3087 tp->advmss = tp->rx_opt.user_mss;
3088
3089 tcp_initialize_rcv_mss(sk);
3090
3091
3092 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3093 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3094 tp->window_clamp = tcp_full_space(sk);
3095
3096 tcp_select_initial_window(tcp_full_space(sk),
3097 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3098 &tp->rcv_wnd,
3099 &tp->window_clamp,
3100 sysctl_tcp_window_scaling,
3101 &rcv_wscale,
3102 dst_metric(dst, RTAX_INITRWND));
3103
3104 tp->rx_opt.rcv_wscale = rcv_wscale;
3105 tp->rcv_ssthresh = tp->rcv_wnd;
3106
3107 sk->sk_err = 0;
3108 sock_reset_flag(sk, SOCK_DONE);
3109 tp->snd_wnd = 0;
3110 tcp_init_wl(tp, 0);
3111 tp->snd_una = tp->write_seq;
3112 tp->snd_sml = tp->write_seq;
3113 tp->snd_up = tp->write_seq;
3114 tp->snd_nxt = tp->write_seq;
3115
3116 if (likely(!tp->repair))
3117 tp->rcv_nxt = 0;
3118 else
3119 tp->rcv_tstamp = tcp_time_stamp;
3120 tp->rcv_wup = tp->rcv_nxt;
3121 tp->copied_seq = tp->rcv_nxt;
3122
3123 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
3124 inet_csk(sk)->icsk_retransmits = 0;
3125 tcp_clear_retrans(tp);
3126}
3127
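/* Queue a SYN (or SYN+data) skb on the write queue and account for it,
 * advancing write_seq and packets_out.
 */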
3128static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3129{
3130 struct tcp_sock *tp = tcp_sk(sk);
3131 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3132
3133 tcb->end_seq += skb->len;
3134 __skb_header_release(skb);
3135 __tcp_add_write_queue_tail(sk, skb);
3136 sk->sk_wmem_queued += skb->truesize;
3137 sk_mem_charge(sk, skb->truesize);
3138 tp->write_seq = tcb->end_seq;
3139 tp->packets_out += tcp_skb_pcount(skb);
3140}
3141
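/* Build and send a SYN carrying data, using a cached Fast Open cookie.  The
 * data-only portion is queued after the regular SYN so that plain SYNs are
 * what gets retransmitted on timeout.  Falls back to a regular SYN (with a
 * cookie request) if no usable cookie is cached or something goes wrong.
 */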
3149static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3150{
3151 struct tcp_sock *tp = tcp_sk(sk);
3152 struct tcp_fastopen_request *fo = tp->fastopen_req;
3153 int syn_loss = 0, space, err = 0;
3154 unsigned long last_syn_loss = 0;
3155 struct sk_buff *syn_data;
3156
3157 tp->rx_opt.mss_clamp = tp->advmss;
3158 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
3159 &syn_loss, &last_syn_loss);
3160
3161 if (syn_loss > 1 &&
3162 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
3163 fo->cookie.len = -1;
3164 goto fallback;
3165 }
3166
3167 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
3168 fo->cookie.len = -1;
3169 else if (fo->cookie.len <= 0)
3170 goto fallback;
3171
3172
3173
3174
3175
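 /* The amount of SYN data is based on the cached MSS, bounded by the
  * path MTU and the user MSS, with the maximum TCP option space reserved
  * for middleboxes that may add their own options.
  */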
3176 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
3177 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3178 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3179 MAX_TCP_OPTION_SPACE;
3180
3181 space = min_t(size_t, space, fo->size);
3182
3183
3184 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3185
3186 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
3187 if (!syn_data)
3188 goto fallback;
3189 syn_data->ip_summed = CHECKSUM_PARTIAL;
3190 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3191 if (space) {
3192 int copied = copy_from_iter(skb_put(syn_data, space), space,
3193 &fo->data->msg_iter);
3194 if (unlikely(!copied)) {
3195 kfree_skb(syn_data);
3196 goto fallback;
3197 }
3198 if (copied != space) {
3199 skb_trim(syn_data, copied);
3200 space = copied;
3201 }
3202 }
3203
3204 if (space == fo->size)
3205 fo->data = NULL;
3206 fo->copied = space;
3207
3208 tcp_connect_queue_skb(sk, syn_data);
3209
3210 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3211
3212 syn->skb_mstamp = syn_data->skb_mstamp;
3213
3214
3215
3216
3217
3218
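 /* A full SYN+data clone was transmitted (or the attempt failed).  Strip
  * the SYN octet from the syn_data skb kept on the write queue, since the
  * separate no-data SYN is also queued and covers retransmission of the
  * SYN itself.
  */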
3219 TCP_SKB_CB(syn_data)->seq++;
3220 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3221 if (!err) {
3222 tp->syn_data = (fo->copied > 0);
3223 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3224 goto done;
3225 }
3226
3227fallback:
3228
3229 if (fo->cookie.len > 0)
3230 fo->cookie.len = 0;
3231 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3232 if (err)
3233 tp->syn_fastopen = 0;
3234done:
3235 fo->cookie.len = -1;
3236 return err;
3237}
3238
3239
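/* Build a SYN and send it off, kicking off the three-way handshake.  In TCP
 * repair mode the handshake is skipped entirely; with a pending Fast Open
 * request the SYN carries data via tcp_send_syn_data().
 */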
3240int tcp_connect(struct sock *sk)
3241{
3242 struct tcp_sock *tp = tcp_sk(sk);
3243 struct sk_buff *buff;
3244 int err;
3245
3246 tcp_connect_init(sk);
3247
3248 if (unlikely(tp->repair)) {
3249 tcp_finish_connect(sk, NULL);
3250 return 0;
3251 }
3252
3253 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
3254 if (unlikely(!buff))
3255 return -ENOBUFS;
3256
3257 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3258 tp->retrans_stamp = tcp_time_stamp;
3259 tcp_connect_queue_skb(sk, buff);
3260 tcp_ecn_send_syn(sk, buff);
3261
3262
3263 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3264 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3265 if (err == -ECONNREFUSED)
3266 return err;
3267
3268
3269
3270
3271 tp->snd_nxt = tp->write_seq;
3272 tp->pushed_seq = tp->write_seq;
3273 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3274
3275
3276 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3277 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3278 return 0;
3279}
3280EXPORT_SYMBOL(tcp_connect);
3281
3282
3283
3284
3285
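/* Send out a delayed ACK.  The caller has already decided an ACK is needed
 * (see tcp_ack_snd_check()); this routine only picks the timeout, sending
 * immediately if the delayed-ACK timer was blocked or is about to fire
 * anyway.
 */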
3286void tcp_send_delayed_ack(struct sock *sk)
3287{
3288 struct inet_connection_sock *icsk = inet_csk(sk);
3289 int ato = icsk->icsk_ack.ato;
3290 unsigned long timeout;
3291
3292 tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
3293
3294 if (ato > TCP_DELACK_MIN) {
3295 const struct tcp_sock *tp = tcp_sk(sk);
3296 int max_ato = HZ / 2;
3297
3298 if (icsk->icsk_ack.pingpong ||
3299 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3300 max_ato = TCP_DELACK_MAX;
3301
3302
3303
3304
3305
3306
3307
3308 if (tp->srtt_us) {
3309 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3310 TCP_DELACK_MIN);
3311
3312 if (rtt < max_ato)
3313 max_ato = rtt;
3314 }
3315
3316 ato = min(ato, max_ato);
3317 }
3318
3319
3320 timeout = jiffies + ato;
3321
3322
3323 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3324
3325
3326
3327 if (icsk->icsk_ack.blocked ||
3328 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3329 tcp_send_ack(sk);
3330 return;
3331 }
3332
3333 if (!time_before(timeout, icsk->icsk_ack.timeout))
3334 timeout = icsk->icsk_ack.timeout;
3335 }
3336 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3337 icsk->icsk_ack.timeout = timeout;
3338 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3339}
3340
3341
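/* Send an ACK right away.  If no buffer can be allocated, fall back to
 * scheduling a delayed ACK so the ACK is not lost entirely.
 */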
3342void tcp_send_ack(struct sock *sk)
3343{
3344 struct sk_buff *buff;
3345
3346
3347 if (sk->sk_state == TCP_CLOSE)
3348 return;
3349
3350 tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
3351
3352
3353
3354
3355
3356 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3357 if (!buff) {
3358 inet_csk_schedule_ack(sk);
3359 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3360 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3361 TCP_DELACK_MAX, TCP_RTO_MAX);
3362 return;
3363 }
3364
3365
3366 skb_reserve(buff, MAX_TCP_HEADER);
3367 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3368
3369
3370
3371
3372
3373
3374
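 /* Flag this skb as a pure ACK (minimal truesize) so it barely counts
  * against TCP Small Queues and fq/pacing limits.
  */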
3375 skb_set_tcp_pure_ack(buff);
3376
3377
3378 skb_mstamp_get(&buff->skb_mstamp);
3379 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3380}
3381EXPORT_SYMBOL_GPL(tcp_send_ack);
3382
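/* Send a bare ACK with an out-of-date sequence number (snd_una - 1, or
 * snd_una when probing urgent mode).  The peer should respond with an ACK
 * carrying its current window, which is the whole point of the probe.
 */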
3394static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3395{
3396 struct tcp_sock *tp = tcp_sk(sk);
3397 struct sk_buff *skb;
3398
3399
3400 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3401 if (!skb)
3402 return -1;
3403
3404
3405 skb_reserve(skb, MAX_TCP_HEADER);
3406
3407
3408
3409
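 /* Use an old sequence number so the other end is forced to ACK it.
  * The skb is neither queued nor cloned, just sent.
  */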
3410 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3411 skb_mstamp_get(&skb->skb_mstamp);
3412 NET_INC_STATS(sock_net(sk), mib);
3413 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3414}
3415
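/* Force a window probe on an established socket: snd_wl1 is pulled back so
 * the ACK elicited by the probe is accepted as a window update.
 */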
3416void tcp_send_window_probe(struct sock *sk)
3417{
3418 if (sk->sk_state == TCP_ESTABLISHED) {
3419 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3420 tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
3421 }
3422}
3423
3424
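/* Initiate a keepalive or window probe.  If there is unsent data that fits
 * (even partially) in the window, push one segment out; otherwise fall back
 * to a zero-window probe via tcp_xmit_probe_skb().
 */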
3425int tcp_write_wakeup(struct sock *sk, int mib)
3426{
3427 struct tcp_sock *tp = tcp_sk(sk);
3428 struct sk_buff *skb;
3429
3430 if (sk->sk_state == TCP_CLOSE)
3431 return -1;
3432
3433 skb = tcp_send_head(sk);
3434 if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3435 int err;
3436 unsigned int mss = tcp_current_mss(sk);
3437 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3438
3439 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3440 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3441
3442
3443
3444
3445
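 /* We are probing the opening of a window that is not zero: either only
  * part of the skb fits, or the skb is larger than one MSS, so transmit
  * just the piece that fits.
  */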
3446 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3447 skb->len > mss) {
3448 seg_size = min(seg_size, mss);
3449 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3450 if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
3451 return -1;
3452 } else if (!tcp_skb_pcount(skb))
3453 tcp_set_skb_tso_segs(skb, mss);
3454
3455 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3456 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3457 if (!err)
3458 tcp_event_new_data_sent(sk, skb);
3459 return err;
3460 } else {
3461 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3462 tcp_xmit_probe_skb(sk, 1, mib);
3463 return tcp_xmit_probe_skb(sk, 0, mib);
3464 }
3465}
3466
3467
3468
3469
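/* A window probe timeout has occurred.  Send a probe (or a real segment) via
 * tcp_write_wakeup() and re-arm the probe timer with exponential backoff,
 * unless the send failed only because of local congestion.  If data is in
 * flight or there is nothing left to send, probing is cancelled instead.
 */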
3470void tcp_send_probe0(struct sock *sk)
3471{
3472 struct inet_connection_sock *icsk = inet_csk(sk);
3473 struct tcp_sock *tp = tcp_sk(sk);
3474 unsigned long probe_max;
3475 int err;
3476
3477 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3478
3479 if (tp->packets_out || !tcp_send_head(sk)) {
3480
3481 icsk->icsk_probes_out = 0;
3482 icsk->icsk_backoff = 0;
3483 return;
3484 }
3485
3486 if (err <= 0) {
3487 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3488 icsk->icsk_backoff++;
3489 icsk->icsk_probes_out++;
3490 probe_max = TCP_RTO_MAX;
3491 } else {
3492
3493
3494
3495
3496
3497
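 /* The probe was not sent due to local congestion: do not back off or
  * bump icsk_probes_out, so local senders keep competing for resources;
  * just retry after a short interval.
  */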
3498 if (!icsk->icsk_probes_out)
3499 icsk->icsk_probes_out = 1;
3500 probe_max = TCP_RESOURCE_PROBE_INTERVAL;
3501 }
3502 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3503 tcp_probe0_when(sk, probe_max),
3504 TCP_RTO_MAX);
3505}
3506
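/* Retransmit a SYN-ACK for @req, picking a fresh tx hash and updating the
 * retransmit counters on success.
 */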
3507int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3508{
3509 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3510 struct flowi fl;
3511 int res;
3512
3513 tcp_rsk(req)->txhash = net_tx_rndhash();
3514 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true);
3515 if (!res) {
3516 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
3517 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3518 }
3519 return res;
3520}
3521EXPORT_SYMBOL(tcp_rtx_synack);
3522