/*
 * Implementation of the Transmission Control Protocol (TCP): the output
 * (transmit) side.
 */
#define pr_fmt(fmt) "TCP: " fmt

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
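
/* People can turn this off for buggy TCPs found in printers etc. */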
int sysctl_tcp_retrans_collapse __read_mostly = 1;
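
/* People can turn this on to work with those rare, broken TCPs that
 * interpret the window field as a signed quantity.
 */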
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
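
/* Limit the number of bytes a single flow may have queued in the
 * qdisc/device layers (TCP Small Queues).  Default: 256 kbytes.
 */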
int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
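
/* This limits the percentage of the congestion window which we
 * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */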
int sysctl_tcp_tso_win_divisor __read_mostly = 3;
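
/* By default, RFC 2861 behavior: validate/decay the congestion window when
 * the connection has been idle or application-limited.
 */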
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);
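
/* Account for a newly sent skb: advance the send head, bump snd_nxt and
 * packets_out, and make sure the retransmission timer is armed.
 */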
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int prior_packets = tp->packets_out;

	tcp_advance_send_head(sk, skb);
	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

	tp->packets_out += tcp_skb_pcount(skb);
	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		tcp_rearm_rto(sk);
	}

	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
		      tcp_skb_pcount(skb));
}
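
/* SND.NXT, if the window was not shrunk.  If the peer shrank the window,
 * sending at SND.NXT could be outside it, so fall back to the right edge
 * of the window instead.
 */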
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
		return tp->snd_nxt;
	else
		return tcp_wnd_end(tp);
}
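
/* Calculate the MSS to advertise in the SYN segment: start from the cached
 * advertised MSS and clamp it with the route metric, if one is available.
 */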
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst) {
		unsigned int metric = dst_metric_advmss(dst);

		if (metric < mss) {
			mss = metric;
			tp->advmss = mss;
		}
	}

	return (__u16)mss;
}
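
/* RFC 2861: reset the congestion window after an idle period longer than
 * the RTO ("slow start restart"), halving cwnd once per RTO elapsed.
 */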
void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
	u32 cwnd = tp->snd_cwnd;

	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

	tp->snd_ssthresh = tcp_current_ssthresh(sk);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->snd_cwnd_used = 0;
}
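
/* Congestion-state accounting for a data packet being sent: signal TX start
 * to the congestion-control module when nothing was in flight, record the
 * send time, and enter delayed-ACK "pingpong" (interactive) mode when we are
 * replying quickly after receiving data.
 */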
static void tcp_event_data_sent(struct tcp_sock *tp,
				struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 now = tcp_time_stamp;

	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	tp->lsndtime = now;

	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
		icsk->icsk_ack.pingpong = 1;
}
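
/* Account for an ACK we sent: reduce the quick-ACK budget and stop any
 * pending delayed-ACK timer.
 */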
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
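
/* Initial receive window, in full-sized segments: twice the initial
 * congestion window so that new data can be sent during fast recovery,
 * scaled down (to at least 2 segments) when the MSS exceeds 1460 bytes.
 */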
u32 tcp_default_init_rwnd(u32 mss)
{
	u32 init_rwnd = TCP_INIT_CWND * 2;

	if (mss > 1460)
		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
	return init_rwnd;
}
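
/* Determine the window scaling factor and the initial window to offer,
 * based on the amount of receive buffer space passed in.  The results are
 * stored through the rcv_wnd, window_clamp and rcv_wscale pointers.
 */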
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	if (*window_clamp == 0)
		(*window_clamp) = (65535 << 14);
	space = min(*window_clamp, space);

	if (space > mss)
		space = (space / mss) * mss;

	if (sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = space;

	(*rcv_wscale) = 0;
	if (wscale_ok) {
		space = max_t(u32, space, sysctl_tcp_rmem[2]);
		space = max_t(u32, space, sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		while (space > 65535 && (*rcv_wscale) < 14) {
			space >>= 1;
			(*rcv_wscale)++;
		}
	}

	if (mss > (1 << *rcv_wscale)) {
		if (!init_rcv_wnd)
			init_rcv_wnd = tcp_default_init_rwnd(mss);
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
	}

	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
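
/* Choose a new window to advertise, update state in tcp_sock for the
 * socket, and return the result with RFC 1323 scaling applied.  The window
 * is never allowed to shrink relative to what was previously advertised.
 */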
static u16 tcp_select_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 old_win = tp->rcv_wnd;
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	if (new_win < cur_win) {
		if (new_win == 0)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPWANTZEROWINDOWADV);
		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
		new_win = min(new_win, MAX_TCP_WINDOW);
	else
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

	new_win >>= tp->rx_opt.rcv_wscale;

	if (new_win == 0) {
		tp->pred_flags = 0;
		if (old_win)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPTOZEROWINDOWADV);
	} else if (old_win == 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
	}

	return new_win;
}
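
/* Packet ECN state for a SYN-ACK. */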
static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
	if (!(tp->ecn_flags & TCP_ECN_OK))
		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
	else if (tcp_ca_needs_ecn(sk))
		INET_ECN_xmit(sk);
}
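
/* Packet ECN state for a SYN. */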
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
		       tcp_ca_needs_ecn(sk);

	if (!use_ecn) {
		const struct dst_entry *dst = __sk_dst_get(sk);

		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
			use_ecn = true;
	}

	tp->ecn_flags = 0;

	if (use_ecn) {
		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
		tp->ecn_flags = TCP_ECN_OK;
		if (tcp_ca_needs_ecn(sk))
			INET_ECN_xmit(sk);
	}
}
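
/* If the SYN (or SYN-ACK) needs to be retransmitted and ECN fallback is
 * enabled, stop asking for ECN on the retransmit.
 */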
static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
	if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
		TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
}
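
/* Reflect the ECN-setup state of the request into the SYN-ACK header. */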
static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
{
	if (inet_rsk(req)->ecn_ok)
		th->ece = 1;
}
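
/* Set up ECN state for an outgoing data segment: mark new data packets as
 * ECT, send CWR once after we have reduced the congestion window, and keep
 * echoing ECE while we are still waiting for the peer's CWR.
 */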
static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
			 struct tcphdr *th, int tcp_header_len)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->ecn_flags & TCP_ECN_OK) {
		if (skb->len != tcp_header_len &&
		    !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
			INET_ECN_xmit(sk);
			if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
				th->cwr = 1;
				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
			}
		} else if (!tcp_ca_needs_ecn(sk)) {
			INET_ECN_dontxmit(sk);
		}
		if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
			th->ece = 1;
	}
}
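
/* Initialise the control-block fields of a non-data skb (SYN, FIN, RST,
 * pure ACK).  SYN and FIN consume one sequence number; data packets do not
 * go through here.
 */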
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum = 0;

	TCP_SKB_CB(skb)->tcp_flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;

	tcp_skb_pcount_set(skb, 1);

	TCP_SKB_CB(skb)->seq = seq;
	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
		seq++;
	TCP_SKB_CB(skb)->end_seq = seq;
}
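
/* Urgent mode is in effect whenever snd_up differs from snd_una. */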
static inline bool tcp_urg_mode(const struct tcp_sock *tp)
{
	return tp->snd_una != tp->snd_up;
}

#define OPTION_SACK_ADVERTISE	(1 << 0)
#define OPTION_TS		(1 << 1)
#define OPTION_MD5		(1 << 2)
#define OPTION_WSCALE		(1 << 3)
#define OPTION_FAST_OPEN_COOKIE	(1 << 8)
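
/* Scratch structure describing which TCP options (and their values) will
 * be written into an outgoing segment.
 */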
struct tcp_out_options {
	u16 options;
	u16 mss;
	u8 ws;
	u8 num_sack_blocks;
	u8 hash_size;
	__u8 *hash_location;
	__u32 tsval, tsecr;
	struct tcp_fastopen_cookie *fastopen_cookie;
};
431
432
433
434
435
436
437
438
439
440
441
442
443
444
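/* Write previously computed TCP options to the packet.
 *
 * Beware: something in the Internet is very sensitive to the ordering of
 * TCP options, so the order used here (MD5, MSS, timestamp/SACK-permitted,
 * window scale, SACK blocks, Fast Open) should not be changed casually.
 */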
445static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
446 struct tcp_out_options *opts)
447{
448 u16 options = opts->options;
449
450 if (unlikely(OPTION_MD5 & options)) {
451 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
452 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
453
454 opts->hash_location = (__u8 *)ptr;
455 ptr += 4;
456 }
457
458 if (unlikely(opts->mss)) {
459 *ptr++ = htonl((TCPOPT_MSS << 24) |
460 (TCPOLEN_MSS << 16) |
461 opts->mss);
462 }
463
464 if (likely(OPTION_TS & options)) {
465 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
466 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
467 (TCPOLEN_SACK_PERM << 16) |
468 (TCPOPT_TIMESTAMP << 8) |
469 TCPOLEN_TIMESTAMP);
470 options &= ~OPTION_SACK_ADVERTISE;
471 } else {
472 *ptr++ = htonl((TCPOPT_NOP << 24) |
473 (TCPOPT_NOP << 16) |
474 (TCPOPT_TIMESTAMP << 8) |
475 TCPOLEN_TIMESTAMP);
476 }
477 *ptr++ = htonl(opts->tsval);
478 *ptr++ = htonl(opts->tsecr);
479 }
480
481 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
482 *ptr++ = htonl((TCPOPT_NOP << 24) |
483 (TCPOPT_NOP << 16) |
484 (TCPOPT_SACK_PERM << 8) |
485 TCPOLEN_SACK_PERM);
486 }
487
488 if (unlikely(OPTION_WSCALE & options)) {
489 *ptr++ = htonl((TCPOPT_NOP << 24) |
490 (TCPOPT_WINDOW << 16) |
491 (TCPOLEN_WINDOW << 8) |
492 opts->ws);
493 }
494
495 if (unlikely(opts->num_sack_blocks)) {
496 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
497 tp->duplicate_sack : tp->selective_acks;
498 int this_sack;
499
500 *ptr++ = htonl((TCPOPT_NOP << 24) |
501 (TCPOPT_NOP << 16) |
502 (TCPOPT_SACK << 8) |
503 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
504 TCPOLEN_SACK_PERBLOCK)));
505
506 for (this_sack = 0; this_sack < opts->num_sack_blocks;
507 ++this_sack) {
508 *ptr++ = htonl(sp[this_sack].start_seq);
509 *ptr++ = htonl(sp[this_sack].end_seq);
510 }
511
512 tp->rx_opt.dsack = 0;
513 }
514
515 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
516 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
517 u8 *p = (u8 *)ptr;
518 u32 len;
519
520 if (foc->exp) {
521 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
522 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
523 TCPOPT_FASTOPEN_MAGIC);
524 p += TCPOLEN_EXP_FASTOPEN_BASE;
525 } else {
526 len = TCPOLEN_FASTOPEN_BASE + foc->len;
527 *p++ = TCPOPT_FASTOPEN;
528 *p++ = len;
529 }
530
531 memcpy(p, foc->val, foc->len);
532 if ((len & 3) == 2) {
533 p[foc->len] = TCPOPT_NOP;
534 p[foc->len + 1] = TCPOPT_NOP;
535 }
536 ptr += (len + 3) >> 2;
537 }
538}
539
540
541
542
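/* Compute TCP options for SYN packets.  This is not the final network wire
 * format yet; tcp_options_write() does that later.
 */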
543static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
544 struct tcp_out_options *opts,
545 struct tcp_md5sig_key **md5)
546{
547 struct tcp_sock *tp = tcp_sk(sk);
548 unsigned int remaining = MAX_TCP_OPTION_SPACE;
549 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
550
551#ifdef CONFIG_TCP_MD5SIG
552 *md5 = tp->af_specific->md5_lookup(sk, sk);
553 if (*md5) {
554 opts->options |= OPTION_MD5;
555 remaining -= TCPOLEN_MD5SIG_ALIGNED;
556 }
557#else
558 *md5 = NULL;
559#endif
560
561
562
563
564
565
566
567
568
569
570 opts->mss = tcp_advertise_mss(sk);
571 remaining -= TCPOLEN_MSS_ALIGNED;
572
573 if (likely(sysctl_tcp_timestamps && !*md5)) {
574 opts->options |= OPTION_TS;
575 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
576 opts->tsecr = tp->rx_opt.ts_recent;
577 remaining -= TCPOLEN_TSTAMP_ALIGNED;
578 }
579 if (likely(sysctl_tcp_window_scaling)) {
580 opts->ws = tp->rx_opt.rcv_wscale;
581 opts->options |= OPTION_WSCALE;
582 remaining -= TCPOLEN_WSCALE_ALIGNED;
583 }
584 if (likely(sysctl_tcp_sack)) {
585 opts->options |= OPTION_SACK_ADVERTISE;
586 if (unlikely(!(OPTION_TS & opts->options)))
587 remaining -= TCPOLEN_SACKPERM_ALIGNED;
588 }
589
590 if (fastopen && fastopen->cookie.len >= 0) {
591 u32 need = fastopen->cookie.len;
592
593 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
594 TCPOLEN_FASTOPEN_BASE;
595 need = (need + 3) & ~3U;
596 if (remaining >= need) {
597 opts->options |= OPTION_FAST_OPEN_COOKIE;
598 opts->fastopen_cookie = &fastopen->cookie;
599 remaining -= need;
600 tp->syn_fastopen = 1;
601 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
602 }
603 }
604
605 return MAX_TCP_OPTION_SPACE - remaining;
606}
607
608
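/* Set up TCP options for SYN-ACKs. */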
609static unsigned int tcp_synack_options(struct request_sock *req,
610 unsigned int mss, struct sk_buff *skb,
611 struct tcp_out_options *opts,
612 const struct tcp_md5sig_key *md5,
613 struct tcp_fastopen_cookie *foc)
614{
615 struct inet_request_sock *ireq = inet_rsk(req);
616 unsigned int remaining = MAX_TCP_OPTION_SPACE;
617
618#ifdef CONFIG_TCP_MD5SIG
619 if (md5) {
620 opts->options |= OPTION_MD5;
621 remaining -= TCPOLEN_MD5SIG_ALIGNED;
622
623
624
625
626
627
628 ireq->tstamp_ok &= !ireq->sack_ok;
629 }
630#endif
631
632
633 opts->mss = mss;
634 remaining -= TCPOLEN_MSS_ALIGNED;
635
636 if (likely(ireq->wscale_ok)) {
637 opts->ws = ireq->rcv_wscale;
638 opts->options |= OPTION_WSCALE;
639 remaining -= TCPOLEN_WSCALE_ALIGNED;
640 }
641 if (likely(ireq->tstamp_ok)) {
642 opts->options |= OPTION_TS;
643 opts->tsval = tcp_skb_timestamp(skb);
644 opts->tsecr = req->ts_recent;
645 remaining -= TCPOLEN_TSTAMP_ALIGNED;
646 }
647 if (likely(ireq->sack_ok)) {
648 opts->options |= OPTION_SACK_ADVERTISE;
649 if (unlikely(!ireq->tstamp_ok))
650 remaining -= TCPOLEN_SACKPERM_ALIGNED;
651 }
652 if (foc != NULL && foc->len >= 0) {
653 u32 need = foc->len;
654
655 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
656 TCPOLEN_FASTOPEN_BASE;
657 need = (need + 3) & ~3U;
658 if (remaining >= need) {
659 opts->options |= OPTION_FAST_OPEN_COOKIE;
660 opts->fastopen_cookie = foc;
661 remaining -= need;
662 }
663 }
664
665 return MAX_TCP_OPTION_SPACE - remaining;
666}
667
668
669
670
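/* Compute TCP options for ESTABLISHED sockets.  This is not the final wire
 * format yet.
 */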
671static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
672 struct tcp_out_options *opts,
673 struct tcp_md5sig_key **md5)
674{
675 struct tcp_sock *tp = tcp_sk(sk);
676 unsigned int size = 0;
677 unsigned int eff_sacks;
678
679 opts->options = 0;
680
681#ifdef CONFIG_TCP_MD5SIG
682 *md5 = tp->af_specific->md5_lookup(sk, sk);
683 if (unlikely(*md5)) {
684 opts->options |= OPTION_MD5;
685 size += TCPOLEN_MD5SIG_ALIGNED;
686 }
687#else
688 *md5 = NULL;
689#endif
690
691 if (likely(tp->rx_opt.tstamp_ok)) {
692 opts->options |= OPTION_TS;
693 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
694 opts->tsecr = tp->rx_opt.ts_recent;
695 size += TCPOLEN_TSTAMP_ALIGNED;
696 }
697
698 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
699 if (unlikely(eff_sacks)) {
700 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
701 opts->num_sack_blocks =
702 min_t(unsigned int, eff_sacks,
703 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
704 TCPOLEN_SACK_PERBLOCK);
705 size += TCPOLEN_SACK_BASE_ALIGNED +
706 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
707 }
708
709 return size;
710}
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
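/* TCP SMALL QUEUES (TSQ)
 *
 * The goal is to limit the number of bytes a flow keeps queued in the
 * qdisc and device layers.  When the limit is hit, further transmits are
 * deferred; the skb destructor (tcp_wfree()) later queues the socket on a
 * per-cpu tasklet so that transmission resumes from softirq context once
 * previously sent packets have left the host.
 */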
727struct tsq_tasklet {
728 struct tasklet_struct tasklet;
729 struct list_head head;
730};
731static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
732
733static void tcp_tsq_handler(struct sock *sk)
734{
735 if ((1 << sk->sk_state) &
736 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
737 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
738 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
739 0, GFP_ATOMIC);
740}
741
742
743
744
745
746
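/* One tasklet per cpu tries to send more skbs.
 * We run in tasklet context but need to disable irqs when
 * transferring the list because tcp_wfree() might modify it.
 */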
747static void tcp_tasklet_func(unsigned long data)
748{
749 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
750 LIST_HEAD(list);
751 unsigned long flags;
752 struct list_head *q, *n;
753 struct tcp_sock *tp;
754 struct sock *sk;
755
756 local_irq_save(flags);
757 list_splice_init(&tsq->head, &list);
758 local_irq_restore(flags);
759
760 list_for_each_safe(q, n, &list) {
761 tp = list_entry(q, struct tcp_sock, tsq_node);
762 list_del(&tp->tsq_node);
763
764 sk = (struct sock *)tp;
765 bh_lock_sock(sk);
766
767 if (!sock_owned_by_user(sk)) {
768 tcp_tsq_handler(sk);
769 } else {
770
771 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
772 }
773 bh_unlock_sock(sk);
774
775 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
776 sk_free(sk);
777 }
778}
779
780#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
781 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
782 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
783 (1UL << TCP_MTU_REDUCED_DEFERRED))
784
785
786
787
788
789
790
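/* tcp_release_cb - release_sock() callback
 *
 * Called at the end of release_sock() to perform the protocol actions
 * that were deferred (see TCP_DEFERRED_ALL) because the socket was owned
 * by the user when the corresponding events happened in BH context.
 */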
791void tcp_release_cb(struct sock *sk)
792{
793 struct tcp_sock *tp = tcp_sk(sk);
794 unsigned long flags, nflags;
795
796
797 do {
798 flags = tp->tsq_flags;
799 if (!(flags & TCP_DEFERRED_ALL))
800 return;
801 nflags = flags & ~TCP_DEFERRED_ALL;
802 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
803
804 if (flags & (1UL << TCP_TSQ_DEFERRED))
805 tcp_tsq_handler(sk);
806
807
808
809
810
811
812
813
814
815
816 sock_release_ownership(sk);
817
818 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
819 tcp_write_timer_handler(sk);
820 __sock_put(sk);
821 }
822 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
823 tcp_delack_timer_handler(sk);
824 __sock_put(sk);
825 }
826 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
827 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
828 __sock_put(sk);
829 }
830}
831EXPORT_SYMBOL(tcp_release_cb);
832
833void __init tcp_tasklet_init(void)
834{
835 int i;
836
837 for_each_possible_cpu(i) {
838 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
839
840 INIT_LIST_HEAD(&tsq->head);
841 tasklet_init(&tsq->tasklet,
842 tcp_tasklet_func,
843 (unsigned long)tsq);
844 }
845}
846
847
848
849
850
851
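/* Write buffer destructor automatically called from kfree_skb.
 * We can't transmit new skbs from this context, as we might already
 * hold the qdisc lock; instead, if TSQ throttled this socket, queue it
 * on the per-cpu tasklet so transmission resumes from softirq.
 */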
852void tcp_wfree(struct sk_buff *skb)
853{
854 struct sock *sk = skb->sk;
855 struct tcp_sock *tp = tcp_sk(sk);
856 int wmem;
857
858
859
860
861 wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
862
863
864
865
866
867
868
869
870 if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
871 goto out;
872
873 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
874 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
875 unsigned long flags;
876 struct tsq_tasklet *tsq;
877
878
879 local_irq_save(flags);
880 tsq = this_cpu_ptr(&tsq_tasklet);
881 list_add(&tp->tsq_node, &tsq->head);
882 tasklet_schedule(&tsq->tasklet);
883 local_irq_restore(flags);
884 return;
885 }
886out:
887 sk_free(sk);
888}
889
890
891
892
893
894
895
896
897
898
899
900
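/* This routine actually transmits TCP packets queued by the sendmsg path.
 * It is used by both the initial transmission and possible later
 * retransmissions.  All SKBs seen here are completely headerless: the TCP
 * header is built here, options are computed, and the result is handed to
 * the af-specific queue_xmit.  The skb is cloned (or copied) so that the
 * original stays in the write queue for retransmission.
 */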
901static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
902 gfp_t gfp_mask)
903{
904 const struct inet_connection_sock *icsk = inet_csk(sk);
905 struct inet_sock *inet;
906 struct tcp_sock *tp;
907 struct tcp_skb_cb *tcb;
908 struct tcp_out_options opts;
909 unsigned int tcp_options_size, tcp_header_size;
910 struct tcp_md5sig_key *md5;
911 struct tcphdr *th;
912 int err;
913
914 BUG_ON(!skb || !tcp_skb_pcount(skb));
915 tp = tcp_sk(sk);
916
917 if (clone_it) {
918 skb_mstamp_get(&skb->skb_mstamp);
919 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
920 - tp->snd_una;
921
922 if (unlikely(skb_cloned(skb)))
923 skb = pskb_copy(skb, gfp_mask);
924 else
925 skb = skb_clone(skb, gfp_mask);
926 if (unlikely(!skb))
927 return -ENOBUFS;
928 }
929
930 inet = inet_sk(sk);
931 tcb = TCP_SKB_CB(skb);
932 memset(&opts, 0, sizeof(opts));
933
934 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
935 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
936 else
937 tcp_options_size = tcp_established_options(sk, skb, &opts,
938 &md5);
939 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
940
941
942
943
944
945
946
947
948 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
949
950 skb_push(skb, tcp_header_size);
951 skb_reset_transport_header(skb);
952
953 skb_orphan(skb);
954 skb->sk = sk;
955 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
956 skb_set_hash_from_sk(skb, sk);
957 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
958
959
960 th = (struct tcphdr *)skb->data;
961 th->source = inet->inet_sport;
962 th->dest = inet->inet_dport;
963 th->seq = htonl(tcb->seq);
964 th->ack_seq = htonl(tp->rcv_nxt);
965 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
966 tcb->tcp_flags);
967
968 th->check = 0;
969 th->urg_ptr = 0;
970
971
972 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
973 if (before(tp->snd_up, tcb->seq + 0x10000)) {
974 th->urg_ptr = htons(tp->snd_up - tcb->seq);
975 th->urg = 1;
976 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
977 th->urg_ptr = htons(0xFFFF);
978 th->urg = 1;
979 }
980 }
981
982 tcp_options_write((__be32 *)(th + 1), tp, &opts);
983 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
984 if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
985 th->window = htons(tcp_select_window(sk));
986 tcp_ecn_send(sk, skb, th, tcp_header_size);
987 } else {
988
989
990
991 th->window = htons(min(tp->rcv_wnd, 65535U));
992 }
993#ifdef CONFIG_TCP_MD5SIG
994
995 if (md5) {
996 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
997 tp->af_specific->calc_md5_hash(opts.hash_location,
998 md5, sk, skb);
999 }
1000#endif
1001
1002 icsk->icsk_af_ops->send_check(sk, skb);
1003
1004 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1005 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1006
1007 if (skb->len != tcp_header_size) {
1008 tcp_event_data_sent(tp, sk);
1009 tp->data_segs_out += tcp_skb_pcount(skb);
1010 }
1011
1012 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1013 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1014 tcp_skb_pcount(skb));
1015
1016 tp->segs_out += tcp_skb_pcount(skb);
1017
1018 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
1019 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
1020
1021
1022 skb->tstamp.tv64 = 0;
1023
1024
1025 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
1026 sizeof(struct inet6_skb_parm)));
1027
1028 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1029
1030 if (likely(err <= 0))
1031 return err;
1032
1033 tcp_enter_cwr(sk);
1034
1035 return net_xmit_eval(err);
1036}
1037
1038
1039
1040
1041
1042
1043static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1044{
1045 struct tcp_sock *tp = tcp_sk(sk);
1046
1047
1048 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1049 __skb_header_release(skb);
1050 tcp_add_write_queue_tail(sk, skb);
1051 sk->sk_wmem_queued += skb->truesize;
1052 sk_mem_charge(sk, skb->truesize);
1053}
1054
1055
1056static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1057{
1058 if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
1059
1060
1061
1062 tcp_skb_pcount_set(skb, 1);
1063 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1064 } else {
1065 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1066 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
1067 }
1068}
1069
1070
1071
1072
1073static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1074 int decr)
1075{
1076 struct tcp_sock *tp = tcp_sk(sk);
1077
1078 if (!tp->sacked_out || tcp_is_reno(tp))
1079 return;
1080
1081 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1082 tp->fackets_out -= decr;
1083}
1084
1085
1086
1087
1088static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1089{
1090 struct tcp_sock *tp = tcp_sk(sk);
1091
1092 tp->packets_out -= decr;
1093
1094 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1095 tp->sacked_out -= decr;
1096 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1097 tp->retrans_out -= decr;
1098 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1099 tp->lost_out -= decr;
1100
1101
1102 if (tcp_is_reno(tp) && decr > 0)
1103 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1104
1105 tcp_adjust_fackets_out(sk, skb, decr);
1106
1107 if (tp->lost_skb_hint &&
1108 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1109 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1110 tp->lost_cnt_hint -= decr;
1111
1112 tcp_verify_left_out(tp);
1113}
1114
1115static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
1116{
1117 return TCP_SKB_CB(skb)->txstamp_ack ||
1118 (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
1119}
1120
1121static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1122{
1123 struct skb_shared_info *shinfo = skb_shinfo(skb);
1124
1125 if (unlikely(tcp_has_tx_tstamp(skb)) &&
1126 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1127 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1128 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1129
1130 shinfo->tx_flags &= ~tsflags;
1131 shinfo2->tx_flags |= tsflags;
1132 swap(shinfo->tskey, shinfo2->tskey);
1133 TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack;
1134 TCP_SKB_CB(skb)->txstamp_ack = 0;
1135 }
1136}
1137
1138static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1139{
1140 TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
1141 TCP_SKB_CB(skb)->eor = 0;
1142}
1143
1144
1145
1146
1147
1148
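/* Create two new TCP segments: shrink the given segment to "len" bytes and
 * append a new segment carrying the remainder right after it in the write
 * queue.  These are still headerless SKBs at this point.
 */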
1149int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1150 unsigned int mss_now, gfp_t gfp)
1151{
1152 struct tcp_sock *tp = tcp_sk(sk);
1153 struct sk_buff *buff;
1154 int nsize, old_factor;
1155 int nlen;
1156 u8 flags;
1157
1158 if (WARN_ON(len > skb->len))
1159 return -EINVAL;
1160
1161 nsize = skb_headlen(skb) - len;
1162 if (nsize < 0)
1163 nsize = 0;
1164
1165 if (skb_unclone(skb, gfp))
1166 return -ENOMEM;
1167
1168
1169 buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
1170 if (!buff)
1171 return -ENOMEM;
1172
1173 sk->sk_wmem_queued += buff->truesize;
1174 sk_mem_charge(sk, buff->truesize);
1175 nlen = skb->len - len - nsize;
1176 buff->truesize += nlen;
1177 skb->truesize -= nlen;
1178
1179
1180 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1181 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1182 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1183
1184
1185 flags = TCP_SKB_CB(skb)->tcp_flags;
1186 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1187 TCP_SKB_CB(buff)->tcp_flags = flags;
1188 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1189 tcp_skb_fragment_eor(skb, buff);
1190
1191 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1192
1193 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1194 skb_put(buff, nsize),
1195 nsize, 0);
1196
1197 skb_trim(skb, len);
1198
1199 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1200 } else {
1201 skb->ip_summed = CHECKSUM_PARTIAL;
1202 skb_split(skb, buff, len);
1203 }
1204
1205 buff->ip_summed = skb->ip_summed;
1206
1207 buff->tstamp = skb->tstamp;
1208 tcp_fragment_tstamp(skb, buff);
1209
1210 old_factor = tcp_skb_pcount(skb);
1211
1212
1213 tcp_set_skb_tso_segs(skb, mss_now);
1214 tcp_set_skb_tso_segs(buff, mss_now);
1215
1216
1217
1218
1219 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1220 int diff = old_factor - tcp_skb_pcount(skb) -
1221 tcp_skb_pcount(buff);
1222
1223 if (diff)
1224 tcp_adjust_pcount(sk, skb, diff);
1225 }
1226
1227
1228 __skb_header_release(buff);
1229 tcp_insert_write_queue_after(skb, buff, sk);
1230
1231 return 0;
1232}
1233
1234
1235
1236
1237
1238static void __pskb_trim_head(struct sk_buff *skb, int len)
1239{
1240 struct skb_shared_info *shinfo;
1241 int i, k, eat;
1242
1243 eat = min_t(int, len, skb_headlen(skb));
1244 if (eat) {
1245 __skb_pull(skb, eat);
1246 len -= eat;
1247 if (!len)
1248 return;
1249 }
1250 eat = len;
1251 k = 0;
1252 shinfo = skb_shinfo(skb);
1253 for (i = 0; i < shinfo->nr_frags; i++) {
1254 int size = skb_frag_size(&shinfo->frags[i]);
1255
1256 if (size <= eat) {
1257 skb_frag_unref(skb, i);
1258 eat -= size;
1259 } else {
1260 shinfo->frags[k] = shinfo->frags[i];
1261 if (eat) {
1262 shinfo->frags[k].page_offset += eat;
1263 skb_frag_size_sub(&shinfo->frags[k], eat);
1264 eat = 0;
1265 }
1266 k++;
1267 }
1268 }
1269 shinfo->nr_frags = k;
1270
1271 skb_reset_tail_pointer(skb);
1272 skb->data_len -= len;
1273 skb->len = skb->data_len;
1274}
1275
1276
1277int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1278{
1279 if (skb_unclone(skb, GFP_ATOMIC))
1280 return -ENOMEM;
1281
1282 __pskb_trim_head(skb, len);
1283
1284 TCP_SKB_CB(skb)->seq += len;
1285 skb->ip_summed = CHECKSUM_PARTIAL;
1286
1287 skb->truesize -= len;
1288 sk->sk_wmem_queued -= len;
1289 sk_mem_uncharge(sk, len);
1290 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1291
1292
1293 if (tcp_skb_pcount(skb) > 1)
1294 tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
1295
1296 return 0;
1297}
1298
1299
1300static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1301{
1302 const struct tcp_sock *tp = tcp_sk(sk);
1303 const struct inet_connection_sock *icsk = inet_csk(sk);
1304 int mss_now;
1305
1306
1307
1308
1309 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1310
1311
1312 if (icsk->icsk_af_ops->net_frag_header_len) {
1313 const struct dst_entry *dst = __sk_dst_get(sk);
1314
1315 if (dst && dst_allfrag(dst))
1316 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1317 }
1318
1319
1320 if (mss_now > tp->rx_opt.mss_clamp)
1321 mss_now = tp->rx_opt.mss_clamp;
1322
1323
1324 mss_now -= icsk->icsk_ext_hdr_len;
1325
1326
1327 if (mss_now < 48)
1328 mss_now = 48;
1329 return mss_now;
1330}
1331
1332
1333int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1334{
1335
1336 return __tcp_mtu_to_mss(sk, pmtu) -
1337 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1338}
1339
1340
1341int tcp_mss_to_mtu(struct sock *sk, int mss)
1342{
1343 const struct tcp_sock *tp = tcp_sk(sk);
1344 const struct inet_connection_sock *icsk = inet_csk(sk);
1345 int mtu;
1346
1347 mtu = mss +
1348 tp->tcp_header_len +
1349 icsk->icsk_ext_hdr_len +
1350 icsk->icsk_af_ops->net_header_len;
1351
1352
1353 if (icsk->icsk_af_ops->net_frag_header_len) {
1354 const struct dst_entry *dst = __sk_dst_get(sk);
1355
1356 if (dst && dst_allfrag(dst))
1357 mtu += icsk->icsk_af_ops->net_frag_header_len;
1358 }
1359 return mtu;
1360}
1361
1362
1363void tcp_mtup_init(struct sock *sk)
1364{
1365 struct tcp_sock *tp = tcp_sk(sk);
1366 struct inet_connection_sock *icsk = inet_csk(sk);
1367 struct net *net = sock_net(sk);
1368
1369 icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1370 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1371 icsk->icsk_af_ops->net_header_len;
1372 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1373 icsk->icsk_mtup.probe_size = 0;
1374 if (icsk->icsk_mtup.enabled)
1375 icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
1376}
1377EXPORT_SYMBOL(tcp_mtup_init);
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
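/* tcp_sync_mss() synchronizes the socket's cached MSS (tp->mss_cache) with
 * a freshly learned path MTU, clamping it against rx_opt.mss_clamp and the
 * current MTU-probing search range.  It returns the new mss_cache value;
 * tcp_current_mss() later subtracts the space taken by TCP options.
 */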
1401unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1402{
1403 struct tcp_sock *tp = tcp_sk(sk);
1404 struct inet_connection_sock *icsk = inet_csk(sk);
1405 int mss_now;
1406
1407 if (icsk->icsk_mtup.search_high > pmtu)
1408 icsk->icsk_mtup.search_high = pmtu;
1409
1410 mss_now = tcp_mtu_to_mss(sk, pmtu);
1411 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1412
1413
1414 icsk->icsk_pmtu_cookie = pmtu;
1415 if (icsk->icsk_mtup.enabled)
1416 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1417 tp->mss_cache = mss_now;
1418
1419 return mss_now;
1420}
1421EXPORT_SYMBOL(tcp_sync_mss);
1422
1423
1424
1425
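/* Compute the current effective MSS, taking SACK blocks, TCP options and
 * PMTU discovery events into account.
 */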
1426unsigned int tcp_current_mss(struct sock *sk)
1427{
1428 const struct tcp_sock *tp = tcp_sk(sk);
1429 const struct dst_entry *dst = __sk_dst_get(sk);
1430 u32 mss_now;
1431 unsigned int header_len;
1432 struct tcp_out_options opts;
1433 struct tcp_md5sig_key *md5;
1434
1435 mss_now = tp->mss_cache;
1436
1437 if (dst) {
1438 u32 mtu = dst_mtu(dst);
1439 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1440 mss_now = tcp_sync_mss(sk, mtu);
1441 }
1442
1443 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1444 sizeof(struct tcphdr);
1445
1446
1447
1448
1449 if (header_len != tp->tcp_header_len) {
1450 int delta = (int) header_len - tp->tcp_header_len;
1451 mss_now -= delta;
1452 }
1453
1454 return mss_now;
1455}
1456
1457
1458
1459
1460
1461static void tcp_cwnd_application_limited(struct sock *sk)
1462{
1463 struct tcp_sock *tp = tcp_sk(sk);
1464
1465 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1466 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1467
1468 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1469 u32 win_used = max(tp->snd_cwnd_used, init_win);
1470 if (win_used < tp->snd_cwnd) {
1471 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1472 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1473 }
1474 tp->snd_cwnd_used = 0;
1475 }
1476 tp->snd_cwnd_stamp = tcp_time_stamp;
1477}
1478
1479static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1480{
1481 struct tcp_sock *tp = tcp_sk(sk);
1482
1483
1484
1485
1486 if (!before(tp->snd_una, tp->max_packets_seq) ||
1487 tp->packets_out > tp->max_packets_out) {
1488 tp->max_packets_out = tp->packets_out;
1489 tp->max_packets_seq = tp->snd_nxt;
1490 tp->is_cwnd_limited = is_cwnd_limited;
1491 }
1492
1493 if (tcp_is_cwnd_limited(sk)) {
1494
1495 tp->snd_cwnd_used = 0;
1496 tp->snd_cwnd_stamp = tcp_time_stamp;
1497 } else {
1498
1499 if (tp->packets_out > tp->snd_cwnd_used)
1500 tp->snd_cwnd_used = tp->packets_out;
1501
1502 if (sysctl_tcp_slow_start_after_idle &&
1503 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1504 tcp_cwnd_application_limited(sk);
1505 }
1506}
1507
1508
1509static bool tcp_minshall_check(const struct tcp_sock *tp)
1510{
1511 return after(tp->snd_sml, tp->snd_una) &&
1512 !after(tp->snd_sml, tp->snd_nxt);
1513}
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1524 const struct sk_buff *skb)
1525{
1526 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1527 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1528}
1529
1530
1531
1532
1533
1534
1535
1536
1537static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1538 int nonagle)
1539{
1540 return partial &&
1541 ((nonagle & TCP_NAGLE_CORK) ||
1542 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1543}
1544
1545
1546
1547
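/* Return how many segments we would like in a TSO packet: roughly the
 * amount the socket may send in ~1 ms at its current pacing rate, bounded
 * by sysctl_tcp_min_tso_segs and the device's gso_max_segs.
 */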
1548static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
1549{
1550 u32 bytes, segs;
1551
1552 bytes = min(sk->sk_pacing_rate >> 10,
1553 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1554
1555
1556
1557
1558
1559
1560 segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
1561
1562 return min_t(u32, segs, sk->sk_gso_max_segs);
1563}
1564
1565
1566static unsigned int tcp_mss_split_point(const struct sock *sk,
1567 const struct sk_buff *skb,
1568 unsigned int mss_now,
1569 unsigned int max_segs,
1570 int nonagle)
1571{
1572 const struct tcp_sock *tp = tcp_sk(sk);
1573 u32 partial, needed, window, max_len;
1574
1575 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1576 max_len = mss_now * max_segs;
1577
1578 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1579 return max_len;
1580
1581 needed = min(skb->len, window);
1582
1583 if (max_len <= needed)
1584 return max_len;
1585
1586 partial = needed % mss_now;
1587
1588
1589
1590
1591 if (tcp_nagle_check(partial != 0, tp, nonagle))
1592 return needed - partial;
1593
1594 return needed;
1595}
1596
1597
1598
1599
1600static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1601 const struct sk_buff *skb)
1602{
1603 u32 in_flight, cwnd, halfcwnd;
1604
1605
1606 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1607 tcp_skb_pcount(skb) == 1)
1608 return 1;
1609
1610 in_flight = tcp_packets_in_flight(tp);
1611 cwnd = tp->snd_cwnd;
1612 if (in_flight >= cwnd)
1613 return 0;
1614
1615
1616
1617
1618 halfcwnd = max(cwnd >> 1, 1U);
1619 return min(halfcwnd, cwnd - in_flight);
1620}
1621
1622
1623
1624
1625
1626static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1627{
1628 int tso_segs = tcp_skb_pcount(skb);
1629
1630 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1631 tcp_set_skb_tso_segs(skb, mss_now);
1632 tso_segs = tcp_skb_pcount(skb);
1633 }
1634 return tso_segs;
1635}
1636
1637
1638
1639
1640
1641static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1642 unsigned int cur_mss, int nonagle)
1643{
1644
1645
1646
1647
1648
1649
1650 if (nonagle & TCP_NAGLE_PUSH)
1651 return true;
1652
1653
1654 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1655 return true;
1656
1657 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1658 return true;
1659
1660 return false;
1661}
1662
1663
1664static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1665 const struct sk_buff *skb,
1666 unsigned int cur_mss)
1667{
1668 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1669
1670 if (skb->len > cur_mss)
1671 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1672
1673 return !after(end_seq, tcp_wnd_end(tp));
1674}
1675
1676
1677
1678
1679
1680static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1681 unsigned int cur_mss, int nonagle)
1682{
1683 const struct tcp_sock *tp = tcp_sk(sk);
1684 unsigned int cwnd_quota;
1685
1686 tcp_init_tso_segs(skb, cur_mss);
1687
1688 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1689 return 0;
1690
1691 cwnd_quota = tcp_cwnd_test(tp, skb);
1692 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1693 cwnd_quota = 0;
1694
1695 return cwnd_quota;
1696}
1697
1698
1699bool tcp_may_send_now(struct sock *sk)
1700{
1701 const struct tcp_sock *tp = tcp_sk(sk);
1702 struct sk_buff *skb = tcp_send_head(sk);
1703
1704 return skb &&
1705 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1706 (tcp_skb_is_last(sk, skb) ?
1707 tp->nonagle : TCP_NAGLE_PUSH));
1708}
1709
1710
1711
1712
1713
1714
1715
1716
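/* Trim a TSO skb to "len" bytes and put the remaining data into a new skb
 * right after it in the write queue.  Unlike tcp_fragment(), this is only
 * used for packets that have never been transmitted, so the bookkeeping is
 * simpler and no checksum splitting is needed.
 */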
1717static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1718 unsigned int mss_now, gfp_t gfp)
1719{
1720 struct sk_buff *buff;
1721 int nlen = skb->len - len;
1722 u8 flags;
1723
1724
1725 if (skb->len != skb->data_len)
1726 return tcp_fragment(sk, skb, len, mss_now, gfp);
1727
1728 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1729 if (unlikely(!buff))
1730 return -ENOMEM;
1731
1732 sk->sk_wmem_queued += buff->truesize;
1733 sk_mem_charge(sk, buff->truesize);
1734 buff->truesize += nlen;
1735 skb->truesize -= nlen;
1736
1737
1738 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1739 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1740 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1741
1742
1743 flags = TCP_SKB_CB(skb)->tcp_flags;
1744 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1745 TCP_SKB_CB(buff)->tcp_flags = flags;
1746
1747
1748 TCP_SKB_CB(buff)->sacked = 0;
1749
1750 tcp_skb_fragment_eor(skb, buff);
1751
1752 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1753 skb_split(skb, buff, len);
1754 tcp_fragment_tstamp(skb, buff);
1755
1756
1757 tcp_set_skb_tso_segs(skb, mss_now);
1758 tcp_set_skb_tso_segs(buff, mss_now);
1759
1760
1761 __skb_header_release(buff);
1762 tcp_insert_write_queue_after(skb, buff, sk);
1763
1764 return 0;
1765}
1766
1767
1768
1769
1770
1771
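/* Try to defer sending, if possible, in order to minimize the amount of
 * TSO splitting we do.  View it as a kind of TSO Nagle test.
 */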
1772static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1773 bool *is_cwnd_limited, u32 max_segs)
1774{
1775 const struct inet_connection_sock *icsk = inet_csk(sk);
1776 u32 age, send_win, cong_win, limit, in_flight;
1777 struct tcp_sock *tp = tcp_sk(sk);
1778 struct skb_mstamp now;
1779 struct sk_buff *head;
1780 int win_divisor;
1781
1782 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1783 goto send_now;
1784
1785 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
1786 goto send_now;
1787
1788
1789
1790
1791 if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
1792 goto send_now;
1793
1794 in_flight = tcp_packets_in_flight(tp);
1795
1796 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1797
1798 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1799
1800
1801 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1802
1803 limit = min(send_win, cong_win);
1804
1805
1806 if (limit >= max_segs * tp->mss_cache)
1807 goto send_now;
1808
1809
1810 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1811 goto send_now;
1812
1813 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1814 if (win_divisor) {
1815 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1816
1817
1818
1819
1820 chunk /= win_divisor;
1821 if (limit >= chunk)
1822 goto send_now;
1823 } else {
1824
1825
1826
1827
1828
1829 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1830 goto send_now;
1831 }
1832
1833 head = tcp_write_queue_head(sk);
1834 skb_mstamp_get(&now);
1835 age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
1836
1837 if (age < (tp->srtt_us >> 4))
1838 goto send_now;
1839
1840
1841
1842 if (cong_win < send_win && cong_win <= skb->len)
1843 *is_cwnd_limited = true;
1844
1845 return true;
1846
1847send_now:
1848 return false;
1849}
1850
1851static inline void tcp_mtu_check_reprobe(struct sock *sk)
1852{
1853 struct inet_connection_sock *icsk = inet_csk(sk);
1854 struct tcp_sock *tp = tcp_sk(sk);
1855 struct net *net = sock_net(sk);
1856 u32 interval;
1857 s32 delta;
1858
1859 interval = net->ipv4.sysctl_tcp_probe_interval;
1860 delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
1861 if (unlikely(delta >= interval * HZ)) {
1862 int mss = tcp_current_mss(sk);
1863
1864
1865 icsk->icsk_mtup.probe_size = 0;
1866 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
1867 sizeof(struct tcphdr) +
1868 icsk->icsk_af_ops->net_header_len;
1869 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
1870
1871
1872 icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
1873 }
1874}
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
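/* Create a new MTU probe if we are ready: coalesce pending data into one
 * probe-sized packet and send it.  Returns 0 if we should wait to probe
 * (e.g. no cwnd or window available), 1 if a probe was sent, -1 otherwise.
 */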
1885static int tcp_mtu_probe(struct sock *sk)
1886{
1887 struct tcp_sock *tp = tcp_sk(sk);
1888 struct inet_connection_sock *icsk = inet_csk(sk);
1889 struct sk_buff *skb, *nskb, *next;
1890 struct net *net = sock_net(sk);
1891 int len;
1892 int probe_size;
1893 int size_needed;
1894 int copy;
1895 int mss_now;
1896 int interval;
1897
1898
1899
1900
1901
1902 if (!icsk->icsk_mtup.enabled ||
1903 icsk->icsk_mtup.probe_size ||
1904 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1905 tp->snd_cwnd < 11 ||
1906 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1907 return -1;
1908
1909
1910
1911
1912
1913 mss_now = tcp_current_mss(sk);
1914 probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
1915 icsk->icsk_mtup.search_low) >> 1);
1916 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1917 interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
1918
1919
1920
1921
1922 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
1923 interval < net->ipv4.sysctl_tcp_probe_threshold) {
1924
1925
1926
1927 tcp_mtu_check_reprobe(sk);
1928 return -1;
1929 }
1930
1931
1932 if (tp->write_seq - tp->snd_nxt < size_needed)
1933 return -1;
1934
1935 if (tp->snd_wnd < size_needed)
1936 return -1;
1937 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1938 return 0;
1939
1940
1941 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1942 if (!tcp_packets_in_flight(tp))
1943 return -1;
1944 else
1945 return 0;
1946 }
1947
1948
1949 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
1950 if (!nskb)
1951 return -1;
1952 sk->sk_wmem_queued += nskb->truesize;
1953 sk_mem_charge(sk, nskb->truesize);
1954
1955 skb = tcp_send_head(sk);
1956
1957 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1958 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1959 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1960 TCP_SKB_CB(nskb)->sacked = 0;
1961 nskb->csum = 0;
1962 nskb->ip_summed = skb->ip_summed;
1963
1964 tcp_insert_write_queue_before(nskb, skb, sk);
1965
1966 len = 0;
1967 tcp_for_write_queue_from_safe(skb, next, sk) {
1968 copy = min_t(int, skb->len, probe_size - len);
1969 if (nskb->ip_summed) {
1970 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1971 } else {
1972 __wsum csum = skb_copy_and_csum_bits(skb, 0,
1973 skb_put(nskb, copy),
1974 copy, 0);
1975 nskb->csum = csum_block_add(nskb->csum, csum, len);
1976 }
1977
1978 if (skb->len <= copy) {
1979
1980
1981 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1982 tcp_unlink_write_queue(skb, sk);
1983 sk_wmem_free_skb(sk, skb);
1984 } else {
1985 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1986 ~(TCPHDR_FIN|TCPHDR_PSH);
1987 if (!skb_shinfo(skb)->nr_frags) {
1988 skb_pull(skb, copy);
1989 if (skb->ip_summed != CHECKSUM_PARTIAL)
1990 skb->csum = csum_partial(skb->data,
1991 skb->len, 0);
1992 } else {
1993 __pskb_trim_head(skb, copy);
1994 tcp_set_skb_tso_segs(skb, mss_now);
1995 }
1996 TCP_SKB_CB(skb)->seq += copy;
1997 }
1998
1999 len += copy;
2000
2001 if (len >= probe_size)
2002 break;
2003 }
2004 tcp_init_tso_segs(nskb, nskb->len);
2005
2006
2007
2008
2009 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
2010
2011
2012 tp->snd_cwnd--;
2013 tcp_event_new_data_sent(sk, nskb);
2014
2015 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
2016 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
2017 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
2018
2019 return 1;
2020 }
2021
2022 return -1;
2023}
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
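/* Write packets from the write queue to the network, advancing the send
 * head as the congestion and send windows permit.  push_one == 1 sends a
 * single frame, push_one == 2 is the TLP probe variant.  Returns true if
 * segments are queued but nothing could be sent and nothing is in flight
 * (e.g. because of SWS avoidance), so that a probe timer should be armed.
 */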
2039static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2040 int push_one, gfp_t gfp)
2041{
2042 struct tcp_sock *tp = tcp_sk(sk);
2043 struct sk_buff *skb;
2044 unsigned int tso_segs, sent_pkts;
2045 int cwnd_quota;
2046 int result;
2047 bool is_cwnd_limited = false;
2048 u32 max_segs;
2049
2050 sent_pkts = 0;
2051
2052 if (!push_one) {
2053
2054 result = tcp_mtu_probe(sk);
2055 if (!result) {
2056 return false;
2057 } else if (result > 0) {
2058 sent_pkts = 1;
2059 }
2060 }
2061
2062 max_segs = tcp_tso_autosize(sk, mss_now);
2063 while ((skb = tcp_send_head(sk))) {
2064 unsigned int limit;
2065
2066 tso_segs = tcp_init_tso_segs(skb, mss_now);
2067 BUG_ON(!tso_segs);
2068
2069 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2070
2071 skb_mstamp_get(&skb->skb_mstamp);
2072 goto repair;
2073 }
2074
2075 cwnd_quota = tcp_cwnd_test(tp, skb);
2076 if (!cwnd_quota) {
2077 if (push_one == 2)
2078
2079 cwnd_quota = 1;
2080 else
2081 break;
2082 }
2083
2084 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
2085 break;
2086
2087 if (tso_segs == 1) {
2088 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2089 (tcp_skb_is_last(sk, skb) ?
2090 nonagle : TCP_NAGLE_PUSH))))
2091 break;
2092 } else {
2093 if (!push_one &&
2094 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2095 max_segs))
2096 break;
2097 }
2098
2099 limit = mss_now;
2100 if (tso_segs > 1 && !tcp_urg_mode(tp))
2101 limit = tcp_mss_split_point(sk, skb, mss_now,
2102 min_t(unsigned int,
2103 cwnd_quota,
2104 max_segs),
2105 nonagle);
2106
2107 if (skb->len > limit &&
2108 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2109 break;
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
2122 limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
2123
2124 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
2125 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
2126
2127
2128
2129
2130 smp_mb__after_atomic();
2131 if (atomic_read(&sk->sk_wmem_alloc) > limit)
2132 break;
2133 }
2134
2135 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2136 break;
2137
2138repair:
2139
2140
2141
2142 tcp_event_new_data_sent(sk, skb);
2143
2144 tcp_minshall_update(tp, mss_now, skb);
2145 sent_pkts += tcp_skb_pcount(skb);
2146
2147 if (push_one)
2148 break;
2149 }
2150
2151 if (likely(sent_pkts)) {
2152 if (tcp_in_cwnd_reduction(sk))
2153 tp->prr_out += sent_pkts;
2154
2155
2156 if (push_one != 2)
2157 tcp_schedule_loss_probe(sk);
2158 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2159 tcp_cwnd_validate(sk, is_cwnd_limited);
2160 return false;
2161 }
2162 return !tp->packets_out && tcp_send_head(sk);
2163}
2164
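/* Schedule a tail loss probe (TLP) timer instead of the plain RTO when the
 * conditions of the TLP algorithm are met, so a probe can be sent roughly
 * 2*SRTT after the last transmission to trigger fast recovery early.
 */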
2165bool tcp_schedule_loss_probe(struct sock *sk)
2166{
2167 struct inet_connection_sock *icsk = inet_csk(sk);
2168 struct tcp_sock *tp = tcp_sk(sk);
2169 u32 timeout, tlp_time_stamp, rto_time_stamp;
2170 u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
2171
2172 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
2173 return false;
2174
2175 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
2176 tcp_rearm_rto(sk);
2177 return false;
2178 }
2179
2180
2181
2182 if (tp->fastopen_rsk)
2183 return false;
2184
2185
2186 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
2187 return false;
2188
2189
2190
2191
2192 if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
2193 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2194 return false;
2195
2196 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2197 tcp_send_head(sk))
2198 return false;
2199
2200
2201
2202
2203
2204 timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
2205 if (tp->packets_out == 1)
2206 timeout = max_t(u32, timeout,
2207 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
2208 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
2209
2210
2211 tlp_time_stamp = tcp_time_stamp + timeout;
2212 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
2213 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
2214 s32 delta = rto_time_stamp - tcp_time_stamp;
2215 if (delta > 0)
2216 timeout = delta;
2217 }
2218
2219 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2220 TCP_RTO_MAX);
2221 return true;
2222}
2223
2224
2225
2226
2227
2228static bool skb_still_in_host_queue(const struct sock *sk,
2229 const struct sk_buff *skb)
2230{
2231 if (unlikely(skb_fclone_busy(sk, skb))) {
2232 NET_INC_STATS(sock_net(sk),
2233 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2234 return true;
2235 }
2236 return false;
2237}
2238
2239
2240
2241
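/* When the probe timeout (PTO) fires, send new data if possible, otherwise
 * retransmit the last segment, and record tlp_high_seq so the subsequent
 * ACK can be recognised as a TLP episode.
 */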
2242void tcp_send_loss_probe(struct sock *sk)
2243{
2244 struct tcp_sock *tp = tcp_sk(sk);
2245 struct sk_buff *skb;
2246 int pcount;
2247 int mss = tcp_current_mss(sk);
2248
2249 skb = tcp_send_head(sk);
2250 if (skb) {
2251 if (tcp_snd_wnd_test(tp, skb, mss)) {
2252 pcount = tp->packets_out;
2253 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2254 if (tp->packets_out > pcount)
2255 goto probe_sent;
2256 goto rearm_timer;
2257 }
2258 skb = tcp_write_queue_prev(sk, skb);
2259 } else {
2260 skb = tcp_write_queue_tail(sk);
2261 }
2262
2263
2264 if (tp->tlp_high_seq)
2265 goto rearm_timer;
2266
2267
2268 if (WARN_ON(!skb))
2269 goto rearm_timer;
2270
2271 if (skb_still_in_host_queue(sk, skb))
2272 goto rearm_timer;
2273
2274 pcount = tcp_skb_pcount(skb);
2275 if (WARN_ON(!pcount))
2276 goto rearm_timer;
2277
2278 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2279 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
2280 GFP_ATOMIC)))
2281 goto rearm_timer;
2282 skb = tcp_write_queue_next(sk, skb);
2283 }
2284
2285 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2286 goto rearm_timer;
2287
2288 if (__tcp_retransmit_skb(sk, skb, 1))
2289 goto rearm_timer;
2290
2291
2292 tp->tlp_high_seq = tp->snd_nxt;
2293
2294probe_sent:
2295 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
2296
2297 inet_csk(sk)->icsk_pending = 0;
2298rearm_timer:
2299 tcp_rearm_rto(sk);
2300}
2301
2302
2303
2304
2305
2306void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2307 int nonagle)
2308{
2309
2310
2311
2312
2313 if (unlikely(sk->sk_state == TCP_CLOSE))
2314 return;
2315
2316 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2317 sk_gfp_mask(sk, GFP_ATOMIC)))
2318 tcp_check_probe_timer(sk);
2319}
2320
2321
2322
2323
2324void tcp_push_one(struct sock *sk, unsigned int mss_now)
2325{
2326 struct sk_buff *skb = tcp_send_head(sk);
2327
2328 BUG_ON(!skb || skb->len < mss_now);
2329
2330 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2331}
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
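/* This function returns the receive window we want to advertise, applying
 * the RFC 1122 / RFC 813 silly-window-syndrome avoidance rules: keep the
 * offered window a multiple of the MSS (or aligned to the window scale),
 * and drop to zero only when we genuinely have too little buffer space left.
 */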
2385u32 __tcp_select_window(struct sock *sk)
2386{
2387 struct inet_connection_sock *icsk = inet_csk(sk);
2388 struct tcp_sock *tp = tcp_sk(sk);
2389
2390
2391
2392
2393
2394
2395 int mss = icsk->icsk_ack.rcv_mss;
2396 int free_space = tcp_space(sk);
2397 int allowed_space = tcp_full_space(sk);
2398 int full_space = min_t(int, tp->window_clamp, allowed_space);
2399 int window;
2400
2401 if (mss > full_space)
2402 mss = full_space;
2403
2404 if (free_space < (full_space >> 1)) {
2405 icsk->icsk_ack.quick = 0;
2406
2407 if (tcp_under_memory_pressure(sk))
2408 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2409 4U * tp->advmss);
2410
2411
2412
2413
2414 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2415
2416
2417
2418
2419
2420
2421
2422
2423 if (free_space < (allowed_space >> 4) || free_space < mss)
2424 return 0;
2425 }
2426
2427 if (free_space > tp->rcv_ssthresh)
2428 free_space = tp->rcv_ssthresh;
2429
2430
2431
2432
2433 window = tp->rcv_wnd;
2434 if (tp->rx_opt.rcv_wscale) {
2435 window = free_space;
2436
2437
2438
2439
2440
2441 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2442 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2443 << tp->rx_opt.rcv_wscale);
2444 } else {
2445
2446
2447
2448
2449
2450
2451
2452
2453 if (window <= free_space - mss || window > free_space)
2454 window = (free_space / mss) * mss;
2455 else if (mss == full_space &&
2456 free_space > window + (full_space >> 1))
2457 window = free_space;
2458 }
2459
2460 return window;
2461}
2462
2463void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2464 const struct sk_buff *next_skb)
2465{
2466 if (unlikely(tcp_has_tx_tstamp(next_skb))) {
2467 const struct skb_shared_info *next_shinfo =
2468 skb_shinfo(next_skb);
2469 struct skb_shared_info *shinfo = skb_shinfo(skb);
2470
2471 shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
2472 shinfo->tskey = next_shinfo->tskey;
2473 TCP_SKB_CB(skb)->txstamp_ack |=
2474 TCP_SKB_CB(next_skb)->txstamp_ack;
2475 }
2476}
2477
2478
2479static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2480{
2481 struct tcp_sock *tp = tcp_sk(sk);
2482 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2483 int skb_size, next_skb_size;
2484
2485 skb_size = skb->len;
2486 next_skb_size = next_skb->len;
2487
2488 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2489
2490 tcp_highest_sack_combine(sk, next_skb, skb);
2491
2492 tcp_unlink_write_queue(next_skb, sk);
2493
2494 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2495 next_skb_size);
2496
2497 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2498 skb->ip_summed = CHECKSUM_PARTIAL;
2499
2500 if (skb->ip_summed != CHECKSUM_PARTIAL)
2501 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2502
2503
2504 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2505
2506
2507 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2508
2509
2510
2511
2512 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2513 TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
2514
2515
2516 tcp_clear_retrans_hints_partial(tp);
2517 if (next_skb == tp->retransmit_skb_hint)
2518 tp->retransmit_skb_hint = skb;
2519
2520 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2521
2522 tcp_skb_collapse_tstamp(skb, next_skb);
2523
2524 sk_wmem_free_skb(sk, next_skb);
2525}
2526
2527
2528static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2529{
2530 if (tcp_skb_pcount(skb) > 1)
2531 return false;
2532
2533 if (skb_shinfo(skb)->nr_frags != 0)
2534 return false;
2535 if (skb_cloned(skb))
2536 return false;
2537 if (skb == tcp_send_head(sk))
2538 return false;
2539
2540 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2541 return false;
2542
2543 return true;
2544}
2545
2546
2547
2548
2549static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2550 int space)
2551{
2552 struct tcp_sock *tp = tcp_sk(sk);
2553 struct sk_buff *skb = to, *tmp;
2554 bool first = true;
2555
2556 if (!sysctl_tcp_retrans_collapse)
2557 return;
2558 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2559 return;
2560
2561 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2562 if (!tcp_can_collapse(sk, skb))
2563 break;
2564
2565 if (!tcp_skb_can_collapse_to(to))
2566 break;
2567
2568 space -= skb->len;
2569
2570 if (first) {
2571 first = false;
2572 continue;
2573 }
2574
2575 if (space < 0)
2576 break;
2577
2578
2579
2580 if (skb->len > skb_availroom(to))
2581 break;
2582
2583 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2584 break;
2585
2586 tcp_collapse_retrans(sk, to);
2587 }
2588}
2589
2590
2591
2592
2593
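/* Retransmit one SKB.  Policy decisions and retransmit-queue state updates
 * are done by the caller.  Returns non-zero if an error prevented the send.
 */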
2594int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2595{
2596 struct inet_connection_sock *icsk = inet_csk(sk);
2597 struct tcp_sock *tp = tcp_sk(sk);
2598 unsigned int cur_mss;
2599 int diff, len, err;
2600
2601
2602
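	/* A retransmission makes any in-progress MTU probe inconclusive; drop it. */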
2603 if (icsk->icsk_mtup.probe_size)
2604 icsk->icsk_mtup.probe_size = 0;
2605
2606
2607
2608
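	/* Do not send more than we have already queued: cap sk_wmem_alloc at the
	 * smaller of sk_wmem_queued plus a 25% allowance and sk_sndbuf.
	 */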
2609 if (atomic_read(&sk->sk_wmem_alloc) >
2610 min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
2611 sk->sk_sndbuf))
2612 return -EAGAIN;
2613
2614 if (skb_still_in_host_queue(sk, skb))
2615 return -EBUSY;
2616
2617 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2618 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2619 BUG();
2620 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2621 return -ENOMEM;
2622 }
2623
2624 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2625 return -EHOSTUNREACH;
2626
2627 cur_mss = tcp_current_mss(sk);
2633
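	/* If the receiver has shrunk its window and this skb is now entirely
	 * beyond the right edge, do not retransmit it; the exception is an skb
	 * starting exactly at snd_una, which then serves as a zero window probe.
	 */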
2634 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2635 TCP_SKB_CB(skb)->seq != tp->snd_una)
2636 return -EAGAIN;
2637
2638 len = cur_mss * segs;
2639 if (skb->len > len) {
2640 if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
2641 return -ENOMEM;
2642 } else {
2643 if (skb_unclone(skb, GFP_ATOMIC))
2644 return -ENOMEM;
2645
2646 diff = tcp_skb_pcount(skb);
2647 tcp_set_skb_tso_segs(skb, cur_mss);
2648 diff -= tcp_skb_pcount(skb);
2649 if (diff)
2650 tcp_adjust_pcount(sk, skb, diff);
2651 if (skb->len < cur_mss)
2652 tcp_retrans_try_collapse(sk, skb, cur_mss);
2653 }
2654
2655
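	/* RFC 3168 ECN fallback: clear the ECN setup flags when retransmitting a SYN. */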
2656 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
2657 tcp_ecn_clear_syn(sk, skb);
2658
2659
2660
2661
2662
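	/* Make sure skb->data is aligned on architectures that require it, and
	 * that the headroom still fits in the 16-bit csum_start offset;
	 * otherwise transmit a private copy instead.
	 */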
2663 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2664 skb_headroom(skb) >= 0xFFFF)) {
2665 struct sk_buff *nskb;
2666
2667 skb_mstamp_get(&skb->skb_mstamp);
2668 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2669 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2670 -ENOBUFS;
2671 } else {
2672 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2673 }
2674
2675 if (likely(!err)) {
2676 segs = tcp_skb_pcount(skb);
2677
2678 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2679
2680 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
2681 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2682 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2683 tp->total_retrans += segs;
2684 }
2685 return err;
2686}
2687
2688int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2689{
2690 struct tcp_sock *tp = tcp_sk(sk);
2691 int err = __tcp_retransmit_skb(sk, skb, segs);
2692
2693 if (err == 0) {
2694#if FASTRETRANS_DEBUG > 0
2695 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2696 net_dbg_ratelimited("retrans_out leaked\n");
2697 }
2698#endif
2699 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2700 tp->retrans_out += tcp_skb_pcount(skb);
2701
2702
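		/* Save the stamp of the first retransmit. */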
2703 if (!tp->retrans_stamp)
2704 tp->retrans_stamp = tcp_skb_timestamp(skb);
2705
2706 } else if (err != -EBUSY) {
2707 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2708 }
2709
2710 if (tp->undo_retrans < 0)
2711 tp->undo_retrans = 0;
2712 tp->undo_retrans += tcp_skb_pcount(skb);
2713 return err;
2714}
2718
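/* Check whether forward retransmissions are possible in the current window. */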
2719static bool tcp_can_forward_retransmit(struct sock *sk)
2720{
2721 const struct inet_connection_sock *icsk = inet_csk(sk);
2722 const struct tcp_sock *tp = tcp_sk(sk);
2723
2724
2725 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2726 return false;
2727
2728
2729 if (tcp_is_reno(tp))
2730 return false;
2739
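	/* Prefer sending new data over forward retransmissions: if something new
	 * can be sent now, do not forward-retransmit (cf. RFC 3517 NextSeg()).
	 */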
2740 if (tcp_may_send_now(sk))
2741 return false;
2742
2743 return true;
2744}
2753
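/* Walk the retransmit queue and resend segments marked lost; when allowed,
 * also forward-retransmit not-yet-lost segments up to the highest SACKed
 * sequence. Stops when everything has been sent or the congestion window
 * is exhausted.
 */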
2754void tcp_xmit_retransmit_queue(struct sock *sk)
2755{
2756 const struct inet_connection_sock *icsk = inet_csk(sk);
2757 struct tcp_sock *tp = tcp_sk(sk);
2758 struct sk_buff *skb;
2759 struct sk_buff *hole = NULL;
2760 u32 max_segs, last_lost;
2761 int mib_idx;
2762 int fwd_rexmitting = 0;
2763
2764 if (!tp->packets_out)
2765 return;
2766
2767 if (!tp->lost_out)
2768 tp->retransmit_high = tp->snd_una;
2769
2770 if (tp->retransmit_skb_hint) {
2771 skb = tp->retransmit_skb_hint;
2772 last_lost = TCP_SKB_CB(skb)->end_seq;
2773 if (after(last_lost, tp->retransmit_high))
2774 last_lost = tp->retransmit_high;
2775 } else {
2776 skb = tcp_write_queue_head(sk);
2777 last_lost = tp->snd_una;
2778 }
2779
2780 max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
2781 tcp_for_write_queue_from(skb, sk) {
2782 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2783 int segs;
2784
2785 if (skb == tcp_send_head(sk))
2786 break;
2787
2788 if (!hole)
2789 tp->retransmit_skb_hint = skb;
2790
2791 segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
2792 if (segs <= 0)
2793 return;
2794
2795
2796
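		/* Cap at the TSO autosize limit so that aggregated skbs do not
		 * turn into oversized retransmitted packets.
		 */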
2797 segs = min_t(int, segs, max_segs);
2798
2799 if (fwd_rexmitting) {
2800begin_fwd:
2801 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2802 break;
2803 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2804
2805 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2806 tp->retransmit_high = last_lost;
2807 if (!tcp_can_forward_retransmit(sk))
2808 break;
2809
2810 if (hole) {
2811 skb = hole;
2812 hole = NULL;
2813 }
2814 fwd_rexmitting = 1;
2815 goto begin_fwd;
2816
2817 } else if (!(sacked & TCPCB_LOST)) {
2818 if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2819 hole = skb;
2820 continue;
2821
2822 } else {
2823 last_lost = TCP_SKB_CB(skb)->end_seq;
2824 if (icsk->icsk_ca_state != TCP_CA_Loss)
2825 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2826 else
2827 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2828 }
2829
2830 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2831 continue;
2832
2833 if (tcp_retransmit_skb(sk, skb, segs))
2834 return;
2835
2836 NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
2837
2838 if (tcp_in_cwnd_reduction(sk))
2839 tp->prr_out += tcp_skb_pcount(skb);
2840
2841 if (skb == tcp_write_queue_head(sk))
2842 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2843 inet_csk(sk)->icsk_rto,
2844 TCP_RTO_MAX);
2845 }
2846}
2854
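/* Allow a socket to exceed its normal memory limits for control packets such
 * as FIN, so that connection teardown (and memory recovery) is not blocked by
 * memory pressure.
 */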
2855void sk_forced_mem_schedule(struct sock *sk, int size)
2856{
2857 int amt;
2858
2859 if (size <= sk->sk_forward_alloc)
2860 return;
2861 amt = sk_mem_pages(size);
2862 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2863 sk_memory_allocated_add(sk, amt);
2864
2865 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2866 mem_cgroup_charge_skmem(sk->sk_memcg, amt);
2867}
2871
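/* Send a FIN. The caller locks the socket for us.
 * We should try to send a FIN packet really hard, but eventually give up.
 */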
2872void tcp_send_fin(struct sock *sk)
2873{
2874 struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
2875 struct tcp_sock *tp = tcp_sk(sk);
2881
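	/* Optimization: piggyback the FIN on the tail skb when it has not been
	 * sent yet, or when we are under memory pressure (in which case the FIN
	 * effectively goes out later via the retransmit path).
	 */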
2882 if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
2883coalesce:
2884 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
2885 TCP_SKB_CB(tskb)->end_seq++;
2886 tp->write_seq++;
2887 if (!tcp_send_head(sk)) {
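			/* tskb has already been sent: pretend the FIN was
			 * included on that transmit by advancing snd_nxt here,
			 * since the retransmit path does not change snd_nxt.
			 */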
2894 tp->snd_nxt++;
2895 return;
2896 }
2897 } else {
2898 skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
2899 if (unlikely(!skb)) {
2900 if (tskb)
2901 goto coalesce;
2902 return;
2903 }
2904 skb_reserve(skb, MAX_TCP_HEADER);
2905 sk_forced_mem_schedule(sk, skb->truesize);
2906
2907 tcp_init_nondata_skb(skb, tp->write_seq,
2908 TCPHDR_ACK | TCPHDR_FIN);
2909 tcp_queue_skb(sk, skb);
2910 }
2911 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
2912}
2918
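/* Send an active reset to abort the connection, e.g. when a socket is closed
 * with unread data in its receive queue (cf. RFC 2525, section 2.17).
 */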
2919void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2920{
2921 struct sk_buff *skb;
2922
2923
2924 skb = alloc_skb(MAX_TCP_HEADER, priority);
2925 if (!skb) {
2926 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2927 return;
2928 }
2929
2930
2931 skb_reserve(skb, MAX_TCP_HEADER);
2932 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2933 TCPHDR_ACK | TCPHDR_RST);
2934 skb_mstamp_get(&skb->skb_mstamp);
2935
2936 if (tcp_transmit_skb(sk, skb, 0, priority))
2937 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2938
2939 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2940}
2947
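/* Send a crossed SYN-ACK during socket establishment.
 * This must only be called after we have already sent a SYN that crossed the
 * incoming SYN; otherwise the initial rcv_wnd and rcv_wscale are not correct.
 */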
2948int tcp_send_synack(struct sock *sk)
2949{
2950 struct sk_buff *skb;
2951
2952 skb = tcp_write_queue_head(sk);
2953 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2954 pr_debug("%s: wrong queue state\n", __func__);
2955 return -EFAULT;
2956 }
2957 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2958 if (skb_cloned(skb)) {
2959 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2960 if (!nskb)
2961 return -ENOMEM;
2962 tcp_unlink_write_queue(skb, sk);
2963 __skb_header_release(nskb);
2964 __tcp_add_write_queue_head(sk, nskb);
2965 sk_wmem_free_skb(sk, skb);
2966 sk->sk_wmem_queued += nskb->truesize;
2967 sk_mem_charge(sk, nskb->truesize);
2968 skb = nskb;
2969 }
2970
2971 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2972 tcp_ecn_send_synack(sk, skb);
2973 }
2974 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2975}
2985
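/* Allocate one skb and build a SYN-ACK packet for the given request socket.
 * The dst entry is consumed: the caller must not use it afterwards.
 */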
2986struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
2987 struct request_sock *req,
2988 struct tcp_fastopen_cookie *foc,
2989 enum tcp_synack_type synack_type)
2990{
2991 struct inet_request_sock *ireq = inet_rsk(req);
2992 const struct tcp_sock *tp = tcp_sk(sk);
2993 struct tcp_md5sig_key *md5 = NULL;
2994 struct tcp_out_options opts;
2995 struct sk_buff *skb;
2996 int tcp_header_size;
2997 struct tcphdr *th;
2998 u16 user_mss;
2999 int mss;
3000
3001 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
3002 if (unlikely(!skb)) {
3003 dst_release(dst);
3004 return NULL;
3005 }
3006
3007 skb_reserve(skb, MAX_TCP_HEADER);
3008
3009 switch (synack_type) {
3010 case TCP_SYNACK_NORMAL:
3011 skb_set_owner_w(skb, req_to_sk(req));
3012 break;
3013 case TCP_SYNACK_COOKIE:
3014
3015
3016
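		/* Under SYN flood the skb is deliberately not attached to a
		 * socket (presumably to avoid touching shared socket state
		 * from many CPUs).
		 */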
3017 break;
3018 case TCP_SYNACK_FASTOPEN:
3019
3020
3021
3022
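		/* sk is const because several CPUs may call us concurrently;
		 * sk_wmem_alloc is atomic, so casting away const for
		 * skb_set_owner_w() is considered safe here.
		 */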
3023 skb_set_owner_w(skb, (struct sock *)sk);
3024 break;
3025 }
3026 skb_dst_set(skb, dst);
3027
3028 mss = dst_metric_advmss(dst);
3029 user_mss = READ_ONCE(tp->rx_opt.user_mss);
3030 if (user_mss && user_mss < mss)
3031 mss = user_mss;
3032
3033 memset(&opts, 0, sizeof(opts));
3034#ifdef CONFIG_SYN_COOKIES
3035 if (unlikely(req->cookie_ts))
3036 skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
3037 else
3038#endif
3039 skb_mstamp_get(&skb->skb_mstamp);
3040
3041#ifdef CONFIG_TCP_MD5SIG
3042 rcu_read_lock();
3043 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3044#endif
3045 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3046 tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
3047 sizeof(*th);
3048
3049 skb_push(skb, tcp_header_size);
3050 skb_reset_transport_header(skb);
3051
3052 th = (struct tcphdr *)skb->data;
3053 memset(th, 0, sizeof(struct tcphdr));
3054 th->syn = 1;
3055 th->ack = 1;
3056 tcp_ecn_make_synack(req, th);
3057 th->source = htons(ireq->ir_num);
3058 th->dest = ireq->ir_rmt_port;
3059
3060
3061
3062 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
3063 TCPHDR_SYN | TCPHDR_ACK);
3064
3065 th->seq = htonl(TCP_SKB_CB(skb)->seq);
3066
3067 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3068
3069
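	/* RFC 1323: the window field in SYN and SYN-ACK segments is never scaled. */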
3070 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3071 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3072 th->doff = (tcp_header_size >> 2);
3073 __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
3074
3075#ifdef CONFIG_TCP_MD5SIG
3076
3077 if (md5)
3078 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
3079 md5, req_to_sk(req), skb);
3080 rcu_read_unlock();
3081#endif
3082
3083
3084 skb->tstamp.tv64 = 0;
3085 return skb;
3086}
3087EXPORT_SYMBOL(tcp_make_synack);
3088
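/* Switch to the congestion control algorithm pinned on the route
 * (RTAX_CC_ALGO), if one is set and its module can be taken.
 */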
3089static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
3090{
3091 struct inet_connection_sock *icsk = inet_csk(sk);
3092 const struct tcp_congestion_ops *ca;
3093 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
3094
3095 if (ca_key == TCP_CA_UNSPEC)
3096 return;
3097
3098 rcu_read_lock();
3099 ca = tcp_ca_find_key(ca_key);
3100 if (likely(ca && try_module_get(ca->owner))) {
3101 module_put(icsk->icsk_ca_ops->owner);
3102 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
3103 icsk->icsk_ca_ops = ca;
3104 }
3105 rcu_read_unlock();
3106}
3107
3108
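/* Do all connect socket setups that can be done AF independent. */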
3109static void tcp_connect_init(struct sock *sk)
3110{
3111 const struct dst_entry *dst = __sk_dst_get(sk);
3112 struct tcp_sock *tp = tcp_sk(sk);
3113 __u8 rcv_wscale;
3114
3115
3116
3117
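	/* Reserve header space for the options we intend to send: timestamps
	 * here, and MD5 below when configured.
	 */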
3118 tp->tcp_header_len = sizeof(struct tcphdr) +
3119 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
3120
3121#ifdef CONFIG_TCP_MD5SIG
3122 if (tp->af_specific->md5_lookup(sk, sk))
3123 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
3124#endif
3125
3126
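	/* If the user set TCP_MAXSEG, use it to clamp the MSS. */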
3127 if (tp->rx_opt.user_mss)
3128 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3129 tp->max_window = 0;
3130 tcp_mtup_init(sk);
3131 tcp_sync_mss(sk, dst_mtu(dst));
3132
3133 tcp_ca_dst_init(sk, dst);
3134
3135 if (!tp->window_clamp)
3136 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
3137 tp->advmss = dst_metric_advmss(dst);
3138 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
3139 tp->advmss = tp->rx_opt.user_mss;
3140
3141 tcp_initialize_rcv_mss(sk);
3142
3143
3144 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3145 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3146 tp->window_clamp = tcp_full_space(sk);
3147
3148 tcp_select_initial_window(tcp_full_space(sk),
3149 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3150 &tp->rcv_wnd,
3151 &tp->window_clamp,
3152 sysctl_tcp_window_scaling,
3153 &rcv_wscale,
3154 dst_metric(dst, RTAX_INITRWND));
3155
3156 tp->rx_opt.rcv_wscale = rcv_wscale;
3157 tp->rcv_ssthresh = tp->rcv_wnd;
3158
3159 sk->sk_err = 0;
3160 sock_reset_flag(sk, SOCK_DONE);
3161 tp->snd_wnd = 0;
3162 tcp_init_wl(tp, 0);
3163 tp->snd_una = tp->write_seq;
3164 tp->snd_sml = tp->write_seq;
3165 tp->snd_up = tp->write_seq;
3166 tp->snd_nxt = tp->write_seq;
3167
3168 if (likely(!tp->repair))
3169 tp->rcv_nxt = 0;
3170 else
3171 tp->rcv_tstamp = tcp_time_stamp;
3172 tp->rcv_wup = tp->rcv_nxt;
3173 tp->copied_seq = tp->rcv_nxt;
3174
3175 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
3176 inet_csk(sk)->icsk_retransmits = 0;
3177 tcp_clear_retrans(tp);
3178}
3179
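/* Queue the skb on the write queue, charge its memory to the socket and
 * advance write_seq/packets_out; the caller transmits it.
 */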
3180static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3181{
3182 struct tcp_sock *tp = tcp_sk(sk);
3183 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3184
3185 tcb->end_seq += skb->len;
3186 __skb_header_release(skb);
3187 __tcp_add_write_queue_tail(sk, skb);
3188 sk->sk_wmem_queued += skb->truesize;
3189 sk_mem_charge(sk, skb->truesize);
3190 tp->write_seq = tcb->end_seq;
3191 tp->packets_out += tcp_skb_pcount(skb);
3192}
3200
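/* Build and send a SYN carrying data, using a cached Fast Open cookie. The
 * data goes in a separate, data-only skb queued after the regular SYN, so that
 * plain SYNs are what get retransmitted on timeout. If no usable cookie is
 * cached, or an error occurs, fall back to a regular SYN that only requests a
 * cookie.
 */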
3201static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3202{
3203 struct tcp_sock *tp = tcp_sk(sk);
3204 struct tcp_fastopen_request *fo = tp->fastopen_req;
3205 int syn_loss = 0, space, err = 0;
3206 unsigned long last_syn_loss = 0;
3207 struct sk_buff *syn_data;
3208
3209 tp->rx_opt.mss_clamp = tp->advmss;
3210 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
3211 &syn_loss, &last_syn_loss);
3212
3213 if (syn_loss > 1 &&
3214 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
3215 fo->cookie.len = -1;
3216 goto fallback;
3217 }
3218
3219 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
3220 fo->cookie.len = -1;
3221 else if (fo->cookie.len <= 0)
3222 goto fallback;
3223
3224
3225
3226
3227
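	/* The MSS for SYN data is based on the cached MSS, clamped by any user
	 * MSS; reserve the maximum TCP option space for middleboxes that add
	 * options of their own.
	 */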
3228 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
3229 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3230 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3231 MAX_TCP_OPTION_SPACE;
3232
3233 space = min_t(size_t, space, fo->size);
3234
3235
3236 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3237
3238 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
3239 if (!syn_data)
3240 goto fallback;
3241 syn_data->ip_summed = CHECKSUM_PARTIAL;
3242 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3243 if (space) {
3244 int copied = copy_from_iter(skb_put(syn_data, space), space,
3245 &fo->data->msg_iter);
3246 if (unlikely(!copied)) {
3247 kfree_skb(syn_data);
3248 goto fallback;
3249 }
3250 if (copied != space) {
3251 skb_trim(syn_data, copied);
3252 space = copied;
3253 }
3254 }
3255
3256 if (space == fo->size)
3257 fo->data = NULL;
3258 fo->copied = space;
3259
3260 tcp_connect_queue_skb(sk, syn_data);
3261
3262 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3263
3264 syn->skb_mstamp = syn_data->skb_mstamp;
3270
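	/* The full SYN+data skb has been cloned and sent (or not). Strip the SYN
	 * byte from syn_data, which stays on the write queue for retransmission
	 * alongside the bare SYN skb.
	 */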
3271 TCP_SKB_CB(syn_data)->seq++;
3272 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3273 if (!err) {
3274 tp->syn_data = (fo->copied > 0);
3275 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3276 goto done;
3277 }
3278
3279fallback:
3280
3281 if (fo->cookie.len > 0)
3282 fo->cookie.len = 0;
3283 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3284 if (err)
3285 tp->syn_fastopen = 0;
3286done:
3287 fo->cookie.len = -1;
3288 return err;
3289}
3290
3291
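/* Build a SYN and send it off; this starts the three-way handshake. */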
3292int tcp_connect(struct sock *sk)
3293{
3294 struct tcp_sock *tp = tcp_sk(sk);
3295 struct sk_buff *buff;
3296 int err;
3297
3298 tcp_connect_init(sk);
3299
3300 if (unlikely(tp->repair)) {
3301 tcp_finish_connect(sk, NULL);
3302 return 0;
3303 }
3304
3305 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
3306 if (unlikely(!buff))
3307 return -ENOBUFS;
3308
3309 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3310 tp->retrans_stamp = tcp_time_stamp;
3311 tcp_connect_queue_skb(sk, buff);
3312 tcp_ecn_send_syn(sk, buff);
3313
3314
3315 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3316 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3317 if (err == -ECONNREFUSED)
3318 return err;
3319
3320
3321
3322
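	/* snd_nxt is advanced only after the tcp_transmit_skb() call, so that
	 * this packet is counted in tcpOutSegs.
	 */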
3323 tp->snd_nxt = tp->write_seq;
3324 tp->pushed_seq = tp->write_seq;
3325 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3326
3327
3328 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3329 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3330 return 0;
3331}
3332EXPORT_SYMBOL(tcp_connect);
3337
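/* Send out a delayed ack. The caller does the policy checking to see whether
 * we should even be here; see tcp_ack_snd_check() in tcp_input.c.
 */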
3338void tcp_send_delayed_ack(struct sock *sk)
3339{
3340 struct inet_connection_sock *icsk = inet_csk(sk);
3341 int ato = icsk->icsk_ack.ato;
3342 unsigned long timeout;
3343
3344 tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
3345
3346 if (ato > TCP_DELACK_MIN) {
3347 const struct tcp_sock *tp = tcp_sk(sk);
3348 int max_ato = HZ / 2;
3349
3350 if (icsk->icsk_ack.pingpong ||
3351 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3352 max_ato = TCP_DELACK_MAX;
3359
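		/* If an RTT estimate is available, use it to bound the delayed
		 * ack timeout instead of relying on icsk_rto.
		 */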
3360 if (tp->srtt_us) {
3361 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3362 TCP_DELACK_MIN);
3363
3364 if (rtt < max_ato)
3365 max_ato = rtt;
3366 }
3367
3368 ato = min(ato, max_ato);
3369 }
3370
3371
3372 timeout = jiffies + ato;
3373
3374
3375 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3376
3377
3378
3379 if (icsk->icsk_ack.blocked ||
3380 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3381 tcp_send_ack(sk);
3382 return;
3383 }
3384
3385 if (!time_before(timeout, icsk->icsk_ack.timeout))
3386 timeout = icsk->icsk_ack.timeout;
3387 }
3388 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3389 icsk->icsk_ack.timeout = timeout;
3390 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3391}
3392
3393
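/* This routine sends an ACK and updates the window. */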
3394void tcp_send_ack(struct sock *sk)
3395{
3396 struct sk_buff *buff;
3397
3398
3399 if (sk->sk_state == TCP_CLOSE)
3400 return;
3401
3402 tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
3407
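	/* The skb is not put on the write queue; tcp_transmit_skb() will set
	 * its ownership to this socket.
	 */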
3408 buff = alloc_skb(MAX_TCP_HEADER,
3409 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3410 if (unlikely(!buff)) {
3411 inet_csk_schedule_ack(sk);
3412 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3413 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3414 TCP_DELACK_MAX, TCP_RTO_MAX);
3415 return;
3416 }
3417
3418
3419 skb_reserve(buff, MAX_TCP_HEADER);
3420 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3427
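	/* Mark this as a pure ACK so it does not count (much) against TCP Small
	 * Queues and pacing limits.
	 */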
3428 skb_set_tcp_pure_ack(buff);
3429
3430
3431 skb_mstamp_get(&buff->skb_mstamp);
3432 tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
3433}
3434EXPORT_SYMBOL_GPL(tcp_send_ack);
3446
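/* Send a packet with an out-of-date sequence number (SND.UNA - 1 unless probing
 * urgent mode). The peer is expected to answer with an ACK advertising its
 * current window.
 */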
3447static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3448{
3449 struct tcp_sock *tp = tcp_sk(sk);
3450 struct sk_buff *skb;
3451
3452
3453 skb = alloc_skb(MAX_TCP_HEADER,
3454 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
3455 if (!skb)
3456 return -1;
3457
3458
3459 skb_reserve(skb, MAX_TCP_HEADER);
3463
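	/* Use a previous sequence number; this should cause the other end to
	 * send an ACK. Don't queue or clone the skb, just send it.
	 */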
3464 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3465 skb_mstamp_get(&skb->skb_mstamp);
3466 NET_INC_STATS(sock_net(sk), mib);
3467 return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
3468}
3469
3470void tcp_send_window_probe(struct sock *sk)
3471{
3472 if (sk->sk_state == TCP_ESTABLISHED) {
3473 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3474 tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
3475 }
3476}
3477
3478
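/* Push out a segment from the head of the write queue if the window allows it
 * (fragmenting if necessary), otherwise send a pure window probe.
 */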
3479int tcp_write_wakeup(struct sock *sk, int mib)
3480{
3481 struct tcp_sock *tp = tcp_sk(sk);
3482 struct sk_buff *skb;
3483
3484 if (sk->sk_state == TCP_CLOSE)
3485 return -1;
3486
3487 skb = tcp_send_head(sk);
3488 if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3489 int err;
3490 unsigned int mss = tcp_current_mss(sk);
3491 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3492
3493 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3494 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3499
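		/* The segment spans beyond the offered window or covers more
		 * than one MSS: fragment it so a suitably sized piece can be
		 * pushed out as the probe.
		 */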
3500 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3501 skb->len > mss) {
3502 seg_size = min(seg_size, mss);
3503 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3504 if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
3505 return -1;
3506 } else if (!tcp_skb_pcount(skb))
3507 tcp_set_skb_tso_segs(skb, mss);
3508
3509 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3510 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3511 if (!err)
3512 tcp_event_new_data_sent(sk, skb);
3513 return err;
3514 } else {
3515 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3516 tcp_xmit_probe_skb(sk, 1, mib);
3517 return tcp_xmit_probe_skb(sk, 0, mib);
3518 }
3519}
3523
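/* A window probe timeout has occurred. If the window is not closed, send a
 * partial packet, else a zero window probe.
 */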
3524void tcp_send_probe0(struct sock *sk)
3525{
3526 struct inet_connection_sock *icsk = inet_csk(sk);
3527 struct tcp_sock *tp = tcp_sk(sk);
3528 struct net *net = sock_net(sk);
3529 unsigned long probe_max;
3530 int err;
3531
3532 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3533
3534 if (tp->packets_out || !tcp_send_head(sk)) {
3535
3536 icsk->icsk_probes_out = 0;
3537 icsk->icsk_backoff = 0;
3538 return;
3539 }
3540
3541 if (err <= 0) {
3542 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3543 icsk->icsk_backoff++;
3544 icsk->icsk_probes_out++;
3545 probe_max = TCP_RTO_MAX;
3546 } else {
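		/* The probe was not sent due to local congestion: do not back
		 * off or count it, so that local senders keep competing for
		 * local resources.
		 */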
3553 if (!icsk->icsk_probes_out)
3554 icsk->icsk_probes_out = 1;
3555 probe_max = TCP_RESOURCE_PROBE_INTERVAL;
3556 }
3557 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3558 tcp_probe0_when(sk, probe_max),
3559 TCP_RTO_MAX);
3560}
3561
3562int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3563{
3564 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3565 struct flowi fl;
3566 int res;
3567
3568 tcp_rsk(req)->txhash = net_tx_rndhash();
3569 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
3570 if (!res) {
3571 __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
3572 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3573 if (unlikely(tcp_passive_fastopen(sk)))
3574 tcp_sk(sk)->total_retrans++;
3575 }
3576 return res;
3577}
3578EXPORT_SYMBOL(tcp_rtx_synack);
3579