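/*
 * Implementation of the Transmission Control Protocol (TCP): the output
 * (transmit) engine.  This file builds and transmits segments, selects the
 * advertised receive window, handles MSS/MTU probing, TSO sizing and
 * deferral, TCP Small Queues, tail loss probes, and the retransmit queue.
 */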
#define pr_fmt(fmt) "TCP: " fmt

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>

/* Allow collapsing of queued segments when retransmitting. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* Work around rare, broken peers that interpret the window field as a
 * signed quantity: when enabled (and no window scaling is in use) never
 * advertise more than MAX_TCP_WINDOW bytes.  Disabled by default.
 */
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* TCP Small Queues: limit on bytes queued below the socket (qdisc/device). */
int sysctl_tcp_limit_output_bytes __read_mostly = 131072;

/* Largest fraction (1/N) of the congestion window that a single TSO
 * frame may consume; 0 disables the divisor heuristic.
 */
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

/* Restart the congestion window after an idle period (RFC 2861 behaviour). */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);

static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                           int push_one, gfp_t gfp);

/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        unsigned int prior_packets = tp->packets_out;

        tcp_advance_send_head(sk, skb);
        tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

        tp->packets_out += tcp_skb_pcount(skb);
        if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                tcp_rearm_rto(sk);
        }

        NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
                      tcp_skb_pcount(skb));
}

/* SND.NXT, if the window was not shrunk.  If it has been shrunk,
 * the best we can offer is the end of the current send window.
 */
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);

        if (!before(tcp_wnd_end(tp), tp->snd_nxt))
                return tp->snd_nxt;
        else
                return tcp_wnd_end(tp);
}

/* Calculate the MSS to advertise in a SYN segment.  It is largely
 * independent of the path MTU: we advertise the largest segment we can
 * accept, bounded by the cached route metric (RTAX_ADVMSS) if one exists.
 */
static __u16 tcp_advertise_mss(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        const struct dst_entry *dst = __sk_dst_get(sk);
        int mss = tp->advmss;

        if (dst) {
                unsigned int metric = dst_metric_advmss(dst);

                if (metric < mss) {
                        mss = metric;
                        tp->advmss = mss;
                }
        }

        return (__u16)mss;
}

/* RFC 2861: reset the congestion window after an idle period longer than
 * the RTO ("restart window").  First half of the cwnd validation mechanism.
 */
static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
{
        struct tcp_sock *tp = tcp_sk(sk);
        s32 delta = tcp_time_stamp - tp->lsndtime;
        u32 restart_cwnd = tcp_init_cwnd(tp, dst);
        u32 cwnd = tp->snd_cwnd;

        tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

        tp->snd_ssthresh = tcp_current_ssthresh(sk);
        restart_cwnd = min(restart_cwnd, cwnd);

        /* Halve cwnd once for every RTO that elapsed while idle. */
        while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
                cwnd >>= 1;
        tp->snd_cwnd = max(cwnd, restart_cwnd);
        tp->snd_cwnd_stamp = tcp_time_stamp;
        tp->snd_cwnd_used = 0;
}

/* Congestion state accounting after a data packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp,
                                struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        const u32 now = tcp_time_stamp;
        const struct dst_entry *dst = __sk_dst_get(sk);

        if (sysctl_tcp_slow_start_after_idle &&
            (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
                tcp_cwnd_restart(sk, __sk_dst_get(sk));

        tp->lsndtime = now;

        /* If it is a reply for ato after last received
         * packet, enter pingpong mode.
         */
        if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato &&
            (!dst || !dst_metric(dst, RTAX_QUICKACK)))
                icsk->icsk_ack.pingpong = 1;
}

/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
        tcp_dec_quickack_mode(sk, pkts);
        inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

u32 tcp_default_init_rwnd(u32 mss)
{
        /* The initial receive window should be twice TCP_INIT_CWND, to allow
         * sending of new unsent data during fast recovery (RFC 3517,
         * Section 4, NextSeg() rule (2)).  Limit the memory used when the
         * MSS is larger than 1460 bytes.
         */
        u32 init_rwnd = TCP_INIT_CWND * 2;

        if (mss > 1460)
                init_rwnd = max((1460 * init_rwnd) / mss, 2U);
        return init_rwnd;
}

/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered.  Store the results in the tp structure.
 * NOTE: for smooth operation the initial space offering should
 * be a multiple of mss if possible.  We assume here that mss >= 1;
 * this MUST be enforced by all callers.
 */
void tcp_select_initial_window(int __space, __u32 mss,
                               __u32 *rcv_wnd, __u32 *window_clamp,
                               int wscale_ok, __u8 *rcv_wscale,
                               __u32 init_rcv_wnd)
{
        unsigned int space = (__space < 0 ? 0 : __space);

        /* If no clamp is set, pick the maximum scalable window. */
        if (*window_clamp == 0)
                (*window_clamp) = (65535 << 14);
        space = min(*window_clamp, space);

        /* Quantize space offering to a multiple of mss if possible. */
        if (space > mss)
                space = (space / mss) * mss;

        /* NOTE: offering an initial window larger than 32767
         * will break some buggy TCP stacks.  If the admin tells us
         * it is likely we could be speaking with such a buggy stack
         * we will truncate our initial window offering to 32K-1
         * unless the remote has sent us a window scaling option,
         * which we interpret as a sign the remote TCP is not
         * misinterpreting the window field as a signed quantity.
         */
        if (sysctl_tcp_workaround_signed_windows)
                (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
        else
                (*rcv_wnd) = space;

        (*rcv_wscale) = 0;
        if (wscale_ok) {
                /* Set window scaling on the maximum possible window;
                 * see RFC 1323 for the explanation of the limit of 14.
                 */
                space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
                space = min_t(u32, space, *window_clamp);
                while (space > 65535 && (*rcv_wscale) < 14) {
                        space >>= 1;
                        (*rcv_wscale)++;
                }
        }

        if (mss > (1 << *rcv_wscale)) {
                if (!init_rcv_wnd) /* Use default unless specified otherwise */
                        init_rcv_wnd = tcp_default_init_rwnd(mss);
                *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
        }

        /* Set the clamp no higher than the maximum representable value. */
        (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);

/* Choose a new window to advertise, update state in tcp_sock for the
 * socket, and return the result with RFC 1323 scaling applied.  The return
 * value can be stuffed directly into th->window for an outgoing frame.
 */
static u16 tcp_select_window(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        u32 old_win = tp->rcv_wnd;
        u32 cur_win = tcp_receive_window(tp);
        u32 new_win = __tcp_select_window(sk);

        /* Never shrink the offered window. */
        if (new_win < cur_win) {
                /* If the free space dropped below what we have already
                 * advertised, keep advertising the current right edge,
                 * rounded up to the window-scale granularity.
                 */
                if (new_win == 0)
                        NET_INC_STATS(sock_net(sk),
                                      LINUX_MIB_TCPWANTZEROWINDOWADV);
                new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
        }
        tp->rcv_wnd = new_win;
        tp->rcv_wup = tp->rcv_nxt;

        /* Make sure we do not exceed the maximum possible
         * scaled window.
         */
        if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
                new_win = min(new_win, MAX_TCP_WINDOW);
        else
                new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

        /* RFC 1323 scaling applied. */
        new_win >>= tp->rx_opt.rcv_wscale;

        /* If we advertise a zero window, disable the fast path. */
        if (new_win == 0) {
                tp->pred_flags = 0;
                if (old_win)
                        NET_INC_STATS(sock_net(sk),
                                      LINUX_MIB_TCPTOZEROWINDOWADV);
        } else if (old_win == 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
        }

        return new_win;
}
316
317
318static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
319{
320 const struct tcp_sock *tp = tcp_sk(sk);
321
322 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
323 if (!(tp->ecn_flags & TCP_ECN_OK))
324 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
325 else if (tcp_ca_needs_ecn(sk))
326 INET_ECN_xmit(sk);
327}
328
329
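/* Packet ECN state for a SYN.  ECN is requested when the sysctl enables it
 * for outgoing connections (mode 1), when the congestion control module
 * needs ECN, or when the destination route carries the ECN feature.
 */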
330static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
331{
332 struct tcp_sock *tp = tcp_sk(sk);
333 bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
334 tcp_ca_needs_ecn(sk);
335
336 if (!use_ecn) {
337 const struct dst_entry *dst = __sk_dst_get(sk);
338
339 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
340 use_ecn = true;
341 }
342
343 tp->ecn_flags = 0;
344
345 if (use_ecn) {
346 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
347 tp->ecn_flags = TCP_ECN_OK;
348 if (tcp_ca_needs_ecn(sk))
349 INET_ECN_xmit(sk);
350 }
351}
352
353static void
354tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
355 struct sock *sk)
356{
357 if (inet_rsk(req)->ecn_ok) {
358 th->ece = 1;
359 if (tcp_ca_needs_ecn(sk))
360 INET_ECN_xmit(sk);
361 }
362}
363
364
365
366
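/* Set up ECN state for a packet about to be sent on an established socket:
 * mark data segments ECT, emit CWR once after a queued cwnd reduction, and
 * keep echoing ECE while we are demanding CWR from the peer.
 */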
367static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
368 int tcp_header_len)
369{
370 struct tcp_sock *tp = tcp_sk(sk);
371
372 if (tp->ecn_flags & TCP_ECN_OK) {
373
374 if (skb->len != tcp_header_len &&
375 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
376 INET_ECN_xmit(sk);
377 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
378 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
379 tcp_hdr(skb)->cwr = 1;
380 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
381 }
382 } else if (!tcp_ca_needs_ecn(sk)) {
383
384 INET_ECN_dontxmit(sk);
385 }
386 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
387 tcp_hdr(skb)->ece = 1;
388 }
389}
390
391
392
393
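/* Constructs common control bits of a non-data skb.  If SYN/FIN is present,
 * auto increment end seqno.
 */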
394static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
395{
396 struct skb_shared_info *shinfo = skb_shinfo(skb);
397
398 skb->ip_summed = CHECKSUM_PARTIAL;
399 skb->csum = 0;
400
401 TCP_SKB_CB(skb)->tcp_flags = flags;
402 TCP_SKB_CB(skb)->sacked = 0;
403
404 tcp_skb_pcount_set(skb, 1);
405 shinfo->gso_size = 0;
406 shinfo->gso_type = 0;
407
408 TCP_SKB_CB(skb)->seq = seq;
409 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
410 seq++;
411 TCP_SKB_CB(skb)->end_seq = seq;
412}
413
414static inline bool tcp_urg_mode(const struct tcp_sock *tp)
415{
416 return tp->snd_una != tp->snd_up;
417}
418
419#define OPTION_SACK_ADVERTISE (1 << 0)
420#define OPTION_TS (1 << 1)
421#define OPTION_MD5 (1 << 2)
422#define OPTION_WSCALE (1 << 3)
423#define OPTION_FAST_OPEN_COOKIE (1 << 8)
424
425struct tcp_out_options {
426 u16 options;
427 u16 mss;
428 u8 ws;
429 u8 num_sack_blocks;
430 u8 hash_size;
431 __u8 *hash_location;
432 __u32 tsval, tsecr;
433 struct tcp_fastopen_cookie *fastopen_cookie;
434};
435
436
437
438
439
440
441
442
443
444
445
446
447
448
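/* Write previously computed TCP options to the packet.
 *
 * The option ordering below is deliberate: parts of the Internet are
 * sensitive to it, so MD5, MSS, SACK-permitted/timestamps, window scale,
 * SACK blocks and the experimental Fast Open option are emitted in this
 * fixed order, each padded with NOPs to a 32-bit boundary.
 *
 * Illustrative 32-bit word for a timestamp option when SACK is not being
 * advertised at the same time:
 *
 *      NOP(1) | NOP(1) | kind TIMESTAMP(8) | length 10
 *      followed by two more words: TSval, TSecr
 */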
449static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
450 struct tcp_out_options *opts)
451{
452 u16 options = opts->options;
453
454 if (unlikely(OPTION_MD5 & options)) {
455 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
456 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
457
458 opts->hash_location = (__u8 *)ptr;
459 ptr += 4;
460 }
461
462 if (unlikely(opts->mss)) {
463 *ptr++ = htonl((TCPOPT_MSS << 24) |
464 (TCPOLEN_MSS << 16) |
465 opts->mss);
466 }
467
468 if (likely(OPTION_TS & options)) {
469 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
470 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
471 (TCPOLEN_SACK_PERM << 16) |
472 (TCPOPT_TIMESTAMP << 8) |
473 TCPOLEN_TIMESTAMP);
474 options &= ~OPTION_SACK_ADVERTISE;
475 } else {
476 *ptr++ = htonl((TCPOPT_NOP << 24) |
477 (TCPOPT_NOP << 16) |
478 (TCPOPT_TIMESTAMP << 8) |
479 TCPOLEN_TIMESTAMP);
480 }
481 *ptr++ = htonl(opts->tsval);
482 *ptr++ = htonl(opts->tsecr);
483 }
484
485 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
486 *ptr++ = htonl((TCPOPT_NOP << 24) |
487 (TCPOPT_NOP << 16) |
488 (TCPOPT_SACK_PERM << 8) |
489 TCPOLEN_SACK_PERM);
490 }
491
492 if (unlikely(OPTION_WSCALE & options)) {
493 *ptr++ = htonl((TCPOPT_NOP << 24) |
494 (TCPOPT_WINDOW << 16) |
495 (TCPOLEN_WINDOW << 8) |
496 opts->ws);
497 }
498
499 if (unlikely(opts->num_sack_blocks)) {
500 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
501 tp->duplicate_sack : tp->selective_acks;
502 int this_sack;
503
504 *ptr++ = htonl((TCPOPT_NOP << 24) |
505 (TCPOPT_NOP << 16) |
506 (TCPOPT_SACK << 8) |
507 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
508 TCPOLEN_SACK_PERBLOCK)));
509
510 for (this_sack = 0; this_sack < opts->num_sack_blocks;
511 ++this_sack) {
512 *ptr++ = htonl(sp[this_sack].start_seq);
513 *ptr++ = htonl(sp[this_sack].end_seq);
514 }
515
516 tp->rx_opt.dsack = 0;
517 }
518
519 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
520 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
521
522 *ptr++ = htonl((TCPOPT_EXP << 24) |
523 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
524 TCPOPT_FASTOPEN_MAGIC);
525
526 memcpy(ptr, foc->val, foc->len);
527 if ((foc->len & 3) == 2) {
528 u8 *align = ((u8 *)ptr) + foc->len;
529 align[0] = align[1] = TCPOPT_NOP;
530 }
531 ptr += (foc->len + 3) >> 2;
532 }
533}
534
535
536
537
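/* Compute TCP options for SYN packets.  This is not the final
 * network wire format yet.
 */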
538static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
539 struct tcp_out_options *opts,
540 struct tcp_md5sig_key **md5)
541{
542 struct tcp_sock *tp = tcp_sk(sk);
543 unsigned int remaining = MAX_TCP_OPTION_SPACE;
544 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
545
546#ifdef CONFIG_TCP_MD5SIG
547 *md5 = tp->af_specific->md5_lookup(sk, sk);
548 if (*md5) {
549 opts->options |= OPTION_MD5;
550 remaining -= TCPOLEN_MD5SIG_ALIGNED;
551 }
552#else
553 *md5 = NULL;
554#endif
555
556
557
558
559
560
561
562
563
564
565 opts->mss = tcp_advertise_mss(sk);
566 remaining -= TCPOLEN_MSS_ALIGNED;
567
568 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
569 opts->options |= OPTION_TS;
570 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
571 opts->tsecr = tp->rx_opt.ts_recent;
572 remaining -= TCPOLEN_TSTAMP_ALIGNED;
573 }
574 if (likely(sysctl_tcp_window_scaling)) {
575 opts->ws = tp->rx_opt.rcv_wscale;
576 opts->options |= OPTION_WSCALE;
577 remaining -= TCPOLEN_WSCALE_ALIGNED;
578 }
579 if (likely(sysctl_tcp_sack)) {
580 opts->options |= OPTION_SACK_ADVERTISE;
581 if (unlikely(!(OPTION_TS & opts->options)))
582 remaining -= TCPOLEN_SACKPERM_ALIGNED;
583 }
584
585 if (fastopen && fastopen->cookie.len >= 0) {
586 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
587 need = (need + 3) & ~3U;
588 if (remaining >= need) {
589 opts->options |= OPTION_FAST_OPEN_COOKIE;
590 opts->fastopen_cookie = &fastopen->cookie;
591 remaining -= need;
592 tp->syn_fastopen = 1;
593 }
594 }
595
596 return MAX_TCP_OPTION_SPACE - remaining;
597}
598
599
600static unsigned int tcp_synack_options(struct sock *sk,
601 struct request_sock *req,
602 unsigned int mss, struct sk_buff *skb,
603 struct tcp_out_options *opts,
604 struct tcp_md5sig_key **md5,
605 struct tcp_fastopen_cookie *foc)
606{
607 struct inet_request_sock *ireq = inet_rsk(req);
608 unsigned int remaining = MAX_TCP_OPTION_SPACE;
609
610#ifdef CONFIG_TCP_MD5SIG
611 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
612 if (*md5) {
613 opts->options |= OPTION_MD5;
614 remaining -= TCPOLEN_MD5SIG_ALIGNED;
615
616
617
618
619
620
621 ireq->tstamp_ok &= !ireq->sack_ok;
622 }
623#else
624 *md5 = NULL;
625#endif
626
627
628 opts->mss = mss;
629 remaining -= TCPOLEN_MSS_ALIGNED;
630
631 if (likely(ireq->wscale_ok)) {
632 opts->ws = ireq->rcv_wscale;
633 opts->options |= OPTION_WSCALE;
634 remaining -= TCPOLEN_WSCALE_ALIGNED;
635 }
636 if (likely(ireq->tstamp_ok)) {
637 opts->options |= OPTION_TS;
638 opts->tsval = tcp_skb_timestamp(skb);
639 opts->tsecr = req->ts_recent;
640 remaining -= TCPOLEN_TSTAMP_ALIGNED;
641 }
642 if (likely(ireq->sack_ok)) {
643 opts->options |= OPTION_SACK_ADVERTISE;
644 if (unlikely(!ireq->tstamp_ok))
645 remaining -= TCPOLEN_SACKPERM_ALIGNED;
646 }
647 if (foc != NULL && foc->len >= 0) {
648 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
649 need = (need + 3) & ~3U;
650 if (remaining >= need) {
651 opts->options |= OPTION_FAST_OPEN_COOKIE;
652 opts->fastopen_cookie = foc;
653 remaining -= need;
654 }
655 }
656
657 return MAX_TCP_OPTION_SPACE - remaining;
658}
659
660
661
662
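/* Compute TCP options for ESTABLISHED sockets.  This is not the
 * final network wire format yet.
 */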
663static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
664 struct tcp_out_options *opts,
665 struct tcp_md5sig_key **md5)
666{
667 struct tcp_sock *tp = tcp_sk(sk);
668 unsigned int size = 0;
669 unsigned int eff_sacks;
670
671 opts->options = 0;
672
673#ifdef CONFIG_TCP_MD5SIG
674 *md5 = tp->af_specific->md5_lookup(sk, sk);
675 if (unlikely(*md5)) {
676 opts->options |= OPTION_MD5;
677 size += TCPOLEN_MD5SIG_ALIGNED;
678 }
679#else
680 *md5 = NULL;
681#endif
682
683 if (likely(tp->rx_opt.tstamp_ok)) {
684 opts->options |= OPTION_TS;
685 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
686 opts->tsecr = tp->rx_opt.ts_recent;
687 size += TCPOLEN_TSTAMP_ALIGNED;
688 }
689
690 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
691 if (unlikely(eff_sacks)) {
692 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
693 opts->num_sack_blocks =
694 min_t(unsigned int, eff_sacks,
695 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
696 TCPOLEN_SACK_PERBLOCK);
697 size += TCPOLEN_SACK_BASE_ALIGNED +
698 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
699 }
700
701 return size;
702}
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
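/* TCP SMALL QUEUES (TSQ)
 *
 * The goal of TSQ is to keep only a small amount of skbs per TCP flow in
 * the tx queues (qdisc + device), to reduce RTT and bufferbloat.  This is
 * done with a special skb destructor (tcp_wfree).  Since transmitting from
 * an skb destructor is forbidden, a per-cpu tasklet processes all sockets
 * that eventually need to send more skbs, each cpu with its own queue.
 */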
719struct tsq_tasklet {
720 struct tasklet_struct tasklet;
721 struct list_head head;
722};
723static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
724
725static void tcp_tsq_handler(struct sock *sk)
726{
727 if ((1 << sk->sk_state) &
728 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
729 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
730 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
731 0, GFP_ATOMIC);
732}
733
734
735
736
737
738
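/* One tasklet per cpu tries to send more skbs.
 * We run in tasklet context but need to disable irqs when
 * transferring tsq->head, because tcp_wfree() might modify it.
 */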
739static void tcp_tasklet_func(unsigned long data)
740{
741 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
742 LIST_HEAD(list);
743 unsigned long flags;
744 struct list_head *q, *n;
745 struct tcp_sock *tp;
746 struct sock *sk;
747
748 local_irq_save(flags);
749 list_splice_init(&tsq->head, &list);
750 local_irq_restore(flags);
751
752 list_for_each_safe(q, n, &list) {
753 tp = list_entry(q, struct tcp_sock, tsq_node);
754 list_del(&tp->tsq_node);
755
756 sk = (struct sock *)tp;
757 bh_lock_sock(sk);
758
759 if (!sock_owned_by_user(sk)) {
760 tcp_tsq_handler(sk);
761 } else {
762
763 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
764 }
765 bh_unlock_sock(sk);
766
767 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
768 sk_free(sk);
769 }
770}
771
772#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
773 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
774 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
775 (1UL << TCP_MTU_REDUCED_DEFERRED))
776
777
778
779
780
781
782
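/* tcp_release_cb - tcp release_sock() callback
 *
 * Called from release_sock() to perform the protocol-dependent actions
 * that were deferred (via the TCP_*_DEFERRED flags) while the socket
 * was owned by user context.
 */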
783void tcp_release_cb(struct sock *sk)
784{
785 struct tcp_sock *tp = tcp_sk(sk);
786 unsigned long flags, nflags;
787
788
789 do {
790 flags = tp->tsq_flags;
791 if (!(flags & TCP_DEFERRED_ALL))
792 return;
793 nflags = flags & ~TCP_DEFERRED_ALL;
794 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
795
796 if (flags & (1UL << TCP_TSQ_DEFERRED))
797 tcp_tsq_handler(sk);
798
799
800
801
802
803
804
805
806
807
808 sock_release_ownership(sk);
809
810 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
811 tcp_write_timer_handler(sk);
812 __sock_put(sk);
813 }
814 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
815 tcp_delack_timer_handler(sk);
816 __sock_put(sk);
817 }
818 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
819 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
820 __sock_put(sk);
821 }
822}
823EXPORT_SYMBOL(tcp_release_cb);
824
825void __init tcp_tasklet_init(void)
826{
827 int i;
828
829 for_each_possible_cpu(i) {
830 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
831
832 INIT_LIST_HEAD(&tsq->head);
833 tasklet_init(&tsq->tasklet,
834 tcp_tasklet_func,
835 (unsigned long)tsq);
836 }
837}
838
839
840
841
842
843
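/* Write buffer destructor automatically called from kfree_skb().
 * We can not transmit new skbs from this context, as we might already
 * hold the qdisc lock; instead, schedule the per-cpu TSQ tasklet.
 */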
844void tcp_wfree(struct sk_buff *skb)
845{
846 struct sock *sk = skb->sk;
847 struct tcp_sock *tp = tcp_sk(sk);
848 int wmem;
849
850
851
852
853 wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
854
855
856
857
858
859
860
861
862 if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
863 goto out;
864
865 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
866 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
867 unsigned long flags;
868 struct tsq_tasklet *tsq;
869
870
871 local_irq_save(flags);
872 tsq = this_cpu_ptr(&tsq_tasklet);
873 list_add(&tp->tsq_node, &tsq->head);
874 tasklet_schedule(&tsq->tasklet);
875 local_irq_restore(flags);
876 return;
877 }
878out:
879 sk_free(sk);
880}
881
882
883
884
885
886
887
888
889
890
891
892
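/* This routine actually transmits TCP packets queued in by tcp_sendmsg().
 * It is used by both the initial transmission and possible later
 * retransmissions.  All SKBs seen here are completely headerless: it is
 * our job to build the TCP header and pass the packet down to IP so it
 * can do the same, plus pass the packet off to the device.
 *
 * We work here with either a clone of the original SKB, or a fresh
 * unique copy made by the retransmit engine.
 */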
893static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
894 gfp_t gfp_mask)
895{
896 const struct inet_connection_sock *icsk = inet_csk(sk);
897 struct inet_sock *inet;
898 struct tcp_sock *tp;
899 struct tcp_skb_cb *tcb;
900 struct tcp_out_options opts;
901 unsigned int tcp_options_size, tcp_header_size;
902 struct tcp_md5sig_key *md5;
903 struct tcphdr *th;
904 int err;
905
906 BUG_ON(!skb || !tcp_skb_pcount(skb));
907
908 if (clone_it) {
909 skb_mstamp_get(&skb->skb_mstamp);
910
911 if (unlikely(skb_cloned(skb)))
912 skb = pskb_copy(skb, gfp_mask);
913 else
914 skb = skb_clone(skb, gfp_mask);
915 if (unlikely(!skb))
916 return -ENOBUFS;
917 }
918
919 inet = inet_sk(sk);
920 tp = tcp_sk(sk);
921 tcb = TCP_SKB_CB(skb);
922 memset(&opts, 0, sizeof(opts));
923
924 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
925 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
926 else
927 tcp_options_size = tcp_established_options(sk, skb, &opts,
928 &md5);
929 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
930
931 if (tcp_packets_in_flight(tp) == 0)
932 tcp_ca_event(sk, CA_EVENT_TX_START);
933
934
935
936
937
938
939
940
941 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
942
943 skb_push(skb, tcp_header_size);
944 skb_reset_transport_header(skb);
945
946 skb_orphan(skb);
947 skb->sk = sk;
948 skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
949 skb_set_hash_from_sk(skb, sk);
950 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
951
952
953 th = tcp_hdr(skb);
954 th->source = inet->inet_sport;
955 th->dest = inet->inet_dport;
956 th->seq = htonl(tcb->seq);
957 th->ack_seq = htonl(tp->rcv_nxt);
958 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
959 tcb->tcp_flags);
960
961 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
962
963
964
965 th->window = htons(min(tp->rcv_wnd, 65535U));
966 } else {
967 th->window = htons(tcp_select_window(sk));
968 }
969 th->check = 0;
970 th->urg_ptr = 0;
971
972
973 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
974 if (before(tp->snd_up, tcb->seq + 0x10000)) {
975 th->urg_ptr = htons(tp->snd_up - tcb->seq);
976 th->urg = 1;
977 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
978 th->urg_ptr = htons(0xFFFF);
979 th->urg = 1;
980 }
981 }
982
983 tcp_options_write((__be32 *)(th + 1), tp, &opts);
984 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
985 tcp_ecn_send(sk, skb, tcp_header_size);
986
987#ifdef CONFIG_TCP_MD5SIG
988
989 if (md5) {
990 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
991 tp->af_specific->calc_md5_hash(opts.hash_location,
992 md5, sk, NULL, skb);
993 }
994#endif
995
996 icsk->icsk_af_ops->send_check(sk, skb);
997
998 if (likely(tcb->tcp_flags & TCPHDR_ACK))
999 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1000
1001 if (skb->len != tcp_header_size)
1002 tcp_event_data_sent(tp, sk);
1003
1004 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1005 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1006 tcp_skb_pcount(skb));
1007
1008
1009 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
1010
1011
1012 skb->tstamp.tv64 = 0;
1013
1014
1015 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
1016 sizeof(struct inet6_skb_parm)));
1017
1018 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
1019
1020 if (likely(err <= 0))
1021 return err;
1022
1023 tcp_enter_cwr(sk);
1024
1025 return net_xmit_eval(err);
1026}
1027
1028
1029
1030
1031
1032
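/* This routine just queues the buffer for sending.
 *
 * NOTE: the probe0 timer is not checked, do not forget
 * tcp_push_pending_frames(), otherwise the socket can stall.
 */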
1033static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1034{
1035 struct tcp_sock *tp = tcp_sk(sk);
1036
1037
1038 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1039 __skb_header_release(skb);
1040 tcp_add_write_queue_tail(sk, skb);
1041 sk->sk_wmem_queued += skb->truesize;
1042 sk_mem_charge(sk, skb->truesize);
1043}
1044
1045
1046static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
1047 unsigned int mss_now)
1048{
1049 struct skb_shared_info *shinfo = skb_shinfo(skb);
1050
1051
1052 WARN_ON_ONCE(skb_cloned(skb));
1053
1054 if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
1055
1056
1057
1058 tcp_skb_pcount_set(skb, 1);
1059 shinfo->gso_size = 0;
1060 shinfo->gso_type = 0;
1061 } else {
1062 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1063 shinfo->gso_size = mss_now;
1064 shinfo->gso_type = sk->sk_gso_type;
1065 }
1066}
1067
1068
1069
1070
1071static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1072 int decr)
1073{
1074 struct tcp_sock *tp = tcp_sk(sk);
1075
1076 if (!tp->sacked_out || tcp_is_reno(tp))
1077 return;
1078
1079 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1080 tp->fackets_out -= decr;
1081}
1082
1083
1084
1085
1086static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1087{
1088 struct tcp_sock *tp = tcp_sk(sk);
1089
1090 tp->packets_out -= decr;
1091
1092 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1093 tp->sacked_out -= decr;
1094 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1095 tp->retrans_out -= decr;
1096 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1097 tp->lost_out -= decr;
1098
1099
1100 if (tcp_is_reno(tp) && decr > 0)
1101 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1102
1103 tcp_adjust_fackets_out(sk, skb, decr);
1104
1105 if (tp->lost_skb_hint &&
1106 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1107 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1108 tp->lost_cnt_hint -= decr;
1109
1110 tcp_verify_left_out(tp);
1111}
1112
1113static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1114{
1115 struct skb_shared_info *shinfo = skb_shinfo(skb);
1116
1117 if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
1118 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1119 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1120 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1121
1122 shinfo->tx_flags &= ~tsflags;
1123 shinfo2->tx_flags |= tsflags;
1124 swap(shinfo->tskey, shinfo2->tskey);
1125 }
1126}
1127
1128
1129
1130
1131
1132
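/* Function to create two new TCP segments.  Shrinks the given segment
 * to the specified size and appends a new segment with the rest of the
 * packet to the list.  This won't be called frequently, I hope.
 * Remember, these are still headerless SKBs at this point.
 */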
1133int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1134 unsigned int mss_now, gfp_t gfp)
1135{
1136 struct tcp_sock *tp = tcp_sk(sk);
1137 struct sk_buff *buff;
1138 int nsize, old_factor;
1139 int nlen;
1140 u8 flags;
1141
1142 if (WARN_ON(len > skb->len))
1143 return -EINVAL;
1144
1145 nsize = skb_headlen(skb) - len;
1146 if (nsize < 0)
1147 nsize = 0;
1148
1149 if (skb_unclone(skb, gfp))
1150 return -ENOMEM;
1151
1152
1153 buff = sk_stream_alloc_skb(sk, nsize, gfp);
1154 if (buff == NULL)
1155 return -ENOMEM;
1156
1157 sk->sk_wmem_queued += buff->truesize;
1158 sk_mem_charge(sk, buff->truesize);
1159 nlen = skb->len - len - nsize;
1160 buff->truesize += nlen;
1161 skb->truesize -= nlen;
1162
1163
1164 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1165 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1166 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1167
1168
1169 flags = TCP_SKB_CB(skb)->tcp_flags;
1170 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1171 TCP_SKB_CB(buff)->tcp_flags = flags;
1172 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1173
1174 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1175
1176 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1177 skb_put(buff, nsize),
1178 nsize, 0);
1179
1180 skb_trim(skb, len);
1181
1182 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1183 } else {
1184 skb->ip_summed = CHECKSUM_PARTIAL;
1185 skb_split(skb, buff, len);
1186 }
1187
1188 buff->ip_summed = skb->ip_summed;
1189
1190 buff->tstamp = skb->tstamp;
1191 tcp_fragment_tstamp(skb, buff);
1192
1193 old_factor = tcp_skb_pcount(skb);
1194
1195
1196 tcp_set_skb_tso_segs(sk, skb, mss_now);
1197 tcp_set_skb_tso_segs(sk, buff, mss_now);
1198
1199
1200
1201
1202 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1203 int diff = old_factor - tcp_skb_pcount(skb) -
1204 tcp_skb_pcount(buff);
1205
1206 if (diff)
1207 tcp_adjust_pcount(sk, skb, diff);
1208 }
1209
1210
1211 __skb_header_release(buff);
1212 tcp_insert_write_queue_after(skb, buff, sk);
1213
1214 return 0;
1215}
1216
1217
1218
1219
1220
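/* This is similar to __pskb_pull_tail().  The difference is that the pulled
 * data is not copied, but immediately discarded.
 */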
1221static void __pskb_trim_head(struct sk_buff *skb, int len)
1222{
1223 struct skb_shared_info *shinfo;
1224 int i, k, eat;
1225
1226 eat = min_t(int, len, skb_headlen(skb));
1227 if (eat) {
1228 __skb_pull(skb, eat);
1229 len -= eat;
1230 if (!len)
1231 return;
1232 }
1233 eat = len;
1234 k = 0;
1235 shinfo = skb_shinfo(skb);
1236 for (i = 0; i < shinfo->nr_frags; i++) {
1237 int size = skb_frag_size(&shinfo->frags[i]);
1238
1239 if (size <= eat) {
1240 skb_frag_unref(skb, i);
1241 eat -= size;
1242 } else {
1243 shinfo->frags[k] = shinfo->frags[i];
1244 if (eat) {
1245 shinfo->frags[k].page_offset += eat;
1246 skb_frag_size_sub(&shinfo->frags[k], eat);
1247 eat = 0;
1248 }
1249 k++;
1250 }
1251 }
1252 shinfo->nr_frags = k;
1253
1254 skb_reset_tail_pointer(skb);
1255 skb->data_len -= len;
1256 skb->len = skb->data_len;
1257}
1258
1259
1260int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1261{
1262 if (skb_unclone(skb, GFP_ATOMIC))
1263 return -ENOMEM;
1264
1265 __pskb_trim_head(skb, len);
1266
1267 TCP_SKB_CB(skb)->seq += len;
1268 skb->ip_summed = CHECKSUM_PARTIAL;
1269
1270 skb->truesize -= len;
1271 sk->sk_wmem_queued -= len;
1272 sk_mem_uncharge(sk, len);
1273 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1274
1275
1276 if (tcp_skb_pcount(skb) > 1)
1277 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1278
1279 return 0;
1280}
1281
1282
1283static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1284{
1285 const struct tcp_sock *tp = tcp_sk(sk);
1286 const struct inet_connection_sock *icsk = inet_csk(sk);
1287 int mss_now;
1288
1289
1290
1291
1292 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1293
1294
1295 if (icsk->icsk_af_ops->net_frag_header_len) {
1296 const struct dst_entry *dst = __sk_dst_get(sk);
1297
1298 if (dst && dst_allfrag(dst))
1299 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1300 }
1301
1302
1303 if (mss_now > tp->rx_opt.mss_clamp)
1304 mss_now = tp->rx_opt.mss_clamp;
1305
1306
1307 mss_now -= icsk->icsk_ext_hdr_len;
1308
1309
1310 if (mss_now < 48)
1311 mss_now = 48;
1312 return mss_now;
1313}
1314
1315
1316int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1317{
1318
1319 return __tcp_mtu_to_mss(sk, pmtu) -
1320 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1321}
1322
1323
1324int tcp_mss_to_mtu(struct sock *sk, int mss)
1325{
1326 const struct tcp_sock *tp = tcp_sk(sk);
1327 const struct inet_connection_sock *icsk = inet_csk(sk);
1328 int mtu;
1329
1330 mtu = mss +
1331 tp->tcp_header_len +
1332 icsk->icsk_ext_hdr_len +
1333 icsk->icsk_af_ops->net_header_len;
1334
1335
1336 if (icsk->icsk_af_ops->net_frag_header_len) {
1337 const struct dst_entry *dst = __sk_dst_get(sk);
1338
1339 if (dst && dst_allfrag(dst))
1340 mtu += icsk->icsk_af_ops->net_frag_header_len;
1341 }
1342 return mtu;
1343}
1344
1345
1346void tcp_mtup_init(struct sock *sk)
1347{
1348 struct tcp_sock *tp = tcp_sk(sk);
1349 struct inet_connection_sock *icsk = inet_csk(sk);
1350 struct net *net = sock_net(sk);
1351
1352 icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
1353 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1354 icsk->icsk_af_ops->net_header_len;
1355 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
1356 icsk->icsk_mtup.probe_size = 0;
1357}
1358EXPORT_SYMBOL(tcp_mtup_init);
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
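/* Synchronize the sending MSS with the current PMTU / extension-header set.
 *
 *   tp->rx_opt.user_mss    - MSS set by the user via TCP_MAXSEG; counts the
 *                            bare TCP header only, no TCP options.
 *   tp->rx_opt.mss_clamp   - MSS negotiated at connection setup: the minimum
 *                            of user_mss and the MSS received with the SYN,
 *                            also excluding TCP options.
 *   icsk->icsk_pmtu_cookie - last PMTU seen by this function.
 *   tp->mss_cache          - current effective sending MSS, including all
 *                            TCP options except SACKs; derived from the
 *                            current PMTU but never exceeding mss_clamp.
 *
 * Note that per RFC 1122 the advertised MSS includes neither TCP nor IP
 * options, and that icsk_pmtu_cookie and mss_cache are read-only outside
 * this function.
 */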
1382unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1383{
1384 struct tcp_sock *tp = tcp_sk(sk);
1385 struct inet_connection_sock *icsk = inet_csk(sk);
1386 int mss_now;
1387
1388 if (icsk->icsk_mtup.search_high > pmtu)
1389 icsk->icsk_mtup.search_high = pmtu;
1390
1391 mss_now = tcp_mtu_to_mss(sk, pmtu);
1392 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1393
1394
1395 icsk->icsk_pmtu_cookie = pmtu;
1396 if (icsk->icsk_mtup.enabled)
1397 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1398 tp->mss_cache = mss_now;
1399
1400 return mss_now;
1401}
1402EXPORT_SYMBOL(tcp_sync_mss);
1403
1404
1405
1406
1407unsigned int tcp_current_mss(struct sock *sk)
1408{
1409 const struct tcp_sock *tp = tcp_sk(sk);
1410 const struct dst_entry *dst = __sk_dst_get(sk);
1411 u32 mss_now;
1412 unsigned int header_len;
1413 struct tcp_out_options opts;
1414 struct tcp_md5sig_key *md5;
1415
1416 mss_now = tp->mss_cache;
1417
1418 if (dst) {
1419 u32 mtu = dst_mtu(dst);
1420 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1421 mss_now = tcp_sync_mss(sk, mtu);
1422 }
1423
1424 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1425 sizeof(struct tcphdr);
1426
1427
1428
1429
1430 if (header_len != tp->tcp_header_len) {
1431 int delta = (int) header_len - tp->tcp_header_len;
1432 mss_now -= delta;
1433 }
1434
1435 return mss_now;
1436}
1437
1438
1439
1440
1441
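/* RFC 2861 congestion window validation, slow path: if the window was not
 * fully used for at least one RTO while the sender was application limited,
 * move cwnd halfway toward the amount that was actually used.  We do not
 * touch cwnd in retransmission phases or right after a sndbuf limit hit.
 */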
1442static void tcp_cwnd_application_limited(struct sock *sk)
1443{
1444 struct tcp_sock *tp = tcp_sk(sk);
1445
1446 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1447 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1448
1449 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1450 u32 win_used = max(tp->snd_cwnd_used, init_win);
1451 if (win_used < tp->snd_cwnd) {
1452 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1453 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1454 }
1455 tp->snd_cwnd_used = 0;
1456 }
1457 tp->snd_cwnd_stamp = tcp_time_stamp;
1458}
1459
1460static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1461{
1462 struct tcp_sock *tp = tcp_sk(sk);
1463
1464
1465
1466
1467 if (!before(tp->snd_una, tp->max_packets_seq) ||
1468 tp->packets_out > tp->max_packets_out) {
1469 tp->max_packets_out = tp->packets_out;
1470 tp->max_packets_seq = tp->snd_nxt;
1471 tp->is_cwnd_limited = is_cwnd_limited;
1472 }
1473
1474 if (tcp_is_cwnd_limited(sk)) {
1475
1476 tp->snd_cwnd_used = 0;
1477 tp->snd_cwnd_stamp = tcp_time_stamp;
1478 } else {
1479
1480 if (tp->packets_out > tp->snd_cwnd_used)
1481 tp->snd_cwnd_used = tp->packets_out;
1482
1483 if (sysctl_tcp_slow_start_after_idle &&
1484 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1485 tcp_cwnd_application_limited(sk);
1486 }
1487}
1488
1489
1490static bool tcp_minshall_check(const struct tcp_sock *tp)
1491{
1492 return after(tp->snd_sml, tp->snd_una) &&
1493 !after(tp->snd_sml, tp->snd_nxt);
1494}
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1505 const struct sk_buff *skb)
1506{
1507 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1508 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1509}
1510
1511
1512
1513
1514
1515
1516
1517
1518static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1519 int nonagle)
1520{
1521 return partial &&
1522 ((nonagle & TCP_NAGLE_CORK) ||
1523 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1524}
1525
1526
1527
1528
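/* Return how many segments we would like in a TSO packet,
 * aiming for roughly one TSO packet per ms of pacing rate.
 */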
1529static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
1530{
1531 u32 bytes, segs;
1532
1533 bytes = min(sk->sk_pacing_rate >> 10,
1534 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1535
1536
1537
1538
1539
1540
1541 segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
1542
1543 return min_t(u32, segs, sk->sk_gso_max_segs);
1544}
1545
1546
1547static unsigned int tcp_mss_split_point(const struct sock *sk,
1548 const struct sk_buff *skb,
1549 unsigned int mss_now,
1550 unsigned int max_segs,
1551 int nonagle)
1552{
1553 const struct tcp_sock *tp = tcp_sk(sk);
1554 u32 partial, needed, window, max_len;
1555
1556 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1557 max_len = mss_now * max_segs;
1558
1559 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1560 return max_len;
1561
1562 needed = min(skb->len, window);
1563
1564 if (max_len <= needed)
1565 return max_len;
1566
1567 partial = needed % mss_now;
1568
1569
1570
1571
1572 if (tcp_nagle_check(partial != 0, tp, nonagle))
1573 return needed - partial;
1574
1575 return needed;
1576}
1577
1578
1579
1580
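/* Can at least one segment of SKB be sent right now, according to the
 * congestion window rules?  If so, return how many segments are allowed.
 */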
1581static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1582 const struct sk_buff *skb)
1583{
1584 u32 in_flight, cwnd, halfcwnd;
1585
1586
1587 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1588 tcp_skb_pcount(skb) == 1)
1589 return 1;
1590
1591 in_flight = tcp_packets_in_flight(tp);
1592 cwnd = tp->snd_cwnd;
1593 if (in_flight >= cwnd)
1594 return 0;
1595
1596
1597
1598
1599 halfcwnd = max(cwnd >> 1, 1U);
1600 return min(halfcwnd, cwnd - in_flight);
1601}
1602
1603
1604
1605
1606
1607static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1608 unsigned int mss_now)
1609{
1610 int tso_segs = tcp_skb_pcount(skb);
1611
1612 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1613 tcp_set_skb_tso_segs(sk, skb, mss_now);
1614 tso_segs = tcp_skb_pcount(skb);
1615 }
1616 return tso_segs;
1617}
1618
1619
1620
1621
1622
1623static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1624 unsigned int cur_mss, int nonagle)
1625{
1626
1627
1628
1629
1630
1631
1632 if (nonagle & TCP_NAGLE_PUSH)
1633 return true;
1634
1635
1636 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1637 return true;
1638
1639 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1640 return true;
1641
1642 return false;
1643}
1644
1645
1646static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1647 const struct sk_buff *skb,
1648 unsigned int cur_mss)
1649{
1650 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1651
1652 if (skb->len > cur_mss)
1653 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1654
1655 return !after(end_seq, tcp_wnd_end(tp));
1656}
1657
1658
1659
1660
1661
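/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
 * should be put on the wire right now.  If so, it returns the number of
 * packets allowed by the congestion window.
 */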
1662static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1663 unsigned int cur_mss, int nonagle)
1664{
1665 const struct tcp_sock *tp = tcp_sk(sk);
1666 unsigned int cwnd_quota;
1667
1668 tcp_init_tso_segs(sk, skb, cur_mss);
1669
1670 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1671 return 0;
1672
1673 cwnd_quota = tcp_cwnd_test(tp, skb);
1674 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1675 cwnd_quota = 0;
1676
1677 return cwnd_quota;
1678}
1679
1680
1681bool tcp_may_send_now(struct sock *sk)
1682{
1683 const struct tcp_sock *tp = tcp_sk(sk);
1684 struct sk_buff *skb = tcp_send_head(sk);
1685
1686 return skb &&
1687 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1688 (tcp_skb_is_last(sk, skb) ?
1689 tp->nonagle : TCP_NAGLE_PUSH));
1690}
1691
1692
1693
1694
1695
1696
1697
1698
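/* Split an skb on the send path without copying payload when the data lives
 * entirely in page frags; otherwise fall back to tcp_fragment().  Used by
 * tcp_write_xmit() to send only part of a large TSO skb right now.
 */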
1699static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1700 unsigned int mss_now, gfp_t gfp)
1701{
1702 struct sk_buff *buff;
1703 int nlen = skb->len - len;
1704 u8 flags;
1705
1706
1707 if (skb->len != skb->data_len)
1708 return tcp_fragment(sk, skb, len, mss_now, gfp);
1709
1710 buff = sk_stream_alloc_skb(sk, 0, gfp);
1711 if (unlikely(buff == NULL))
1712 return -ENOMEM;
1713
1714 sk->sk_wmem_queued += buff->truesize;
1715 sk_mem_charge(sk, buff->truesize);
1716 buff->truesize += nlen;
1717 skb->truesize -= nlen;
1718
1719
1720 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1721 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1722 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1723
1724
1725 flags = TCP_SKB_CB(skb)->tcp_flags;
1726 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1727 TCP_SKB_CB(buff)->tcp_flags = flags;
1728
1729
1730 TCP_SKB_CB(buff)->sacked = 0;
1731
1732 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1733 skb_split(skb, buff, len);
1734 tcp_fragment_tstamp(skb, buff);
1735
1736
1737 tcp_set_skb_tso_segs(sk, skb, mss_now);
1738 tcp_set_skb_tso_segs(sk, buff, mss_now);
1739
1740
1741 __skb_header_release(buff);
1742 tcp_insert_write_queue_after(skb, buff, sk);
1743
1744 return 0;
1745}
1746
1747
1748
1749
1750
1751
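/* Try to defer sending, if possible, in order to minimize the amount
 * of TSO splitting we do.  View it as a kind of TSO Nagle test.
 *
 * This algorithm is from John Heffner.
 */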
1752static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1753 bool *is_cwnd_limited, u32 max_segs)
1754{
1755 struct tcp_sock *tp = tcp_sk(sk);
1756 const struct inet_connection_sock *icsk = inet_csk(sk);
1757 u32 send_win, cong_win, limit, in_flight;
1758 int win_divisor;
1759
1760 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1761 goto send_now;
1762
1763 if (icsk->icsk_ca_state != TCP_CA_Open)
1764 goto send_now;
1765
1766
1767 if (tp->tso_deferred &&
1768 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1769 goto send_now;
1770
1771 in_flight = tcp_packets_in_flight(tp);
1772
1773 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1774
1775 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1776
1777
1778 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1779
1780 limit = min(send_win, cong_win);
1781
1782
1783 if (limit >= max_segs * tp->mss_cache)
1784 goto send_now;
1785
1786
1787 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1788 goto send_now;
1789
1790 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1791 if (win_divisor) {
1792 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1793
1794
1795
1796
1797 chunk /= win_divisor;
1798 if (limit >= chunk)
1799 goto send_now;
1800 } else {
1801
1802
1803
1804
1805
1806 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1807 goto send_now;
1808 }
1809
1810
1811
1812
1813 if (!tp->tso_deferred)
1814 tp->tso_deferred = 1 | (jiffies << 1);
1815
1816 if (cong_win < send_win && cong_win < skb->len)
1817 *is_cwnd_limited = true;
1818
1819 return true;
1820
1821send_now:
1822 tp->tso_deferred = 0;
1823 return false;
1824}
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
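/* Create a new MTU probe if we are ready.
 * MTU probing regularly attempts to increase the path MTU by deliberately
 * sending larger packets; this discovers routing changes that result in a
 * larger path MTU.
 *
 * Returns 0 if we should wait to probe (no cwnd available),
 *         1 if a probe was sent,
 *        -1 otherwise.
 */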
1835static int tcp_mtu_probe(struct sock *sk)
1836{
1837 struct tcp_sock *tp = tcp_sk(sk);
1838 struct inet_connection_sock *icsk = inet_csk(sk);
1839 struct sk_buff *skb, *nskb, *next;
1840 int len;
1841 int probe_size;
1842 int size_needed;
1843 int copy;
1844 int mss_now;
1845
1846
1847
1848
1849
1850 if (!icsk->icsk_mtup.enabled ||
1851 icsk->icsk_mtup.probe_size ||
1852 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1853 tp->snd_cwnd < 11 ||
1854 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1855 return -1;
1856
1857
1858 mss_now = tcp_current_mss(sk);
1859 probe_size = 2 * tp->mss_cache;
1860 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1861 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1862
1863 return -1;
1864 }
1865
1866
1867 if (tp->write_seq - tp->snd_nxt < size_needed)
1868 return -1;
1869
1870 if (tp->snd_wnd < size_needed)
1871 return -1;
1872 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1873 return 0;
1874
1875
1876 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1877 if (!tcp_packets_in_flight(tp))
1878 return -1;
1879 else
1880 return 0;
1881 }
1882
1883
1884 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1885 return -1;
1886 sk->sk_wmem_queued += nskb->truesize;
1887 sk_mem_charge(sk, nskb->truesize);
1888
1889 skb = tcp_send_head(sk);
1890
1891 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1892 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1893 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1894 TCP_SKB_CB(nskb)->sacked = 0;
1895 nskb->csum = 0;
1896 nskb->ip_summed = skb->ip_summed;
1897
1898 tcp_insert_write_queue_before(nskb, skb, sk);
1899
1900 len = 0;
1901 tcp_for_write_queue_from_safe(skb, next, sk) {
1902 copy = min_t(int, skb->len, probe_size - len);
1903 if (nskb->ip_summed)
1904 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1905 else
1906 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1907 skb_put(nskb, copy),
1908 copy, nskb->csum);
1909
1910 if (skb->len <= copy) {
1911
1912
1913 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1914 tcp_unlink_write_queue(skb, sk);
1915 sk_wmem_free_skb(sk, skb);
1916 } else {
1917 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1918 ~(TCPHDR_FIN|TCPHDR_PSH);
1919 if (!skb_shinfo(skb)->nr_frags) {
1920 skb_pull(skb, copy);
1921 if (skb->ip_summed != CHECKSUM_PARTIAL)
1922 skb->csum = csum_partial(skb->data,
1923 skb->len, 0);
1924 } else {
1925 __pskb_trim_head(skb, copy);
1926 tcp_set_skb_tso_segs(sk, skb, mss_now);
1927 }
1928 TCP_SKB_CB(skb)->seq += copy;
1929 }
1930
1931 len += copy;
1932
1933 if (len >= probe_size)
1934 break;
1935 }
1936 tcp_init_tso_segs(sk, nskb, nskb->len);
1937
1938
1939
1940
1941 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1942
1943
1944 tp->snd_cwnd--;
1945 tcp_event_new_data_sent(sk, nskb);
1946
1947 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1948 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1949 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1950
1951 return 1;
1952 }
1953
1954 return -1;
1955}
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
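/* This routine writes packets to the network.  It advances the
 * send_head.  This happens as incoming acks open up the remote
 * window for us.
 *
 * Send at most one packet when push_one > 0.  Temporarily ignore the
 * cwnd limit to force at most one packet out when push_one == 2.
 *
 * Returns true if no segments are in flight and we have queued segments,
 * but cannot send anything now because of SWS or another problem.
 */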
1971static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1972 int push_one, gfp_t gfp)
1973{
1974 struct tcp_sock *tp = tcp_sk(sk);
1975 struct sk_buff *skb;
1976 unsigned int tso_segs, sent_pkts;
1977 int cwnd_quota;
1978 int result;
1979 bool is_cwnd_limited = false;
1980 u32 max_segs;
1981
1982 sent_pkts = 0;
1983
1984 if (!push_one) {
1985
1986 result = tcp_mtu_probe(sk);
1987 if (!result) {
1988 return false;
1989 } else if (result > 0) {
1990 sent_pkts = 1;
1991 }
1992 }
1993
1994 max_segs = tcp_tso_autosize(sk, mss_now);
1995 while ((skb = tcp_send_head(sk))) {
1996 unsigned int limit;
1997
1998 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1999 BUG_ON(!tso_segs);
2000
2001 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2002
2003 skb_mstamp_get(&skb->skb_mstamp);
2004 goto repair;
2005 }
2006
2007 cwnd_quota = tcp_cwnd_test(tp, skb);
2008 if (!cwnd_quota) {
2009 is_cwnd_limited = true;
2010 if (push_one == 2)
2011
2012 cwnd_quota = 1;
2013 else
2014 break;
2015 }
2016
2017 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
2018 break;
2019
2020 if (tso_segs == 1 || !max_segs) {
2021 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2022 (tcp_skb_is_last(sk, skb) ?
2023 nonagle : TCP_NAGLE_PUSH))))
2024 break;
2025 } else {
2026 if (!push_one &&
2027 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2028 max_segs))
2029 break;
2030 }
2031
2032 limit = mss_now;
2033 if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
2034 limit = tcp_mss_split_point(sk, skb, mss_now,
2035 min_t(unsigned int,
2036 cwnd_quota,
2037 max_segs),
2038 nonagle);
2039
2040 if (skb->len > limit &&
2041 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2042 break;
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
2055 limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
2056
2057 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
2058 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
2059
2060
2061
2062
2063 smp_mb__after_atomic();
2064 if (atomic_read(&sk->sk_wmem_alloc) > limit)
2065 break;
2066 }
2067
2068 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2069 break;
2070
2071repair:
2072
2073
2074
2075 tcp_event_new_data_sent(sk, skb);
2076
2077 tcp_minshall_update(tp, mss_now, skb);
2078 sent_pkts += tcp_skb_pcount(skb);
2079
2080 if (push_one)
2081 break;
2082 }
2083
2084 if (likely(sent_pkts)) {
2085 if (tcp_in_cwnd_reduction(sk))
2086 tp->prr_out += sent_pkts;
2087
2088
2089 if (push_one != 2)
2090 tcp_schedule_loss_probe(sk);
2091 tcp_cwnd_validate(sk, is_cwnd_limited);
2092 return false;
2093 }
2094 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
2095}
2096
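/* Schedule a tail loss probe (TLP) timer: if the last transmitted data is
 * not acknowledged within roughly two SRTTs, a probe segment is sent so
 * that a tail loss can trigger fast recovery instead of waiting for an RTO.
 */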
2097bool tcp_schedule_loss_probe(struct sock *sk)
2098{
2099 struct inet_connection_sock *icsk = inet_csk(sk);
2100 struct tcp_sock *tp = tcp_sk(sk);
2101 u32 timeout, tlp_time_stamp, rto_time_stamp;
2102 u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
2103
2104 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
2105 return false;
2106
2107 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
2108 tcp_rearm_rto(sk);
2109 return false;
2110 }
2111
2112
2113
2114 if (sk->sk_state == TCP_SYN_RECV)
2115 return false;
2116
2117
2118 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
2119 return false;
2120
2121
2122
2123
2124 if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
2125 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2126 return false;
2127
2128 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2129 tcp_send_head(sk))
2130 return false;
2131
2132
2133
2134
2135 timeout = rtt << 1;
2136 if (tp->packets_out == 1)
2137 timeout = max_t(u32, timeout,
2138 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
2139 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
2140
2141
2142 tlp_time_stamp = tcp_time_stamp + timeout;
2143 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
2144 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
2145 s32 delta = rto_time_stamp - tcp_time_stamp;
2146 if (delta > 0)
2147 timeout = delta;
2148 }
2149
2150 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2151 TCP_RTO_MAX);
2152 return true;
2153}
2154
2155
2156
2157
2158
2159
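/* Thanks to skb fast clones, we can detect if a prior transmit of
 * a packet is still in a qdisc or driver queue.
 * In this case, there is very little point doing a retransmit !
 * Note: this is called from BH context only.
 */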
2160static bool skb_still_in_host_queue(const struct sock *sk,
2161 const struct sk_buff *skb)
2162{
2163 if (unlikely(skb_fclone_busy(sk, skb))) {
2164 NET_INC_STATS_BH(sock_net(sk),
2165 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2166 return true;
2167 }
2168 return false;
2169}
2170
2171
2172
2173
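/* When the probe timeout (PTO) fires, send a new segment if one exists,
 * else retransmit the last segment.
 */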
2174void tcp_send_loss_probe(struct sock *sk)
2175{
2176 struct tcp_sock *tp = tcp_sk(sk);
2177 struct sk_buff *skb;
2178 int pcount;
2179 int mss = tcp_current_mss(sk);
2180 int err = -1;
2181
2182 if (tcp_send_head(sk) != NULL) {
2183 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2184 goto rearm_timer;
2185 }
2186
2187
2188 if (tp->tlp_high_seq)
2189 goto rearm_timer;
2190
2191
2192 skb = tcp_write_queue_tail(sk);
2193 if (WARN_ON(!skb))
2194 goto rearm_timer;
2195
2196 if (skb_still_in_host_queue(sk, skb))
2197 goto rearm_timer;
2198
2199 pcount = tcp_skb_pcount(skb);
2200 if (WARN_ON(!pcount))
2201 goto rearm_timer;
2202
2203 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2204 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
2205 GFP_ATOMIC)))
2206 goto rearm_timer;
2207 skb = tcp_write_queue_tail(sk);
2208 }
2209
2210 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2211 goto rearm_timer;
2212
2213 err = __tcp_retransmit_skb(sk, skb);
2214
2215
2216 if (likely(!err))
2217 tp->tlp_high_seq = tp->snd_nxt;
2218
2219rearm_timer:
2220 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2221 inet_csk(sk)->icsk_rto,
2222 TCP_RTO_MAX);
2223
2224 if (likely(!err))
2225 NET_INC_STATS_BH(sock_net(sk),
2226 LINUX_MIB_TCPLOSSPROBES);
2227}
2228
2229
2230
2231
2232
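/* Push out any pending frames which were held back due to TCP_CORK or an
 * attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 */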
2233void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2234 int nonagle)
2235{
2236
2237
2238
2239
2240 if (unlikely(sk->sk_state == TCP_CLOSE))
2241 return;
2242
2243 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2244 sk_gfp_atomic(sk, GFP_ATOMIC)))
2245 tcp_check_probe_timer(sk);
2246}
2247
2248
2249
2250
2251void tcp_push_one(struct sock *sk, unsigned int mss_now)
2252{
2253 struct sk_buff *skb = tcp_send_head(sk);
2254
2255 BUG_ON(!skb || skb->len < mss_now);
2256
2257 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2258}
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
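/* This function returns the amount that we can raise the
 * usable window based on the following constraints:
 *
 * 1. The window can never be shrunk once it is offered (RFC 793).
 * 2. We limit memory per socket.
 *
 * RFC 1122:
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RCV.NXT + RCV.WND fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * i.e. don't raise the right edge of the window until it can be raised by
 * at least one MSS.  Below, the offered window is additionally kept to a
 * multiple of the MSS (or of the window-scale granularity when scaling is
 * in use) to avoid silly window syndrome.
 */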
2312u32 __tcp_select_window(struct sock *sk)
2313{
2314 struct inet_connection_sock *icsk = inet_csk(sk);
2315 struct tcp_sock *tp = tcp_sk(sk);
2316
2317
2318
2319
2320
2321
2322 int mss = icsk->icsk_ack.rcv_mss;
2323 int free_space = tcp_space(sk);
2324 int allowed_space = tcp_full_space(sk);
2325 int full_space = min_t(int, tp->window_clamp, allowed_space);
2326 int window;
2327
2328 if (mss > full_space)
2329 mss = full_space;
2330
2331 if (free_space < (full_space >> 1)) {
2332 icsk->icsk_ack.quick = 0;
2333
2334 if (sk_under_memory_pressure(sk))
2335 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2336 4U * tp->advmss);
2337
2338
2339
2340
2341 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2342
2343
2344
2345
2346
2347
2348
2349
2350 if (free_space < (allowed_space >> 4) || free_space < mss)
2351 return 0;
2352 }
2353
2354 if (free_space > tp->rcv_ssthresh)
2355 free_space = tp->rcv_ssthresh;
2356
2357
2358
2359
2360 window = tp->rcv_wnd;
2361 if (tp->rx_opt.rcv_wscale) {
2362 window = free_space;
2363
2364
2365
2366
2367
2368 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2369 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2370 << tp->rx_opt.rcv_wscale);
2371 } else {
2372
2373
2374
2375
2376
2377
2378
2379
2380 if (window <= free_space - mss || window > free_space)
2381 window = (free_space / mss) * mss;
2382 else if (mss == full_space &&
2383 free_space > window + (full_space >> 1))
2384 window = free_space;
2385 }
2386
2387 return window;
2388}
2389
2390
2391static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2392{
2393 struct tcp_sock *tp = tcp_sk(sk);
2394 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2395 int skb_size, next_skb_size;
2396
2397 skb_size = skb->len;
2398 next_skb_size = next_skb->len;
2399
2400 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2401
2402 tcp_highest_sack_combine(sk, next_skb, skb);
2403
2404 tcp_unlink_write_queue(next_skb, sk);
2405
2406 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2407 next_skb_size);
2408
2409 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2410 skb->ip_summed = CHECKSUM_PARTIAL;
2411
2412 if (skb->ip_summed != CHECKSUM_PARTIAL)
2413 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2414
2415
2416 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2417
2418
2419 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2420
2421
2422
2423
2424 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2425
2426
2427 tcp_clear_retrans_hints_partial(tp);
2428 if (next_skb == tp->retransmit_skb_hint)
2429 tp->retransmit_skb_hint = skb;
2430
2431 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2432
2433 sk_wmem_free_skb(sk, next_skb);
2434}
2435
2436
2437static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2438{
2439 if (tcp_skb_pcount(skb) > 1)
2440 return false;
2441
2442 if (skb_shinfo(skb)->nr_frags != 0)
2443 return false;
2444 if (skb_cloned(skb))
2445 return false;
2446 if (skb == tcp_send_head(sk))
2447 return false;
2448
2449 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2450 return false;
2451
2452 return true;
2453}
2454
2455
2456
2457
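/* Collapse packets in the retransmit queue so that fewer packets go out on
 * the wire.  This is only done on retransmission.
 */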
2458static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2459 int space)
2460{
2461 struct tcp_sock *tp = tcp_sk(sk);
2462 struct sk_buff *skb = to, *tmp;
2463 bool first = true;
2464
2465 if (!sysctl_tcp_retrans_collapse)
2466 return;
2467 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2468 return;
2469
2470 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2471 if (!tcp_can_collapse(sk, skb))
2472 break;
2473
2474 space -= skb->len;
2475
2476 if (first) {
2477 first = false;
2478 continue;
2479 }
2480
2481 if (space < 0)
2482 break;
2483
2484
2485
2486 if (skb->len > skb_availroom(to))
2487 break;
2488
2489 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2490 break;
2491
2492 tcp_collapse_retrans(sk, to);
2493 }
2494}
2495
2496
2497
2498
2499
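/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */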
2500int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2501{
2502 struct tcp_sock *tp = tcp_sk(sk);
2503 struct inet_connection_sock *icsk = inet_csk(sk);
2504 unsigned int cur_mss;
2505 int err;
2506
2507
2508 if (icsk->icsk_mtup.probe_size) {
2509 icsk->icsk_mtup.probe_size = 0;
2510 }
2511
2512
2513
2514
2515 if (atomic_read(&sk->sk_wmem_alloc) >
2516 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2517 return -EAGAIN;
2518
2519 if (skb_still_in_host_queue(sk, skb))
2520 return -EBUSY;
2521
2522 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2523 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2524 BUG();
2525 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2526 return -ENOMEM;
2527 }
2528
2529 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2530 return -EHOSTUNREACH;
2531
2532 cur_mss = tcp_current_mss(sk);
2533
2534
2535
2536
2537
2538
2539 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2540 TCP_SKB_CB(skb)->seq != tp->snd_una)
2541 return -EAGAIN;
2542
2543 if (skb->len > cur_mss) {
2544 if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
2545 return -ENOMEM;
2546 } else {
2547 int oldpcount = tcp_skb_pcount(skb);
2548
2549 if (unlikely(oldpcount > 1)) {
2550 if (skb_unclone(skb, GFP_ATOMIC))
2551 return -ENOMEM;
2552 tcp_init_tso_segs(sk, skb, cur_mss);
2553 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2554 }
2555 }
2556
2557 tcp_retrans_try_collapse(sk, skb, cur_mss);
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2568 skb_headroom(skb) >= 0xFFFF)) {
2569 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2570 GFP_ATOMIC);
2571 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2572 -ENOBUFS;
2573 } else {
2574 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2575 }
2576
2577 if (likely(!err)) {
2578 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2579
2580 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2581 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2582 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2583 tp->total_retrans++;
2584 }
2585 return err;
2586}
2587
2588int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2589{
2590 struct tcp_sock *tp = tcp_sk(sk);
2591 int err = __tcp_retransmit_skb(sk, skb);
2592
2593 if (err == 0) {
2594#if FASTRETRANS_DEBUG > 0
2595 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2596 net_dbg_ratelimited("retrans_out leaked\n");
2597 }
2598#endif
2599 if (!tp->retrans_out)
2600 tp->lost_retrans_low = tp->snd_nxt;
2601 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2602 tp->retrans_out += tcp_skb_pcount(skb);
2603
2604 /* Save the timestamp of the first retransmit. */
2605 if (!tp->retrans_stamp)
2606 tp->retrans_stamp = tcp_skb_timestamp(skb);
2607
2608 /* snd_nxt is stored to detect loss of retransmitted segments;
2609 * see the retransmit-tracking code in tcp_input.c.
2610 */
2611 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2612 } else if (err != -EBUSY) {
2613 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2614 }
2615
2616 if (tp->undo_retrans < 0)
2617 tp->undo_retrans = 0;
2618 tp->undo_retrans += tcp_skb_pcount(skb);
2619 return err;
2620}
2621
2622/* Check whether forward retransmissions are possible in the current
2623 * window/congestion state.
2624 */
2625static bool tcp_can_forward_retransmit(struct sock *sk)
2626{
2627 const struct inet_connection_sock *icsk = inet_csk(sk);
2628 const struct tcp_sock *tp = tcp_sk(sk);
2629
2630 /* Forward retransmissions are possible only during Recovery. */
2631 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2632 return false;
2633
2634 /* No forward retransmissions in Reno are possible. */
2635 if (tcp_is_reno(tp))
2636 return false;
2637
2638 /* We have to choose between forward transmission and
2639 * retransmission, and both ways have their merits.
2640 *
2641 * For now we do not retransmit anything while we still have
2642 * new segments to send. In the other cases, follow rule 3 of
2643 * NextSeg() as specified in RFC 3517.
2644 */
2645
2646 if (tcp_may_send_now(sk))
2647 return false;
2648
2649 return true;
2650}
2651
2652/* This gets called after a retransmit timeout, and the initially
2653 * retransmitted data is acknowledged. It tries to continue
2654 * resending the rest of the retransmit queue, until either
2655 * we've sent it all or the congestion window limit is reached.
2656 * If doing SACK, the first ACK which comes back for a timeout-
2657 * based retransmit packet might feed us FACK information again.
2658 * If so, we use it to avoid unnecessary retransmissions.
2659 */
2660void tcp_xmit_retransmit_queue(struct sock *sk)
2661{
2662 const struct inet_connection_sock *icsk = inet_csk(sk);
2663 struct tcp_sock *tp = tcp_sk(sk);
2664 struct sk_buff *skb;
2665 struct sk_buff *hole = NULL;
2666 u32 last_lost;
2667 int mib_idx;
2668 int fwd_rexmitting = 0;
2669
2670 if (!tp->packets_out)
2671 return;
2672
2673 if (!tp->lost_out)
2674 tp->retransmit_high = tp->snd_una;
2675
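 /* Resume from the cached retransmit hint when we have one; otherwise
 * start scanning from the head of the write queue.
 */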
2676 if (tp->retransmit_skb_hint) {
2677 skb = tp->retransmit_skb_hint;
2678 last_lost = TCP_SKB_CB(skb)->end_seq;
2679 if (after(last_lost, tp->retransmit_high))
2680 last_lost = tp->retransmit_high;
2681 } else {
2682 skb = tcp_write_queue_head(sk);
2683 last_lost = tp->snd_una;
2684 }
2685
2686 tcp_for_write_queue_from(skb, sk) {
2687 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2688
2689 if (skb == tcp_send_head(sk))
2690 break;
2691
2692 if (hole == NULL)
2693 tp->retransmit_skb_hint = skb;
2694 /* Assume this retransmit will generate
2695 * only one packet for congestion window
2696 * calculation purposes. This works because
2697 * tcp_retransmit_skb() will chop up the
2698 * packet to be MSS sized and all the
2699 * SACK blocks can be processed using the
2700 * standard protocol rules.
2701 */
2702 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2703 return;
2704
2705 if (fwd_rexmitting) {
2706begin_fwd:
2707 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2708 break;
2709 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2710
2711 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2712 tp->retransmit_high = last_lost;
2713 if (!tcp_can_forward_retransmit(sk))
2714 break;
2715
2716 if (hole != NULL) {
2717 skb = hole;
2718 hole = NULL;
2719 }
2720 fwd_rexmitting = 1;
2721 goto begin_fwd;
2722
2723 } else if (!(sacked & TCPCB_LOST)) {
2724 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2725 hole = skb;
2726 continue;
2727
2728 } else {
2729 last_lost = TCP_SKB_CB(skb)->end_seq;
2730 if (icsk->icsk_ca_state != TCP_CA_Loss)
2731 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2732 else
2733 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2734 }
2735
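 /* Skip segments that are already SACKed or already retransmitted. */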
2736 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2737 continue;
2738
2739 if (tcp_retransmit_skb(sk, skb))
2740 return;
2741
2742 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2743
2744 if (tcp_in_cwnd_reduction(sk))
2745 tp->prr_out += tcp_skb_pcount(skb);
2746
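 /* If we just retransmitted the head of the write queue, re-arm the
 * retransmission timer.
 */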
2747 if (skb == tcp_write_queue_head(sk))
2748 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2749 inet_csk(sk)->icsk_rto,
2750 TCP_RTO_MAX);
2751 }
2752}
2753
2754/* Send a FIN. The caller locks the socket for us. This must not be
2755 * allowed to fail to queue a FIN frame under any circumstances.
2756 */
2757void tcp_send_fin(struct sock *sk)
2758{
2759 struct tcp_sock *tp = tcp_sk(sk);
2760 struct sk_buff *skb = tcp_write_queue_tail(sk);
2761 int mss_now;
2762
2763 /* Optimization: tack the FIN onto the tail skb if we have a queue
2764 * of unsent frames. But be careful about outgoing SACK options
2765 * and IP options.
2766 */
2767 mss_now = tcp_current_mss(sk);
2768
2769 if (tcp_send_head(sk) != NULL) {
2770 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2771 TCP_SKB_CB(skb)->end_seq++;
2772 tp->write_seq++;
2773 } else {
2774 /* Socket is locked, keep trying until memory is available. */
2775 for (;;) {
2776 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
2777 if (skb)
2778 break;
2779 yield();
2780 }
2781
2782 tcp_init_nondata_skb(skb, tp->write_seq,
2783 TCPHDR_ACK | TCPHDR_FIN);
2784 tcp_queue_skb(sk, skb);
2785 }
2786 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2787}
2788
2789/* We get here when a process closes a file descriptor (either due to
2790 * an explicit close() or as a byproduct of exit()'ing) and there
2791 * was unread data in the receive queue. This behavior is recommended
2792 * by RFC 2525, section 2.17.
2793 */
2794void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2795{
2796 struct sk_buff *skb;
2797
2798 /* NOTE: no TCP options are attached and we never retransmit this. */
2799 skb = alloc_skb(MAX_TCP_HEADER, priority);
2800 if (!skb) {
2801 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2802 return;
2803 }
2804
2805 /* Reserve space for headers and prepare control bits. */
2806 skb_reserve(skb, MAX_TCP_HEADER);
2807 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2808 TCPHDR_ACK | TCPHDR_RST);
2809 /* Send it off. */
2810 if (tcp_transmit_skb(sk, skb, 0, priority))
2811 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2812
2813 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2814}
2815
2816/* Send a crossed SYN-ACK during socket establishment.
2817 * WARNING: This routine must only be called when we have already sent
2818 * a SYN packet that crossed the incoming SYN that caused this routine
2819 * to get called. If this assumption fails then the initial rcv_wnd
2820 * and rcv_wscale values will not be correct.
2821 */
2822int tcp_send_synack(struct sock *sk)
2823{
2824 struct sk_buff *skb;
2825
2826 skb = tcp_write_queue_head(sk);
2827 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2828 pr_debug("%s: wrong queue state\n", __func__);
2829 return -EFAULT;
2830 }
2831 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2832 if (skb_cloned(skb)) {
2833 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2834 if (nskb == NULL)
2835 return -ENOMEM;
2836 tcp_unlink_write_queue(skb, sk);
2837 __skb_header_release(nskb);
2838 __tcp_add_write_queue_head(sk, nskb);
2839 sk_wmem_free_skb(sk, skb);
2840 sk->sk_wmem_queued += nskb->truesize;
2841 sk_mem_charge(sk, nskb->truesize);
2842 skb = nskb;
2843 }
2844
2845 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2846 tcp_ecn_send_synack(sk, skb);
2847 }
2848 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2849}
2850
2851/**
2852 * tcp_make_synack - Allocate one skb and build a SYN-ACK packet.
2853 * @sk: listener socket
2854 * @dst: dst entry attached to the SYNACK
2855 * @req: request_sock pointer
2856 * @foc: cached TCP Fast Open cookie, if any
2857 *
2858 * @dst is consumed: the caller must not use it again.
2859 */
2860struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2861 struct request_sock *req,
2862 struct tcp_fastopen_cookie *foc)
2863{
2864 struct tcp_out_options opts;
2865 struct inet_request_sock *ireq = inet_rsk(req);
2866 struct tcp_sock *tp = tcp_sk(sk);
2867 struct tcphdr *th;
2868 struct sk_buff *skb;
2869 struct tcp_md5sig_key *md5;
2870 int tcp_header_size;
2871 int mss;
2872
2873 skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
2874 if (unlikely(!skb)) {
2875 dst_release(dst);
2876 return NULL;
2877 }
2878
2879 skb_reserve(skb, MAX_TCP_HEADER);
2880
2881 skb_dst_set(skb, dst);
2882 security_skb_owned_by(skb, sk);
2883
2884 mss = dst_metric_advmss(dst);
2885 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2886 mss = tp->rx_opt.user_mss;
2887
2888 memset(&opts, 0, sizeof(opts));
2889#ifdef CONFIG_SYN_COOKIES
2890 if (unlikely(req->cookie_ts))
2891 skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
2892 else
2893#endif
2894 skb_mstamp_get(&skb->skb_mstamp);
2895 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
2896 foc) + sizeof(*th);
2897
2898 skb_push(skb, tcp_header_size);
2899 skb_reset_transport_header(skb);
2900
2901 th = tcp_hdr(skb);
2902 memset(th, 0, sizeof(struct tcphdr));
2903 th->syn = 1;
2904 th->ack = 1;
2905 tcp_ecn_make_synack(req, th, sk);
2906 th->source = htons(ireq->ir_num);
2907 th->dest = ireq->ir_rmt_port;
2908 /* Record the ISN in the skb control block; the SYN|ACK flag bits
2909 * here mirror what was already set on the TCP header above.
2910 */
2911 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2912 TCPHDR_SYN | TCPHDR_ACK);
2913
2914 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2915 /* Acknowledge everything the peer has sent so far (tcp_rsk rcv_nxt) */
2916 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
2917
2918 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
2919 th->window = htons(min(req->rcv_wnd, 65535U));
2920 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2921 th->doff = (tcp_header_size >> 2);
2922 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
2923
2924#ifdef CONFIG_TCP_MD5SIG
2925 /* Okay, we have all we need - do the md5 hash if needed */
2926 if (md5) {
2927 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
2928 md5, NULL, req, skb);
2929 }
2930#endif
2931
2932 return skb;
2933}
2934EXPORT_SYMBOL(tcp_make_synack);
2935
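/* If the route caches a congestion control algorithm (RTAX_CC_ALGO),
 * switch this socket over to it, taking a reference on its module.
 */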
2936static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
2937{
2938 struct inet_connection_sock *icsk = inet_csk(sk);
2939 const struct tcp_congestion_ops *ca;
2940 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
2941
2942 if (ca_key == TCP_CA_UNSPEC)
2943 return;
2944
2945 rcu_read_lock();
2946 ca = tcp_ca_find_key(ca_key);
2947 if (likely(ca && try_module_get(ca->owner))) {
2948 module_put(icsk->icsk_ca_ops->owner);
2949 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
2950 icsk->icsk_ca_ops = ca;
2951 }
2952 rcu_read_unlock();
2953}
2954
2955/* Do all connect socket setups that can be done AF independent. */
2956static void tcp_connect_init(struct sock *sk)
2957{
2958 const struct dst_entry *dst = __sk_dst_get(sk);
2959 struct tcp_sock *tp = tcp_sk(sk);
2960 __u8 rcv_wscale;
2961
2962 /* We'll fix this up when we get a response from the other end.
2963 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
2964 */
2965 tp->tcp_header_len = sizeof(struct tcphdr) +
2966 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2967
2968#ifdef CONFIG_TCP_MD5SIG
2969 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2970 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2971#endif
2972
2973 /* If the user gave us a TCP_MAXSEG, record it here to clamp the MSS */
2974 if (tp->rx_opt.user_mss)
2975 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2976 tp->max_window = 0;
2977 tcp_mtup_init(sk);
2978 tcp_sync_mss(sk, dst_mtu(dst));
2979
2980 tcp_ca_dst_init(sk, dst);
2981
2982 if (!tp->window_clamp)
2983 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2984 tp->advmss = dst_metric_advmss(dst);
2985 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
2986 tp->advmss = tp->rx_opt.user_mss;
2987
2988 tcp_initialize_rcv_mss(sk);
2989
2990 /* Limit the window selection if the user enforces a smaller rx buffer */
2991 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2992 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2993 tp->window_clamp = tcp_full_space(sk);
2994
2995 tcp_select_initial_window(tcp_full_space(sk),
2996 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2997 &tp->rcv_wnd,
2998 &tp->window_clamp,
2999 sysctl_tcp_window_scaling,
3000 &rcv_wscale,
3001 dst_metric(dst, RTAX_INITRWND));
3002
3003 tp->rx_opt.rcv_wscale = rcv_wscale;
3004 tp->rcv_ssthresh = tp->rcv_wnd;
3005
3006 sk->sk_err = 0;
3007 sock_reset_flag(sk, SOCK_DONE);
3008 tp->snd_wnd = 0;
3009 tcp_init_wl(tp, 0);
3010 tp->snd_una = tp->write_seq;
3011 tp->snd_sml = tp->write_seq;
3012 tp->snd_up = tp->write_seq;
3013 tp->snd_nxt = tp->write_seq;
3014
3015 if (likely(!tp->repair))
3016 tp->rcv_nxt = 0;
3017 else
3018 tp->rcv_tstamp = tcp_time_stamp;
3019 tp->rcv_wup = tp->rcv_nxt;
3020 tp->copied_seq = tp->rcv_nxt;
3021
3022 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
3023 inet_csk(sk)->icsk_retransmits = 0;
3024 tcp_clear_retrans(tp);
3025}
3026
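/* Append @skb to the write queue and charge its memory to the socket;
 * used on the connect path for the SYN and any Fast Open data.
 */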
3027static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3028{
3029 struct tcp_sock *tp = tcp_sk(sk);
3030 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3031
3032 tcb->end_seq += skb->len;
3033 __skb_header_release(skb);
3034 __tcp_add_write_queue_tail(sk, skb);
3035 sk->sk_wmem_queued += skb->truesize;
3036 sk_mem_charge(sk, skb->truesize);
3037 tp->write_seq = tcb->end_seq;
3038 tp->packets_out += tcp_skb_pcount(skb);
3039}
3040
3041/* Build and send a SYN with data and a (cached) Fast Open cookie. However,
3042 * queue a data-only packet after the regular SYN, so that regular SYNs
3043 * are retransmitted on timeouts. Also, if the remote SYN-ACK acknowledges
3044 * only the SYN sequence, the data is retransmitted in the first ACK.
3045 * If the cookie is not cached or another error occurs, fall back to sending
3046 * a regular SYN with a Fast Open cookie request option.
3047 */
3048static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3049{
3050 struct tcp_sock *tp = tcp_sk(sk);
3051 struct tcp_fastopen_request *fo = tp->fastopen_req;
3052 int syn_loss = 0, space, err = 0, copied;
3053 unsigned long last_syn_loss = 0;
3054 struct sk_buff *syn_data;
3055
3056 tp->rx_opt.mss_clamp = tp->advmss;
3057 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
3058 &syn_loss, &last_syn_loss);
3059 /* Recurring Fast Open SYN losses: back off to a regular handshake temporarily */
3060 if (syn_loss > 1 &&
3061 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
3062 fo->cookie.len = -1;
3063 goto fallback;
3064 }
3065
3066 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
3067 fo->cookie.len = -1;
3068 else if (fo->cookie.len <= 0)
3069 goto fallback;
3070
3071 /* The MSS for SYN data is based on the cached MSS and bounded by the
3072 * PMTU and user-MSS. Reserve maximum option space for middleboxes that
3073 * may add private TCP options.
3074 */
3075 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
3076 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3077 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3078 MAX_TCP_OPTION_SPACE;
3079
3080 space = min_t(size_t, space, fo->size);
3081
3082 /* Limit to order-0 allocations */
3083 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3084
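 /* Allocate the data-only skb and copy in the user data that was queued
 * for the Fast Open SYN from the msghdr iterator.
 */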
3085 syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
3086 if (!syn_data)
3087 goto fallback;
3088 syn_data->ip_summed = CHECKSUM_PARTIAL;
3089 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3090 copied = copy_from_iter(skb_put(syn_data, space), space,
3091 &fo->data->msg_iter);
3092 if (unlikely(!copied)) {
3093 kfree_skb(syn_data);
3094 goto fallback;
3095 }
3096 if (copied != space) {
3097 skb_trim(syn_data, copied);
3098 space = copied;
3099 }
3100
3101 /* No more data pending in inet_wait_for_connect() */
3102 if (space == fo->size)
3103 fo->data = NULL;
3104 fo->copied = space;
3105
3106 tcp_connect_queue_skb(sk, syn_data);
3107
3108 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3109
3110 syn->skb_mstamp = syn_data->skb_mstamp;
3111
3112 /* The full SYN+DATA has now been cloned and sent (or not). Remove
3113 * the SYN from the copy kept in the write queue (syn_data), since the
3114 * separate data-less SYN skb in the queue already covers that sequence
3115 * and will be the one retransmitted on timeout.
3116 */
3117 TCP_SKB_CB(syn_data)->seq++;
3118 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3119 if (!err) {
3120 tp->syn_data = (fo->copied > 0);
3121 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3122 goto done;
3123 }
3124
3125fallback:
3126 /* Send a regular SYN with a Fast Open cookie request option */
3127 if (fo->cookie.len > 0)
3128 fo->cookie.len = 0;
3129 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3130 if (err)
3131 tp->syn_fastopen = 0;
3132done:
3133 fo->cookie.len = -1;
3134 return err;
3135}
3136
3137/* Build a SYN and send it off. */
3138int tcp_connect(struct sock *sk)
3139{
3140 struct tcp_sock *tp = tcp_sk(sk);
3141 struct sk_buff *buff;
3142 int err;
3143
3144 tcp_connect_init(sk);
3145
3146 if (unlikely(tp->repair)) {
3147 tcp_finish_connect(sk, NULL);
3148 return 0;
3149 }
3150
3151 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
3152 if (unlikely(!buff))
3153 return -ENOBUFS;
3154
3155 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3156 tp->retrans_stamp = tcp_time_stamp;
3157 tcp_connect_queue_skb(sk, buff);
3158 tcp_ecn_send_syn(sk, buff);
3159
3160 /* Send off the SYN; include data in the Fast Open case. */
3161 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3162 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3163 if (err == -ECONNREFUSED)
3164 return err;
3165
3166 /* We change tp->snd_nxt after the tcp_transmit_skb() call
3167 * in order to make this packet get counted in tcpOutSegs.
3168 */
3169 tp->snd_nxt = tp->write_seq;
3170 tp->pushed_seq = tp->write_seq;
3171 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3172
3173 /* Timer for repeating the SYN until an answer. */
3174 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3175 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3176 return 0;
3177}
3178EXPORT_SYMBOL(tcp_connect);
3179
3180/* Send out a delayed ack; the caller does the policy checking
3181 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
3182 * for details.
3183 */
3184void tcp_send_delayed_ack(struct sock *sk)
3185{
3186 struct inet_connection_sock *icsk = inet_csk(sk);
3187 int ato = icsk->icsk_ack.ato;
3188 unsigned long timeout;
3189
3190 tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
3191
3192 if (ato > TCP_DELACK_MIN) {
3193 const struct tcp_sock *tp = tcp_sk(sk);
3194 int max_ato = HZ / 2;
3195
3196 if (icsk->icsk_ack.pingpong ||
3197 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3198 max_ato = TCP_DELACK_MAX;
3199
3200 /* Slow path, intersegment interval is "high". */
3201
3202 /* If some rtt estimate is known, use it to bound the delayed ack.
3203 * Do not use inet_csk(sk)->icsk_rto here; use the results of rtt
3204 * measurements directly.
3205 */
3206 if (tp->srtt_us) {
3207 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3208 TCP_DELACK_MIN);
3209
3210 if (rtt < max_ato)
3211 max_ato = rtt;
3212 }
3213
3214 ato = min(ato, max_ato);
3215 }
3216
3217 /* Stay within the limit we were given */
3218 timeout = jiffies + ato;
3219
3220 /* Use the new timeout only if there wasn't an older one earlier. */
3221 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3222 /* If the delack timer was blocked or is about to expire,
3223 * send the ACK now.
3224 */
3225 if (icsk->icsk_ack.blocked ||
3226 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3227 tcp_send_ack(sk);
3228 return;
3229 }
3230
3231 if (!time_before(timeout, icsk->icsk_ack.timeout))
3232 timeout = icsk->icsk_ack.timeout;
3233 }
3234 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3235 icsk->icsk_ack.timeout = timeout;
3236 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3237}
3238
3239/* This routine sends an ack and also updates the window. */
3240void tcp_send_ack(struct sock *sk)
3241{
3242 struct sk_buff *buff;
3243
3244 /* If we have been reset, we may not send again. */
3245 if (sk->sk_state == TCP_CLOSE)
3246 return;
3247
3248 tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
3249
3250 /* We are not putting this on the write queue, so
3251 * tcp_transmit_skb() will set the ownership to this
3252 * sock.
3253 */
3254 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3255 if (buff == NULL) {
3256 inet_csk_schedule_ack(sk);
3257 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3258 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3259 TCP_DELACK_MAX, TCP_RTO_MAX);
3260 return;
3261 }
3262
3263 /* Reserve space for headers and prepare control bits. */
3264 skb_reserve(buff, MAX_TCP_HEADER);
3265 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3266
3267
3268 /* Pure ACKs are flagged here so that their truesize does not
3269 * disturb TCP Small Queues or fq/pacing accounting, and so that
3270 * the cheaper sock_wfree() destructor can be used instead of
3271 * tcp_wfree().
3272 */
3273 skb_set_tcp_pure_ack(buff);
3274
3275 /* Send it off; this clears delayed acks for us. */
3276 skb_mstamp_get(&buff->skb_mstamp);
3277 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3278}
3279EXPORT_SYMBOL_GPL(tcp_send_ack);
3280
3281/* This routine sends a packet with an out-of-date sequence
3282 * number. It assumes the other end will try to ack it.
3283 *
3284 * Question: what should we do in urgent mode?
3285 * 4.4BSD forces sending a single byte of data. We cannot send
3286 * out-of-window data, because we have SND.NXT == SND.MAX...
3287 *
3288 * Current solution: send TWO zero-length segments in urgent mode:
3289 * one with SEG.SEQ = SND.UNA to deliver the urgent pointer, another
3290 * out-of-date with SND.UNA - 1 to probe the window.
3291 */
3292static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3293{
3294 struct tcp_sock *tp = tcp_sk(sk);
3295 struct sk_buff *skb;
3296
3297 /* We don't queue it, tcp_transmit_skb() sets ownership. */
3298 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3299 if (skb == NULL)
3300 return -1;
3301
3302 /* Reserve space for headers and set control bits. */
3303 skb_reserve(skb, MAX_TCP_HEADER);
3304 /* Use a previous sequence. This should cause the other
3305 * end to send an ack. Don't queue or clone the skb, just
3306 * send it.
3307 */
3308 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3309 skb_mstamp_get(&skb->skb_mstamp);
3310 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3311}
3312
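/* Force a window update: pull snd_wl1 back so the next incoming ACK
 * refreshes the send window, then transmit a probe.
 */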
3313void tcp_send_window_probe(struct sock *sk)
3314{
3315 if (sk->sk_state == TCP_ESTABLISHED) {
3316 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3317 tcp_xmit_probe_skb(sk, 0);
3318 }
3319}
3320
3321/* Initiate keepalive or window probe from timer. */
3322int tcp_write_wakeup(struct sock *sk)
3323{
3324 struct tcp_sock *tp = tcp_sk(sk);
3325 struct sk_buff *skb;
3326
3327 if (sk->sk_state == TCP_CLOSE)
3328 return -1;
3329
3330 if ((skb = tcp_send_head(sk)) != NULL &&
3331 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3332 int err;
3333 unsigned int mss = tcp_current_mss(sk);
3334 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3335
3336 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3337 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3338
3339 /* We are probing the opening of a window but the window
3340 * size is != 0; this must have been the result of sender-side
3341 * SWS avoidance.
3342 */
3343 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3344 skb->len > mss) {
3345 seg_size = min(seg_size, mss);
3346 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3347 if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
3348 return -1;
3349 } else if (!tcp_skb_pcount(skb))
3350 tcp_set_skb_tso_segs(sk, skb, mss);
3351
3352 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3353 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3354 if (!err)
3355 tcp_event_new_data_sent(sk, skb);
3356 return err;
3357 } else {
3358 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3359 tcp_xmit_probe_skb(sk, 1);
3360 return tcp_xmit_probe_skb(sk, 0);
3361 }
3362}
3363
3364/* A window probe timeout has occurred. If the window is not closed,
3365 * send a partial packet, else send a zero-window probe.
3366 */
3367void tcp_send_probe0(struct sock *sk)
3368{
3369 struct inet_connection_sock *icsk = inet_csk(sk);
3370 struct tcp_sock *tp = tcp_sk(sk);
3371 unsigned long probe_max;
3372 int err;
3373
3374 err = tcp_write_wakeup(sk);
3375
3376 if (tp->packets_out || !tcp_send_head(sk)) {
3377 /* Cancel the probe timer when it is no longer required. */
3378 icsk->icsk_probes_out = 0;
3379 icsk->icsk_backoff = 0;
3380 return;
3381 }
3382
3383 if (err <= 0) {
3384 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3385 icsk->icsk_backoff++;
3386 icsk->icsk_probes_out++;
3387 probe_max = TCP_RTO_MAX;
3388 } else {
3389 /* If the packet was not sent due to local congestion,
3390 * do not back off and do not remember icsk_probes_out.
3391 * Let local senders fight for local resources.
3392 *
3393 * Keep using the accumulated backoff, though.
3394 */
3395 if (!icsk->icsk_probes_out)
3396 icsk->icsk_probes_out = 1;
3397 probe_max = TCP_RESOURCE_PROBE_INTERVAL;
3398 }
3399 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3400 inet_csk_rto_backoff(icsk, probe_max),
3401 TCP_RTO_MAX);
3402}
3403
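/* Retransmit a SYN-ACK for the given request socket and, on success,
 * account for it in the retransmission MIB counters.
 */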
3404int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
3405{
3406 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3407 struct flowi fl;
3408 int res;
3409
3410 res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
3411 if (!res) {
3412 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
3413 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3414 }
3415 return res;
3416}
3417EXPORT_SYMBOL(tcp_rtx_synack);
3418