1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/gfp.h>
43#include <linux/module.h>
44
45
46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47
48
49
50
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52
53
54int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
55
56
57
58
59
60int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61
62int sysctl_tcp_mtu_probing __read_mostly = 0;
63int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
64
65
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67
68unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
69EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
70
71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp);
73
74
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76{
77 struct inet_connection_sock *icsk = inet_csk(sk);
78 struct tcp_sock *tp = tcp_sk(sk);
79 unsigned int prior_packets = tp->packets_out;
80
81 tcp_advance_send_head(sk, skb);
82 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
83
84 tp->packets_out += tcp_skb_pcount(skb);
85 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
86 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
87 tcp_rearm_rto(sk);
88 }
89
90 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
91 tcp_skb_pcount(skb));
92}
93
94
95
96
97
98
99
100static inline __u32 tcp_acceptable_seq(const struct sock *sk)
101{
102 const struct tcp_sock *tp = tcp_sk(sk);
103
104 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
105 return tp->snd_nxt;
106 else
107 return tcp_wnd_end(tp);
108}
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124static __u16 tcp_advertise_mss(struct sock *sk)
125{
126 struct tcp_sock *tp = tcp_sk(sk);
127 const struct dst_entry *dst = __sk_dst_get(sk);
128 int mss = tp->advmss;
129
130 if (dst) {
131 unsigned int metric = dst_metric_advmss(dst);
132
133 if (metric < mss) {
134 mss = metric;
135 tp->advmss = mss;
136 }
137 }
138
139 return (__u16)mss;
140}
141
142
143
144static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
145{
146 struct tcp_sock *tp = tcp_sk(sk);
147 s32 delta = tcp_time_stamp - tp->lsndtime;
148 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
149 u32 cwnd = tp->snd_cwnd;
150
151 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
152
153 tp->snd_ssthresh = tcp_current_ssthresh(sk);
154 restart_cwnd = min(restart_cwnd, cwnd);
155
156 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
157 cwnd >>= 1;
158 tp->snd_cwnd = max(cwnd, restart_cwnd);
159 tp->snd_cwnd_stamp = tcp_time_stamp;
160 tp->snd_cwnd_used = 0;
161}
162
163
164static void tcp_event_data_sent(struct tcp_sock *tp,
165 struct sock *sk)
166{
167 struct inet_connection_sock *icsk = inet_csk(sk);
168 const u32 now = tcp_time_stamp;
169
170 if (sysctl_tcp_slow_start_after_idle &&
171 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
172 tcp_cwnd_restart(sk, __sk_dst_get(sk));
173
174 tp->lsndtime = now;
175
176
177
178
179 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
180 icsk->icsk_ack.pingpong = 1;
181}
182
183
184static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
185{
186 tcp_dec_quickack_mode(sk, pkts);
187 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
188}
189
190
191u32 tcp_default_init_rwnd(u32 mss)
192{
193
194
195
196
197
198 u32 init_rwnd = TCP_INIT_CWND * 2;
199
200 if (mss > 1460)
201 init_rwnd = max((1460 * init_rwnd) / mss, 2U);
202 return init_rwnd;
203}
204
205
206
207
208
209
210
211
212void tcp_select_initial_window(int __space, __u32 mss,
213 __u32 *rcv_wnd, __u32 *window_clamp,
214 int wscale_ok, __u8 *rcv_wscale,
215 __u32 init_rcv_wnd)
216{
217 unsigned int space = (__space < 0 ? 0 : __space);
218
219
220 if (*window_clamp == 0)
221 (*window_clamp) = (65535 << 14);
222 space = min(*window_clamp, space);
223
224
225 if (space > mss)
226 space = (space / mss) * mss;
227
228
229
230
231
232
233
234
235
236 if (sysctl_tcp_workaround_signed_windows)
237 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
238 else
239 (*rcv_wnd) = space;
240
241 (*rcv_wscale) = 0;
242 if (wscale_ok) {
243
244
245
246 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
247 space = min_t(u32, space, *window_clamp);
248 while (space > 65535 && (*rcv_wscale) < 14) {
249 space >>= 1;
250 (*rcv_wscale)++;
251 }
252 }
253
254 if (mss > (1 << *rcv_wscale)) {
255 if (!init_rcv_wnd)
256 init_rcv_wnd = tcp_default_init_rwnd(mss);
257 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
258 }
259
260
261 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
262}
263EXPORT_SYMBOL(tcp_select_initial_window);
264
265
266
267
268
269
270static u16 tcp_select_window(struct sock *sk)
271{
272 struct tcp_sock *tp = tcp_sk(sk);
273 u32 old_win = tp->rcv_wnd;
274 u32 cur_win = tcp_receive_window(tp);
275 u32 new_win = __tcp_select_window(sk);
276
277
278 if (new_win < cur_win) {
279
280
281
282
283
284
285
286 if (new_win == 0)
287 NET_INC_STATS(sock_net(sk),
288 LINUX_MIB_TCPWANTZEROWINDOWADV);
289 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
290 }
291 tp->rcv_wnd = new_win;
292 tp->rcv_wup = tp->rcv_nxt;
293
294
295
296
297 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
298 new_win = min(new_win, MAX_TCP_WINDOW);
299 else
300 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
301
302
303 new_win >>= tp->rx_opt.rcv_wscale;
304
305
306 if (new_win == 0) {
307 tp->pred_flags = 0;
308 if (old_win)
309 NET_INC_STATS(sock_net(sk),
310 LINUX_MIB_TCPTOZEROWINDOWADV);
311 } else if (old_win == 0) {
312 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
313 }
314
315 return new_win;
316}
317
318
319static inline void TCP_ECN_send_synack(struct sock *sk, struct sk_buff *skb)
320{
321 const struct tcp_sock *tp = tcp_sk(sk);
322
323 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
324 if (!(tp->ecn_flags & TCP_ECN_OK))
325 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
326 else if (tcp_ca_needs_ecn(sk))
327 INET_ECN_xmit(sk);
328}
329
330
331static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
332{
333 struct tcp_sock *tp = tcp_sk(sk);
334 bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
335 tcp_ca_needs_ecn(sk);
336
337 if (!use_ecn) {
338 const struct dst_entry *dst = __sk_dst_get(sk);
339
340 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
341 use_ecn = true;
342 }
343
344 tp->ecn_flags = 0;
345
346 if (use_ecn) {
347 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
348 tp->ecn_flags = TCP_ECN_OK;
349 if (tcp_ca_needs_ecn(sk))
350 INET_ECN_xmit(sk);
351 }
352}
353
354static __inline__ void
355TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th,
356 struct sock *sk)
357{
358 if (inet_rsk(req)->ecn_ok) {
359 th->ece = 1;
360 if (tcp_ca_needs_ecn(sk))
361 INET_ECN_xmit(sk);
362 }
363}
364
365
366
367
368static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
369 int tcp_header_len)
370{
371 struct tcp_sock *tp = tcp_sk(sk);
372
373 if (tp->ecn_flags & TCP_ECN_OK) {
374
375 if (skb->len != tcp_header_len &&
376 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
377 INET_ECN_xmit(sk);
378 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
379 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
380 tcp_hdr(skb)->cwr = 1;
381 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
382 }
383 } else if (!tcp_ca_needs_ecn(sk)) {
384
385 INET_ECN_dontxmit(sk);
386 }
387 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
388 tcp_hdr(skb)->ece = 1;
389 }
390}
391
392
393
394
395static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
396{
397 struct skb_shared_info *shinfo = skb_shinfo(skb);
398
399 skb->ip_summed = CHECKSUM_PARTIAL;
400 skb->csum = 0;
401
402 TCP_SKB_CB(skb)->tcp_flags = flags;
403 TCP_SKB_CB(skb)->sacked = 0;
404
405 shinfo->gso_segs = 1;
406 shinfo->gso_size = 0;
407 shinfo->gso_type = 0;
408
409 TCP_SKB_CB(skb)->seq = seq;
410 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
411 seq++;
412 TCP_SKB_CB(skb)->end_seq = seq;
413}
414
415static inline bool tcp_urg_mode(const struct tcp_sock *tp)
416{
417 return tp->snd_una != tp->snd_up;
418}
419
420#define OPTION_SACK_ADVERTISE (1 << 0)
421#define OPTION_TS (1 << 1)
422#define OPTION_MD5 (1 << 2)
423#define OPTION_WSCALE (1 << 3)
424#define OPTION_FAST_OPEN_COOKIE (1 << 8)
425
426struct tcp_out_options {
427 u16 options;
428 u16 mss;
429 u8 ws;
430 u8 num_sack_blocks;
431 u8 hash_size;
432 __u8 *hash_location;
433 __u32 tsval, tsecr;
434 struct tcp_fastopen_cookie *fastopen_cookie;
435};
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
451 struct tcp_out_options *opts)
452{
453 u16 options = opts->options;
454
455 if (unlikely(OPTION_MD5 & options)) {
456 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
457 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
458
459 opts->hash_location = (__u8 *)ptr;
460 ptr += 4;
461 }
462
463 if (unlikely(opts->mss)) {
464 *ptr++ = htonl((TCPOPT_MSS << 24) |
465 (TCPOLEN_MSS << 16) |
466 opts->mss);
467 }
468
469 if (likely(OPTION_TS & options)) {
470 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
471 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
472 (TCPOLEN_SACK_PERM << 16) |
473 (TCPOPT_TIMESTAMP << 8) |
474 TCPOLEN_TIMESTAMP);
475 options &= ~OPTION_SACK_ADVERTISE;
476 } else {
477 *ptr++ = htonl((TCPOPT_NOP << 24) |
478 (TCPOPT_NOP << 16) |
479 (TCPOPT_TIMESTAMP << 8) |
480 TCPOLEN_TIMESTAMP);
481 }
482 *ptr++ = htonl(opts->tsval);
483 *ptr++ = htonl(opts->tsecr);
484 }
485
486 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
487 *ptr++ = htonl((TCPOPT_NOP << 24) |
488 (TCPOPT_NOP << 16) |
489 (TCPOPT_SACK_PERM << 8) |
490 TCPOLEN_SACK_PERM);
491 }
492
493 if (unlikely(OPTION_WSCALE & options)) {
494 *ptr++ = htonl((TCPOPT_NOP << 24) |
495 (TCPOPT_WINDOW << 16) |
496 (TCPOLEN_WINDOW << 8) |
497 opts->ws);
498 }
499
500 if (unlikely(opts->num_sack_blocks)) {
501 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
502 tp->duplicate_sack : tp->selective_acks;
503 int this_sack;
504
505 *ptr++ = htonl((TCPOPT_NOP << 24) |
506 (TCPOPT_NOP << 16) |
507 (TCPOPT_SACK << 8) |
508 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
509 TCPOLEN_SACK_PERBLOCK)));
510
511 for (this_sack = 0; this_sack < opts->num_sack_blocks;
512 ++this_sack) {
513 *ptr++ = htonl(sp[this_sack].start_seq);
514 *ptr++ = htonl(sp[this_sack].end_seq);
515 }
516
517 tp->rx_opt.dsack = 0;
518 }
519
520 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
521 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
522 u8 *p = (u8 *)ptr;
523 u32 len;
524
525 if (foc->exp) {
526 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
527 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
528 TCPOPT_FASTOPEN_MAGIC);
529 p += TCPOLEN_EXP_FASTOPEN_BASE;
530 } else {
531 len = TCPOLEN_FASTOPEN_BASE + foc->len;
532 *p++ = TCPOPT_FASTOPEN;
533 *p++ = len;
534 }
535
536 memcpy(p, foc->val, foc->len);
537 if ((len & 3) == 2) {
538 p[foc->len] = TCPOPT_NOP;
539 p[foc->len + 1] = TCPOPT_NOP;
540 }
541 ptr += (len + 3) >> 2;
542 }
543}
544
545
546
547
548static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
549 struct tcp_out_options *opts,
550 struct tcp_md5sig_key **md5)
551{
552 struct tcp_sock *tp = tcp_sk(sk);
553 unsigned int remaining = MAX_TCP_OPTION_SPACE;
554 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
555
556#ifdef CONFIG_TCP_MD5SIG
557 *md5 = tp->af_specific->md5_lookup(sk, sk);
558 if (*md5) {
559 opts->options |= OPTION_MD5;
560 remaining -= TCPOLEN_MD5SIG_ALIGNED;
561 }
562#else
563 *md5 = NULL;
564#endif
565
566
567
568
569
570
571
572
573
574
575 opts->mss = tcp_advertise_mss(sk);
576 remaining -= TCPOLEN_MSS_ALIGNED;
577
578 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
579 opts->options |= OPTION_TS;
580 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
581 opts->tsecr = tp->rx_opt.ts_recent;
582 remaining -= TCPOLEN_TSTAMP_ALIGNED;
583 }
584 if (likely(sysctl_tcp_window_scaling)) {
585 opts->ws = tp->rx_opt.rcv_wscale;
586 opts->options |= OPTION_WSCALE;
587 remaining -= TCPOLEN_WSCALE_ALIGNED;
588 }
589 if (likely(sysctl_tcp_sack)) {
590 opts->options |= OPTION_SACK_ADVERTISE;
591 if (unlikely(!(OPTION_TS & opts->options)))
592 remaining -= TCPOLEN_SACKPERM_ALIGNED;
593 }
594
595 if (fastopen && fastopen->cookie.len >= 0) {
596 u32 need = fastopen->cookie.len;
597
598 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
599 TCPOLEN_FASTOPEN_BASE;
600 need = (need + 3) & ~3U;
601 if (remaining >= need) {
602 opts->options |= OPTION_FAST_OPEN_COOKIE;
603 opts->fastopen_cookie = &fastopen->cookie;
604 remaining -= need;
605 tp->syn_fastopen = 1;
606 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
607 }
608 }
609
610 return MAX_TCP_OPTION_SPACE - remaining;
611}
612
613
614static unsigned int tcp_synack_options(struct sock *sk,
615 struct request_sock *req,
616 unsigned int mss, struct sk_buff *skb,
617 struct tcp_out_options *opts,
618 struct tcp_md5sig_key **md5,
619 struct tcp_fastopen_cookie *foc)
620{
621 struct inet_request_sock *ireq = inet_rsk(req);
622 unsigned int remaining = MAX_TCP_OPTION_SPACE;
623
624#ifdef CONFIG_TCP_MD5SIG
625 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
626 if (*md5) {
627 opts->options |= OPTION_MD5;
628 remaining -= TCPOLEN_MD5SIG_ALIGNED;
629
630
631
632
633
634
635 ireq->tstamp_ok &= !ireq->sack_ok;
636 }
637#else
638 *md5 = NULL;
639#endif
640
641
642 opts->mss = mss;
643 remaining -= TCPOLEN_MSS_ALIGNED;
644
645 if (likely(ireq->wscale_ok)) {
646 opts->ws = ireq->rcv_wscale;
647 opts->options |= OPTION_WSCALE;
648 remaining -= TCPOLEN_WSCALE_ALIGNED;
649 }
650 if (likely(ireq->tstamp_ok)) {
651 opts->options |= OPTION_TS;
652 opts->tsval = tcp_skb_timestamp(skb);
653 opts->tsecr = req->ts_recent;
654 remaining -= TCPOLEN_TSTAMP_ALIGNED;
655 }
656 if (likely(ireq->sack_ok)) {
657 opts->options |= OPTION_SACK_ADVERTISE;
658 if (unlikely(!ireq->tstamp_ok))
659 remaining -= TCPOLEN_SACKPERM_ALIGNED;
660 }
661 if (foc != NULL && foc->len >= 0) {
662 u32 need = foc->len;
663
664 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
665 TCPOLEN_FASTOPEN_BASE;
666 need = (need + 3) & ~3U;
667 if (remaining >= need) {
668 opts->options |= OPTION_FAST_OPEN_COOKIE;
669 opts->fastopen_cookie = foc;
670 remaining -= need;
671 }
672 }
673
674 return MAX_TCP_OPTION_SPACE - remaining;
675}
676
677
678
679
680static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
681 struct tcp_out_options *opts,
682 struct tcp_md5sig_key **md5)
683{
684 struct tcp_sock *tp = tcp_sk(sk);
685 unsigned int size = 0;
686 unsigned int eff_sacks;
687
688 opts->options = 0;
689
690#ifdef CONFIG_TCP_MD5SIG
691 *md5 = tp->af_specific->md5_lookup(sk, sk);
692 if (unlikely(*md5)) {
693 opts->options |= OPTION_MD5;
694 size += TCPOLEN_MD5SIG_ALIGNED;
695 }
696#else
697 *md5 = NULL;
698#endif
699
700 if (likely(tp->rx_opt.tstamp_ok)) {
701 opts->options |= OPTION_TS;
702 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
703 opts->tsecr = tp->rx_opt.ts_recent;
704 size += TCPOLEN_TSTAMP_ALIGNED;
705 }
706
707 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
708 if (unlikely(eff_sacks)) {
709 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
710 opts->num_sack_blocks =
711 min_t(unsigned int, eff_sacks,
712 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
713 TCPOLEN_SACK_PERBLOCK);
714 size += TCPOLEN_SACK_BASE_ALIGNED +
715 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
716 }
717
718 return size;
719}
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736struct tsq_tasklet {
737 struct tasklet_struct tasklet;
738 struct list_head head;
739};
740static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
741
742static void tcp_tsq_handler(struct sock *sk)
743{
744 if ((1 << sk->sk_state) &
745 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
746 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
747 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
748 0, GFP_ATOMIC);
749}
750
751
752
753
754
755
756static void tcp_tasklet_func(unsigned long data)
757{
758 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
759 LIST_HEAD(list);
760 unsigned long flags;
761 struct list_head *q, *n;
762 struct tcp_sock *tp;
763 struct sock *sk;
764
765 local_irq_save(flags);
766 list_splice_init(&tsq->head, &list);
767 local_irq_restore(flags);
768
769 list_for_each_safe(q, n, &list) {
770 tp = list_entry(q, struct tcp_sock, tsq_node);
771 list_del(&tp->tsq_node);
772
773 sk = (struct sock *)tp;
774 bh_lock_sock(sk);
775
776 if (!sock_owned_by_user(sk)) {
777 tcp_tsq_handler(sk);
778 } else {
779
780 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
781 }
782 bh_unlock_sock(sk);
783
784 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
785 sk_free(sk);
786 }
787}
788
789#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
790 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
791 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
792 (1UL << TCP_MTU_REDUCED_DEFERRED))
793
794
795
796
797
798
799
800void tcp_release_cb(struct sock *sk)
801{
802 struct tcp_sock *tp = tcp_sk(sk);
803 unsigned long flags, nflags;
804
805
806 do {
807 flags = tp->tsq_flags;
808 if (!(flags & TCP_DEFERRED_ALL))
809 return;
810 nflags = flags & ~TCP_DEFERRED_ALL;
811 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
812
813 if (flags & (1UL << TCP_TSQ_DEFERRED))
814 tcp_tsq_handler(sk);
815
816
817
818
819
820
821
822
823
824
825 sock_release_ownership(sk);
826
827 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
828 tcp_write_timer_handler(sk);
829 __sock_put(sk);
830 }
831 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
832 tcp_delack_timer_handler(sk);
833 __sock_put(sk);
834 }
835 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
836 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
837 __sock_put(sk);
838 }
839}
840EXPORT_SYMBOL(tcp_release_cb);
841
842void __init tcp_tasklet_init(void)
843{
844 int i;
845
846 for_each_possible_cpu(i) {
847 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
848
849 INIT_LIST_HEAD(&tsq->head);
850 tasklet_init(&tsq->tasklet,
851 tcp_tasklet_func,
852 (unsigned long)tsq);
853 }
854}
855
856
857
858
859
860
861void tcp_wfree(struct sk_buff *skb)
862{
863 struct sock *sk = skb->sk;
864 struct tcp_sock *tp = tcp_sk(sk);
865
866 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
867 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
868 unsigned long flags;
869 struct tsq_tasklet *tsq;
870
871
872
873
874 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
875
876
877 local_irq_save(flags);
878 tsq = &__get_cpu_var(tsq_tasklet);
879 list_add(&tp->tsq_node, &tsq->head);
880 tasklet_schedule(&tsq->tasklet);
881 local_irq_restore(flags);
882 } else {
883 sock_wfree(skb);
884 }
885}
886
887
888
889
890
891
892
893
894
895
896
897
898static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
899 gfp_t gfp_mask)
900{
901 const struct inet_connection_sock *icsk = inet_csk(sk);
902 struct inet_sock *inet;
903 struct tcp_sock *tp;
904 struct tcp_skb_cb *tcb;
905 struct tcp_out_options opts;
906 unsigned int tcp_options_size, tcp_header_size;
907 struct tcp_md5sig_key *md5;
908 struct tcphdr *th;
909 int err;
910
911 BUG_ON(!skb || !tcp_skb_pcount(skb));
912
913 if (clone_it) {
914 skb_mstamp_get(&skb->skb_mstamp);
915
916 if (unlikely(skb_cloned(skb)))
917 skb = pskb_copy(skb, gfp_mask);
918 else
919 skb = skb_clone(skb, gfp_mask);
920 if (unlikely(!skb))
921 return -ENOBUFS;
922 }
923
924 inet = inet_sk(sk);
925 tp = tcp_sk(sk);
926 tcb = TCP_SKB_CB(skb);
927 memset(&opts, 0, sizeof(opts));
928
929 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
930 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
931 else
932 tcp_options_size = tcp_established_options(sk, skb, &opts,
933 &md5);
934 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
935
936 if (tcp_packets_in_flight(tp) == 0)
937 tcp_ca_event(sk, CA_EVENT_TX_START);
938
939
940
941
942 skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
943
944 skb_push(skb, tcp_header_size);
945 skb_reset_transport_header(skb);
946
947 skb_orphan(skb);
948 skb->sk = sk;
949 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
950 skb_set_hash_from_sk(skb, sk);
951 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
952
953 skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
954
955
956 th = tcp_hdr(skb);
957 th->source = inet->inet_sport;
958 th->dest = inet->inet_dport;
959 th->seq = htonl(tcb->seq);
960 th->ack_seq = htonl(tp->rcv_nxt);
961 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
962 tcb->tcp_flags);
963
964 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
965
966
967
968 th->window = htons(min(tp->rcv_wnd, 65535U));
969 } else {
970 th->window = htons(tcp_select_window(sk));
971 }
972 th->check = 0;
973 th->urg_ptr = 0;
974
975
976 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
977 if (before(tp->snd_up, tcb->seq + 0x10000)) {
978 th->urg_ptr = htons(tp->snd_up - tcb->seq);
979 th->urg = 1;
980 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
981 th->urg_ptr = htons(0xFFFF);
982 th->urg = 1;
983 }
984 }
985
986 tcp_options_write((__be32 *)(th + 1), tp, &opts);
987 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
988 TCP_ECN_send(sk, skb, tcp_header_size);
989
990#ifdef CONFIG_TCP_MD5SIG
991
992 if (md5) {
993 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
994 tp->af_specific->calc_md5_hash(opts.hash_location,
995 md5, sk, NULL, skb);
996 }
997#endif
998
999 icsk->icsk_af_ops->send_check(sk, skb);
1000
1001 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1002 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1003
1004 if (skb->len != tcp_header_size)
1005 tcp_event_data_sent(tp, sk);
1006
1007 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1008 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1009 tcp_skb_pcount(skb));
1010
1011 tp->segs_out += tcp_skb_pcount(skb);
1012
1013 skb->tstamp.tv64 = 0;
1014 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
1015
1016 if (likely(err <= 0))
1017 return err;
1018
1019 tcp_enter_cwr(sk);
1020
1021 return net_xmit_eval(err);
1022}
1023
1024
1025
1026
1027
1028
1029static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1030{
1031 struct tcp_sock *tp = tcp_sk(sk);
1032
1033
1034 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1035 __skb_header_release(skb);
1036 tcp_add_write_queue_tail(sk, skb);
1037 sk->sk_wmem_queued += skb->truesize;
1038 sk_mem_charge(sk, skb->truesize);
1039}
1040
1041
1042static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
1043 unsigned int mss_now)
1044{
1045 struct skb_shared_info *shinfo = skb_shinfo(skb);
1046
1047
1048 WARN_ON_ONCE(skb_cloned(skb));
1049
1050 if (skb->len <= mss_now || !sk_can_gso(sk) ||
1051 skb->ip_summed == CHECKSUM_NONE) {
1052
1053
1054
1055 shinfo->gso_segs = 1;
1056 shinfo->gso_size = 0;
1057 shinfo->gso_type = 0;
1058 } else {
1059 shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1060 shinfo->gso_size = mss_now;
1061 shinfo->gso_type = sk->sk_gso_type;
1062 }
1063}
1064
1065
1066
1067
1068static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1069 int decr)
1070{
1071 struct tcp_sock *tp = tcp_sk(sk);
1072
1073 if (!tp->sacked_out || tcp_is_reno(tp))
1074 return;
1075
1076 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1077 tp->fackets_out -= decr;
1078}
1079
1080
1081
1082
1083static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1084{
1085 struct tcp_sock *tp = tcp_sk(sk);
1086
1087 tp->packets_out -= decr;
1088
1089 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1090 tp->sacked_out -= decr;
1091 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1092 tp->retrans_out -= decr;
1093 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1094 tp->lost_out -= decr;
1095
1096
1097 if (tcp_is_reno(tp) && decr > 0)
1098 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1099
1100 tcp_adjust_fackets_out(sk, skb, decr);
1101
1102 if (tp->lost_skb_hint &&
1103 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1104 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1105 tp->lost_cnt_hint -= decr;
1106
1107 tcp_verify_left_out(tp);
1108}
1109
1110
1111
1112
1113
1114
1115int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1116 unsigned int mss_now)
1117{
1118 struct tcp_sock *tp = tcp_sk(sk);
1119 struct sk_buff *buff;
1120 int nsize, old_factor;
1121 int nlen;
1122 u8 flags;
1123
1124 if (WARN_ON(len > skb->len))
1125 return -EINVAL;
1126
1127 nsize = skb_headlen(skb) - len;
1128 if (nsize < 0)
1129 nsize = 0;
1130
1131 if (skb_unclone(skb, GFP_ATOMIC))
1132 return -ENOMEM;
1133
1134
1135 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
1136 if (buff == NULL)
1137 return -ENOMEM;
1138
1139 sk->sk_wmem_queued += buff->truesize;
1140 sk_mem_charge(sk, buff->truesize);
1141 nlen = skb->len - len - nsize;
1142 buff->truesize += nlen;
1143 skb->truesize -= nlen;
1144
1145
1146 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1147 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1148 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1149
1150
1151 flags = TCP_SKB_CB(skb)->tcp_flags;
1152 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1153 TCP_SKB_CB(buff)->tcp_flags = flags;
1154 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1155
1156 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1157
1158 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1159 skb_put(buff, nsize),
1160 nsize, 0);
1161
1162 skb_trim(skb, len);
1163
1164 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1165 } else {
1166 skb->ip_summed = CHECKSUM_PARTIAL;
1167 skb_split(skb, buff, len);
1168 }
1169
1170 buff->ip_summed = skb->ip_summed;
1171
1172 buff->tstamp = skb->tstamp;
1173
1174 old_factor = tcp_skb_pcount(skb);
1175
1176
1177 tcp_set_skb_tso_segs(sk, skb, mss_now);
1178 tcp_set_skb_tso_segs(sk, buff, mss_now);
1179
1180
1181
1182
1183 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1184 int diff = old_factor - tcp_skb_pcount(skb) -
1185 tcp_skb_pcount(buff);
1186
1187 if (diff)
1188 tcp_adjust_pcount(sk, skb, diff);
1189 }
1190
1191
1192 __skb_header_release(buff);
1193 tcp_insert_write_queue_after(skb, buff, sk);
1194
1195 return 0;
1196}
1197
1198
1199
1200
1201
1202static int __pskb_trim_head(struct sk_buff *skb, int len)
1203{
1204 struct skb_shared_info *shinfo;
1205 int i, k, eat;
1206
1207 eat = min_t(int, len, skb_headlen(skb));
1208 if (eat) {
1209 __skb_pull(skb, eat);
1210 len -= eat;
1211 if (!len)
1212 return 0;
1213 }
1214 eat = len;
1215 k = 0;
1216 shinfo = skb_shinfo(skb);
1217 for (i = 0; i < shinfo->nr_frags; i++) {
1218 int size = skb_frag_size(&shinfo->frags[i]);
1219
1220 if (size <= eat) {
1221 skb_frag_unref(skb, i);
1222 eat -= size;
1223 } else {
1224 shinfo->frags[k] = shinfo->frags[i];
1225 if (eat) {
1226 shinfo->frags[k].page_offset += eat;
1227 skb_frag_size_sub(&shinfo->frags[k], eat);
1228 eat = 0;
1229 }
1230 k++;
1231 }
1232 }
1233 shinfo->nr_frags = k;
1234
1235 skb_reset_tail_pointer(skb);
1236 skb->data_len -= len;
1237 skb->len = skb->data_len;
1238 return len;
1239}
1240
1241
1242int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1243{
1244 u32 delta_truesize;
1245
1246 if (skb_unclone(skb, GFP_ATOMIC))
1247 return -ENOMEM;
1248
1249 delta_truesize = __pskb_trim_head(skb, len);
1250
1251 TCP_SKB_CB(skb)->seq += len;
1252 skb->ip_summed = CHECKSUM_PARTIAL;
1253
1254 if (delta_truesize) {
1255 skb->truesize -= delta_truesize;
1256 sk->sk_wmem_queued -= delta_truesize;
1257 sk_mem_uncharge(sk, delta_truesize);
1258 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1259 }
1260
1261
1262 if (tcp_skb_pcount(skb) > 1)
1263 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1264
1265 return 0;
1266}
1267
1268
1269static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1270{
1271 const struct tcp_sock *tp = tcp_sk(sk);
1272 const struct inet_connection_sock *icsk = inet_csk(sk);
1273 int mss_now;
1274
1275
1276
1277
1278 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1279
1280
1281 if (icsk->icsk_af_ops->net_frag_header_len) {
1282 const struct dst_entry *dst = __sk_dst_get(sk);
1283
1284 if (dst && dst_allfrag(dst))
1285 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1286 }
1287
1288
1289 if (mss_now > tp->rx_opt.mss_clamp)
1290 mss_now = tp->rx_opt.mss_clamp;
1291
1292
1293 mss_now -= icsk->icsk_ext_hdr_len;
1294
1295
1296 if (mss_now < 48)
1297 mss_now = 48;
1298 return mss_now;
1299}
1300
1301
1302int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1303{
1304
1305 return __tcp_mtu_to_mss(sk, pmtu) -
1306 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1307}
1308
1309
1310int tcp_mss_to_mtu(struct sock *sk, int mss)
1311{
1312 const struct tcp_sock *tp = tcp_sk(sk);
1313 const struct inet_connection_sock *icsk = inet_csk(sk);
1314 int mtu;
1315
1316 mtu = mss +
1317 tp->tcp_header_len +
1318 icsk->icsk_ext_hdr_len +
1319 icsk->icsk_af_ops->net_header_len;
1320
1321
1322 if (icsk->icsk_af_ops->net_frag_header_len) {
1323 const struct dst_entry *dst = __sk_dst_get(sk);
1324
1325 if (dst && dst_allfrag(dst))
1326 mtu += icsk->icsk_af_ops->net_frag_header_len;
1327 }
1328 return mtu;
1329}
1330
1331
1332void tcp_mtup_init(struct sock *sk)
1333{
1334 struct tcp_sock *tp = tcp_sk(sk);
1335 struct inet_connection_sock *icsk = inet_csk(sk);
1336
1337 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
1338 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1339 icsk->icsk_af_ops->net_header_len;
1340 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1341 icsk->icsk_mtup.probe_size = 0;
1342}
1343EXPORT_SYMBOL(tcp_mtup_init);
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1368{
1369 struct tcp_sock *tp = tcp_sk(sk);
1370 struct inet_connection_sock *icsk = inet_csk(sk);
1371 int mss_now;
1372
1373 if (icsk->icsk_mtup.search_high > pmtu)
1374 icsk->icsk_mtup.search_high = pmtu;
1375
1376 mss_now = tcp_mtu_to_mss(sk, pmtu);
1377 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1378
1379
1380 icsk->icsk_pmtu_cookie = pmtu;
1381 if (icsk->icsk_mtup.enabled)
1382 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1383 tp->mss_cache = mss_now;
1384
1385 return mss_now;
1386}
1387EXPORT_SYMBOL(tcp_sync_mss);
1388
1389
1390
1391
1392unsigned int tcp_current_mss(struct sock *sk)
1393{
1394 const struct tcp_sock *tp = tcp_sk(sk);
1395 const struct dst_entry *dst = __sk_dst_get(sk);
1396 u32 mss_now;
1397 unsigned int header_len;
1398 struct tcp_out_options opts;
1399 struct tcp_md5sig_key *md5;
1400
1401 mss_now = tp->mss_cache;
1402
1403 if (dst) {
1404 u32 mtu = dst_mtu(dst);
1405 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1406 mss_now = tcp_sync_mss(sk, mtu);
1407 }
1408
1409 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1410 sizeof(struct tcphdr);
1411
1412
1413
1414
1415 if (header_len != tp->tcp_header_len) {
1416 int delta = (int) header_len - tp->tcp_header_len;
1417 mss_now -= delta;
1418 }
1419
1420 return mss_now;
1421}
1422
1423
1424
1425
1426
1427static void tcp_cwnd_application_limited(struct sock *sk)
1428{
1429 struct tcp_sock *tp = tcp_sk(sk);
1430
1431 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1432 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1433
1434 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1435 u32 win_used = max(tp->snd_cwnd_used, init_win);
1436 if (win_used < tp->snd_cwnd) {
1437 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1438 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1439 }
1440 tp->snd_cwnd_used = 0;
1441 }
1442 tp->snd_cwnd_stamp = tcp_time_stamp;
1443}
1444
1445static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1446{
1447 struct tcp_sock *tp = tcp_sk(sk);
1448
1449
1450
1451
1452 if (!before(tp->snd_una, tp->max_packets_seq) ||
1453 tp->packets_out > tp->max_packets_out) {
1454 tp->max_packets_out = tp->packets_out;
1455 tp->max_packets_seq = tp->snd_nxt;
1456 tp->is_cwnd_limited = is_cwnd_limited;
1457 }
1458
1459 if (tcp_is_cwnd_limited(sk)) {
1460
1461 tp->snd_cwnd_used = 0;
1462 tp->snd_cwnd_stamp = tcp_time_stamp;
1463 } else {
1464
1465 if (tp->packets_out > tp->snd_cwnd_used)
1466 tp->snd_cwnd_used = tp->packets_out;
1467
1468 if (sysctl_tcp_slow_start_after_idle &&
1469 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1470 tcp_cwnd_application_limited(sk);
1471 }
1472}
1473
1474
1475static bool tcp_minshall_check(const struct tcp_sock *tp)
1476{
1477 return after(tp->snd_sml, tp->snd_una) &&
1478 !after(tp->snd_sml, tp->snd_nxt);
1479}
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1490 const struct sk_buff *skb)
1491{
1492 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1493 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1494}
1495
1496
1497
1498
1499
1500
1501
1502
1503static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1504 int nonagle)
1505{
1506 return partial &&
1507 ((nonagle & TCP_NAGLE_CORK) ||
1508 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1509}
1510
1511
1512
1513
1514static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
1515{
1516 u32 bytes, segs;
1517
1518 bytes = min(sk->sk_pacing_rate >> 10,
1519 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1520
1521
1522
1523
1524
1525
1526 segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
1527
1528 return min_t(u32, segs, sk->sk_gso_max_segs);
1529}
1530
1531
1532static unsigned int tcp_mss_split_point(const struct sock *sk,
1533 const struct sk_buff *skb,
1534 unsigned int mss_now,
1535 unsigned int max_segs,
1536 int nonagle)
1537{
1538 const struct tcp_sock *tp = tcp_sk(sk);
1539 u32 partial, needed, window, max_len;
1540
1541 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1542 max_len = mss_now * max_segs;
1543
1544 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1545 return max_len;
1546
1547 needed = min(skb->len, window);
1548
1549 if (max_len <= needed)
1550 return max_len;
1551
1552 partial = needed % mss_now;
1553
1554
1555
1556
1557 if (tcp_nagle_check(partial != 0, tp, nonagle))
1558 return needed - partial;
1559
1560 return needed;
1561}
1562
1563
1564
1565
1566static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1567 const struct sk_buff *skb)
1568{
1569 u32 in_flight, cwnd, halfcwnd;
1570
1571
1572 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1573 tcp_skb_pcount(skb) == 1)
1574 return 1;
1575
1576 in_flight = tcp_packets_in_flight(tp);
1577 cwnd = tp->snd_cwnd;
1578 if (in_flight >= cwnd)
1579 return 0;
1580
1581
1582
1583
1584 halfcwnd = max(cwnd >> 1, 1U);
1585 return min(halfcwnd, cwnd - in_flight);
1586}
1587
1588
1589
1590
1591
1592static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1593 unsigned int mss_now)
1594{
1595 int tso_segs = tcp_skb_pcount(skb);
1596
1597 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1598 tcp_set_skb_tso_segs(sk, skb, mss_now);
1599 tso_segs = tcp_skb_pcount(skb);
1600 }
1601 return tso_segs;
1602}
1603
1604
1605
1606
1607
1608static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1609 unsigned int cur_mss, int nonagle)
1610{
1611
1612
1613
1614
1615
1616
1617 if (nonagle & TCP_NAGLE_PUSH)
1618 return true;
1619
1620
1621 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1622 return true;
1623
1624 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1625 return true;
1626
1627 return false;
1628}
1629
1630
1631static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1632 const struct sk_buff *skb,
1633 unsigned int cur_mss)
1634{
1635 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1636
1637 if (skb->len > cur_mss)
1638 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1639
1640 return !after(end_seq, tcp_wnd_end(tp));
1641}
1642
1643
1644
1645
1646
1647static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1648 unsigned int cur_mss, int nonagle)
1649{
1650 const struct tcp_sock *tp = tcp_sk(sk);
1651 unsigned int cwnd_quota;
1652
1653 tcp_init_tso_segs(sk, skb, cur_mss);
1654
1655 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1656 return 0;
1657
1658 cwnd_quota = tcp_cwnd_test(tp, skb);
1659 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1660 cwnd_quota = 0;
1661
1662 return cwnd_quota;
1663}
1664
1665
1666bool tcp_may_send_now(struct sock *sk)
1667{
1668 const struct tcp_sock *tp = tcp_sk(sk);
1669 struct sk_buff *skb = tcp_send_head(sk);
1670
1671 return skb &&
1672 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1673 (tcp_skb_is_last(sk, skb) ?
1674 tp->nonagle : TCP_NAGLE_PUSH));
1675}
1676
1677
1678
1679
1680
1681
1682
1683
1684static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1685 unsigned int mss_now, gfp_t gfp)
1686{
1687 struct sk_buff *buff;
1688 int nlen = skb->len - len;
1689 u8 flags;
1690
1691
1692 if (skb->len != skb->data_len)
1693 return tcp_fragment(sk, skb, len, mss_now);
1694
1695 buff = sk_stream_alloc_skb(sk, 0, gfp);
1696 if (unlikely(buff == NULL))
1697 return -ENOMEM;
1698
1699 sk->sk_wmem_queued += buff->truesize;
1700 sk_mem_charge(sk, buff->truesize);
1701 buff->truesize += nlen;
1702 skb->truesize -= nlen;
1703
1704
1705 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1706 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1707 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1708
1709
1710 flags = TCP_SKB_CB(skb)->tcp_flags;
1711 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1712 TCP_SKB_CB(buff)->tcp_flags = flags;
1713
1714
1715 TCP_SKB_CB(buff)->sacked = 0;
1716
1717 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1718 skb_split(skb, buff, len);
1719
1720
1721 tcp_set_skb_tso_segs(sk, skb, mss_now);
1722 tcp_set_skb_tso_segs(sk, buff, mss_now);
1723
1724
1725 __skb_header_release(buff);
1726 tcp_insert_write_queue_after(skb, buff, sk);
1727
1728 return 0;
1729}
1730
1731
1732
1733
1734
1735
1736static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1737 bool *is_cwnd_limited, u32 max_segs)
1738{
1739 struct tcp_sock *tp = tcp_sk(sk);
1740 const struct inet_connection_sock *icsk = inet_csk(sk);
1741 u32 send_win, cong_win, limit, in_flight;
1742 int win_divisor;
1743
1744 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1745 goto send_now;
1746
1747 if (icsk->icsk_ca_state != TCP_CA_Open)
1748 goto send_now;
1749
1750
1751 if (tp->tso_deferred &&
1752 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1753 goto send_now;
1754
1755 in_flight = tcp_packets_in_flight(tp);
1756
1757 BUG_ON(tcp_skb_pcount(skb) <= 1);
1758 BUG_ON(tp->snd_cwnd <= in_flight);
1759
1760 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1761
1762
1763 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1764
1765 limit = min(send_win, cong_win);
1766
1767
1768 if (limit >= max_segs * tp->mss_cache)
1769 goto send_now;
1770
1771
1772 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1773 goto send_now;
1774
1775 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1776 if (win_divisor) {
1777 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1778
1779
1780
1781
1782 chunk /= win_divisor;
1783 if (limit >= chunk)
1784 goto send_now;
1785 } else {
1786
1787
1788
1789
1790
1791 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1792 goto send_now;
1793 }
1794
1795
1796
1797
1798 if (!tp->tso_deferred)
1799 tp->tso_deferred = 1 | (jiffies << 1);
1800
1801 if (cong_win < send_win && cong_win < skb->len)
1802 *is_cwnd_limited = true;
1803
1804 return true;
1805
1806send_now:
1807 tp->tso_deferred = 0;
1808 return false;
1809}
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820static int tcp_mtu_probe(struct sock *sk)
1821{
1822 struct tcp_sock *tp = tcp_sk(sk);
1823 struct inet_connection_sock *icsk = inet_csk(sk);
1824 struct sk_buff *skb, *nskb, *next;
1825 int len;
1826 int probe_size;
1827 int size_needed;
1828 int copy;
1829 int mss_now;
1830
1831
1832
1833
1834
1835 if (!icsk->icsk_mtup.enabled ||
1836 icsk->icsk_mtup.probe_size ||
1837 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1838 tp->snd_cwnd < 11 ||
1839 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1840 return -1;
1841
1842
1843 mss_now = tcp_current_mss(sk);
1844 probe_size = 2 * tp->mss_cache;
1845 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1846 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1847
1848 return -1;
1849 }
1850
1851
1852 if (tp->write_seq - tp->snd_nxt < size_needed)
1853 return -1;
1854
1855 if (tp->snd_wnd < size_needed)
1856 return -1;
1857 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1858 return 0;
1859
1860
1861 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1862 if (!tcp_packets_in_flight(tp))
1863 return -1;
1864 else
1865 return 0;
1866 }
1867
1868
1869 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1870 return -1;
1871 sk->sk_wmem_queued += nskb->truesize;
1872 sk_mem_charge(sk, nskb->truesize);
1873
1874 skb = tcp_send_head(sk);
1875
1876 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1877 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1878 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1879 TCP_SKB_CB(nskb)->sacked = 0;
1880 nskb->csum = 0;
1881 nskb->ip_summed = skb->ip_summed;
1882
1883 tcp_insert_write_queue_before(nskb, skb, sk);
1884
1885 len = 0;
1886 tcp_for_write_queue_from_safe(skb, next, sk) {
1887 copy = min_t(int, skb->len, probe_size - len);
1888 if (nskb->ip_summed)
1889 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1890 else
1891 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1892 skb_put(nskb, copy),
1893 copy, nskb->csum);
1894
1895 if (skb->len <= copy) {
1896
1897
1898 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1899 tcp_unlink_write_queue(skb, sk);
1900 sk_wmem_free_skb(sk, skb);
1901 } else {
1902 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1903 ~(TCPHDR_FIN|TCPHDR_PSH);
1904 if (!skb_shinfo(skb)->nr_frags) {
1905 skb_pull(skb, copy);
1906 if (skb->ip_summed != CHECKSUM_PARTIAL)
1907 skb->csum = csum_partial(skb->data,
1908 skb->len, 0);
1909 } else {
1910 __pskb_trim_head(skb, copy);
1911 tcp_set_skb_tso_segs(sk, skb, mss_now);
1912 }
1913 TCP_SKB_CB(skb)->seq += copy;
1914 }
1915
1916 len += copy;
1917
1918 if (len >= probe_size)
1919 break;
1920 }
1921 tcp_init_tso_segs(sk, nskb, nskb->len);
1922
1923
1924
1925
1926 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1927
1928
1929 tp->snd_cwnd--;
1930 tcp_event_new_data_sent(sk, nskb);
1931
1932 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1933 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1934 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1935
1936 return 1;
1937 }
1938
1939 return -1;
1940}
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1957 int push_one, gfp_t gfp)
1958{
1959 struct tcp_sock *tp = tcp_sk(sk);
1960 struct sk_buff *skb;
1961 unsigned int tso_segs, sent_pkts;
1962 int cwnd_quota;
1963 int result;
1964 bool is_cwnd_limited = false;
1965 u32 max_segs;
1966
1967 sent_pkts = 0;
1968
1969 if (!push_one) {
1970
1971 result = tcp_mtu_probe(sk);
1972 if (!result) {
1973 return false;
1974 } else if (result > 0) {
1975 sent_pkts = 1;
1976 }
1977 }
1978
1979 max_segs = tcp_tso_autosize(sk, mss_now);
1980 while ((skb = tcp_send_head(sk))) {
1981 unsigned int limit;
1982
1983 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1984 BUG_ON(!tso_segs);
1985
1986 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
1987
1988 skb_mstamp_get(&skb->skb_mstamp);
1989 goto repair;
1990 }
1991
1992 cwnd_quota = tcp_cwnd_test(tp, skb);
1993 if (!cwnd_quota) {
1994 is_cwnd_limited = true;
1995 if (push_one == 2)
1996
1997 cwnd_quota = 1;
1998 else
1999 break;
2000 }
2001
2002 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
2003 break;
2004
2005 if (tso_segs == 1) {
2006 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2007 (tcp_skb_is_last(sk, skb) ?
2008 nonagle : TCP_NAGLE_PUSH))))
2009 break;
2010 } else {
2011 if (!push_one &&
2012 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2013 max_segs))
2014 break;
2015 }
2016
2017 limit = mss_now;
2018 if (tso_segs > 1 && !tcp_urg_mode(tp))
2019 limit = tcp_mss_split_point(sk, skb, mss_now,
2020 min_t(unsigned int,
2021 cwnd_quota,
2022 max_segs),
2023 nonagle);
2024
2025 if (skb->len > limit &&
2026 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2027 break;
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
2040 limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
2041
2042 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
2043 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
2044
2045
2046
2047
2048
2049
2050 smp_mb__after_clear_bit();
2051 if (atomic_read(&sk->sk_wmem_alloc) > limit)
2052 break;
2053 }
2054
2055 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2056 break;
2057
2058repair:
2059
2060
2061
2062 tcp_event_new_data_sent(sk, skb);
2063
2064 tcp_minshall_update(tp, mss_now, skb);
2065 sent_pkts += tcp_skb_pcount(skb);
2066
2067 if (push_one)
2068 break;
2069 }
2070
2071 if (likely(sent_pkts)) {
2072 if (tcp_in_cwnd_reduction(sk))
2073 tp->prr_out += sent_pkts;
2074
2075
2076 if (push_one != 2)
2077 tcp_schedule_loss_probe(sk);
2078 tcp_cwnd_validate(sk, is_cwnd_limited);
2079 return false;
2080 }
2081 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
2082}
2083
2084bool tcp_schedule_loss_probe(struct sock *sk)
2085{
2086 struct inet_connection_sock *icsk = inet_csk(sk);
2087 struct tcp_sock *tp = tcp_sk(sk);
2088 u32 timeout, tlp_time_stamp, rto_time_stamp;
2089 u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
2090
2091 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
2092 return false;
2093
2094 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
2095 tcp_rearm_rto(sk);
2096 return false;
2097 }
2098
2099
2100
2101 if (sk->sk_state == TCP_SYN_RECV)
2102 return false;
2103
2104
2105 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
2106 return false;
2107
2108
2109
2110
2111 if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
2112 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2113 return false;
2114
2115 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2116 tcp_send_head(sk))
2117 return false;
2118
2119
2120
2121
2122 timeout = rtt << 1;
2123 if (tp->packets_out == 1)
2124 timeout = max_t(u32, timeout,
2125 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
2126 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
2127
2128
2129 tlp_time_stamp = tcp_time_stamp + timeout;
2130 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
2131 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
2132 s32 delta = rto_time_stamp - tcp_time_stamp;
2133 if (delta > 0)
2134 timeout = delta;
2135 }
2136
2137 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2138 TCP_RTO_MAX);
2139 return true;
2140}
2141
2142
2143
2144
2145
2146
2147static bool skb_still_in_host_queue(const struct sock *sk,
2148 const struct sk_buff *skb)
2149{
2150 if (unlikely(skb_fclone_busy(sk, skb))) {
2151 NET_INC_STATS_BH(sock_net(sk),
2152 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2153 return true;
2154 }
2155 return false;
2156}
2157
2158
2159
2160
2161void tcp_send_loss_probe(struct sock *sk)
2162{
2163 struct tcp_sock *tp = tcp_sk(sk);
2164 struct sk_buff *skb;
2165 int pcount;
2166 int mss = tcp_current_mss(sk);
2167 int err = -1;
2168
2169 if (tcp_send_head(sk) != NULL) {
2170 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2171 goto rearm_timer;
2172 }
2173
2174
2175 if (tp->tlp_high_seq)
2176 goto rearm_timer;
2177
2178
2179 skb = tcp_write_queue_tail(sk);
2180 if (WARN_ON(!skb))
2181 goto rearm_timer;
2182
2183 if (skb_still_in_host_queue(sk, skb))
2184 goto rearm_timer;
2185
2186 pcount = tcp_skb_pcount(skb);
2187 if (WARN_ON(!pcount))
2188 goto rearm_timer;
2189
2190 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2191 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
2192 goto rearm_timer;
2193 skb = tcp_write_queue_tail(sk);
2194 }
2195
2196 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2197 goto rearm_timer;
2198
2199 err = __tcp_retransmit_skb(sk, skb);
2200
2201
2202 if (likely(!err))
2203 tp->tlp_high_seq = tp->snd_nxt;
2204
2205rearm_timer:
2206 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2207 inet_csk(sk)->icsk_rto,
2208 TCP_RTO_MAX);
2209
2210 if (likely(!err))
2211 NET_INC_STATS_BH(sock_net(sk),
2212 LINUX_MIB_TCPLOSSPROBES);
2213 return;
2214}
2215
2216
2217
2218
2219
2220void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2221 int nonagle)
2222{
2223
2224
2225
2226
2227 if (unlikely(sk->sk_state == TCP_CLOSE))
2228 return;
2229
2230 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2231 sk_gfp_atomic(sk, GFP_ATOMIC)))
2232 tcp_check_probe_timer(sk);
2233}
2234
2235
2236
2237
2238void tcp_push_one(struct sock *sk, unsigned int mss_now)
2239{
2240 struct sk_buff *skb = tcp_send_head(sk);
2241
2242 BUG_ON(!skb || skb->len < mss_now);
2243
2244 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2245}
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299u32 __tcp_select_window(struct sock *sk)
2300{
2301 struct inet_connection_sock *icsk = inet_csk(sk);
2302 struct tcp_sock *tp = tcp_sk(sk);
2303
2304
2305
2306
2307
2308
2309 int mss = icsk->icsk_ack.rcv_mss;
2310 int free_space = tcp_space(sk);
2311 int allowed_space = tcp_full_space(sk);
2312 int full_space = min_t(int, tp->window_clamp, allowed_space);
2313 int window;
2314
2315 if (unlikely(mss > full_space)) {
2316 mss = full_space;
2317 if (mss <= 0)
2318 return 0;
2319 }
2320 if (free_space < (full_space >> 1)) {
2321 icsk->icsk_ack.quick = 0;
2322
2323 if (tcp_under_memory_pressure(sk))
2324 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2325 4U * tp->advmss);
2326
2327
2328
2329
2330 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2331
2332
2333
2334
2335
2336
2337
2338
2339 if (free_space < (allowed_space >> 4) || free_space < mss)
2340 return 0;
2341 }
2342
2343 if (free_space > tp->rcv_ssthresh)
2344 free_space = tp->rcv_ssthresh;
2345
2346
2347
2348
2349 window = tp->rcv_wnd;
2350 if (tp->rx_opt.rcv_wscale) {
2351 window = free_space;
2352
2353
2354
2355
2356
2357 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2358 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2359 << tp->rx_opt.rcv_wscale);
2360 } else {
2361
2362
2363
2364
2365
2366
2367
2368
2369 if (window <= free_space - mss || window > free_space)
2370 window = (free_space / mss) * mss;
2371 else if (mss == full_space &&
2372 free_space > window + (full_space >> 1))
2373 window = free_space;
2374 }
2375
2376 return window;
2377}
2378
2379
2380static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2381{
2382 struct tcp_sock *tp = tcp_sk(sk);
2383 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2384 int skb_size, next_skb_size;
2385
2386 skb_size = skb->len;
2387 next_skb_size = next_skb->len;
2388
2389 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2390
2391 tcp_highest_sack_combine(sk, next_skb, skb);
2392
2393 tcp_unlink_write_queue(next_skb, sk);
2394
2395 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2396 next_skb_size);
2397
2398 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2399 skb->ip_summed = CHECKSUM_PARTIAL;
2400
2401 if (skb->ip_summed != CHECKSUM_PARTIAL)
2402 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2403
2404
2405 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2406
2407
2408 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2409
2410
2411
2412
2413 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2414
2415
2416 tcp_clear_retrans_hints_partial(tp);
2417 if (next_skb == tp->retransmit_skb_hint)
2418 tp->retransmit_skb_hint = skb;
2419
2420 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2421
2422 sk_wmem_free_skb(sk, next_skb);
2423}
2424
2425
2426static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2427{
2428 if (tcp_skb_pcount(skb) > 1)
2429 return false;
2430
2431 if (skb_shinfo(skb)->nr_frags != 0)
2432 return false;
2433 if (skb_cloned(skb))
2434 return false;
2435 if (skb == tcp_send_head(sk))
2436 return false;
2437
2438 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2439 return false;
2440
2441 return true;
2442}
2443
2444
2445
2446
2447static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2448 int space)
2449{
2450 struct tcp_sock *tp = tcp_sk(sk);
2451 struct sk_buff *skb = to, *tmp;
2452 bool first = true;
2453
2454 if (!sysctl_tcp_retrans_collapse)
2455 return;
2456 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2457 return;
2458
2459 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2460 if (!tcp_can_collapse(sk, skb))
2461 break;
2462
2463 space -= skb->len;
2464
2465 if (first) {
2466 first = false;
2467 continue;
2468 }
2469
2470 if (space < 0)
2471 break;
2472
2473
2474
2475 if (skb->len > skb_availroom(to))
2476 break;
2477
2478 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2479 break;
2480
2481 tcp_collapse_retrans(sk, to);
2482 }
2483}
2484
2485
2486
2487
2488
2489int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2490{
2491 struct tcp_sock *tp = tcp_sk(sk);
2492 struct inet_connection_sock *icsk = inet_csk(sk);
2493 unsigned int cur_mss;
2494 int err;
2495
2496
2497 if (icsk->icsk_mtup.probe_size) {
2498 icsk->icsk_mtup.probe_size = 0;
2499 }
2500
2501
2502
2503
2504 if (atomic_read(&sk->sk_wmem_alloc) >
2505 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2506 return -EAGAIN;
2507
2508 if (skb_still_in_host_queue(sk, skb))
2509 return -EBUSY;
2510
2511 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2512 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2513 BUG();
2514 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2515 return -ENOMEM;
2516 }
2517
2518 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2519 return -EHOSTUNREACH;
2520
2521 cur_mss = tcp_current_mss(sk);
2522
2523
2524
2525
2526
2527
2528 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2529 TCP_SKB_CB(skb)->seq != tp->snd_una)
2530 return -EAGAIN;
2531
2532 if (skb->len > cur_mss) {
2533 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
2534 return -ENOMEM;
2535 } else {
2536 int oldpcount = tcp_skb_pcount(skb);
2537
2538 if (unlikely(oldpcount > 1)) {
2539 if (skb_unclone(skb, GFP_ATOMIC))
2540 return -ENOMEM;
2541 tcp_init_tso_segs(sk, skb, cur_mss);
2542 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2543 }
2544 }
2545
2546 tcp_retrans_try_collapse(sk, skb, cur_mss);
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2557 skb_headroom(skb) >= 0xFFFF)) {
2558 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2559 GFP_ATOMIC);
2560 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2561 -ENOBUFS;
2562 } else {
2563 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2564 }
2565
2566 if (likely(!err)) {
2567 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2568
2569 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2570 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2571 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2572 tp->total_retrans++;
2573 }
2574 return err;
2575}
2576
2577int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2578{
2579 struct tcp_sock *tp = tcp_sk(sk);
2580 int err = __tcp_retransmit_skb(sk, skb);
2581
2582 if (err == 0) {
2583#if FASTRETRANS_DEBUG > 0
2584 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2585 net_dbg_ratelimited("retrans_out leaked\n");
2586 }
2587#endif
2588 if (!tp->retrans_out)
2589 tp->lost_retrans_low = tp->snd_nxt;
2590 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2591 tp->retrans_out += tcp_skb_pcount(skb);
2592
2593
2594 if (!tp->retrans_stamp)
2595 tp->retrans_stamp = tcp_skb_timestamp(skb);
2596
2597
2598
2599
2600 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2601 } else if (err != -EBUSY) {
2602 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2603 }
2604
2605 if (tp->undo_retrans < 0)
2606 tp->undo_retrans = 0;
2607 tp->undo_retrans += tcp_skb_pcount(skb);
2608 return err;
2609}
2610
/* Check if we forward retransmits are possible in the current
 * window/congestion state.
 */
2614static bool tcp_can_forward_retransmit(struct sock *sk)
2615{
2616 const struct inet_connection_sock *icsk = inet_csk(sk);
2617 const struct tcp_sock *tp = tcp_sk(sk);
2618
2619
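/* Forward retransmissions are possible only during Recovery. */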
2620 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2621 return false;
2622
2623
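/* No forward retransmissions in Reno are possible. */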
2624 if (tcp_is_reno(tp))
2625 return false;
2626
/* Yeah, we have to make difficult choice between forward transmission
 * and retransmission... Both ways have their merits...
 *
 * For now we do not retransmit anything, while we have some new
 * segments to send. In the other cases, follow rule 3 for
 * NextSeg() specified in RFC3517.
 */
2635 if (tcp_may_send_now(sk))
2636 return false;
2637
2638 return true;
2639}
2640
/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged. It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 */
2649void tcp_xmit_retransmit_queue(struct sock *sk)
2650{
2651 const struct inet_connection_sock *icsk = inet_csk(sk);
2652 struct tcp_sock *tp = tcp_sk(sk);
2653 struct sk_buff *skb;
2654 struct sk_buff *hole = NULL;
2655 u32 last_lost;
2656 int mib_idx;
2657 int fwd_rexmitting = 0;
2658
2659 if (!tp->packets_out)
2660 return;
2661
2662 if (!tp->lost_out)
2663 tp->retransmit_high = tp->snd_una;
2664
2665 if (tp->retransmit_skb_hint) {
2666 skb = tp->retransmit_skb_hint;
2667 last_lost = TCP_SKB_CB(skb)->end_seq;
2668 if (after(last_lost, tp->retransmit_high))
2669 last_lost = tp->retransmit_high;
2670 } else {
2671 skb = tcp_write_queue_head(sk);
2672 last_lost = tp->snd_una;
2673 }
2674
2675 tcp_for_write_queue_from(skb, sk) {
2676 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2677
2678 if (skb == tcp_send_head(sk))
2679 break;
2680
2681 if (hole == NULL)
2682 tp->retransmit_skb_hint = skb;
2683
/* Assume this retransmit will generate
 * only one packet for congestion window
 * calculation purposes.  This works because
 * tcp_retransmit_skb() will chop up the
 * packet to be MSS sized and all the
 * packet counting works out.
 */
2691 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2692 return;
2693
2694 if (fwd_rexmitting) {
2695begin_fwd:
2696 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2697 break;
2698 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2699
2700 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2701 tp->retransmit_high = last_lost;
2702 if (!tcp_can_forward_retransmit(sk))
2703 break;
2704
2705 if (hole != NULL) {
2706 skb = hole;
2707 hole = NULL;
2708 }
2709 fwd_rexmitting = 1;
2710 goto begin_fwd;
2711
2712 } else if (!(sacked & TCPCB_LOST)) {
2713 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2714 hole = skb;
2715 continue;
2716
2717 } else {
2718 last_lost = TCP_SKB_CB(skb)->end_seq;
2719 if (icsk->icsk_ca_state != TCP_CA_Loss)
2720 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2721 else
2722 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2723 }
2724
2725 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2726 continue;
2727
2728 if (tcp_retransmit_skb(sk, skb))
2729 return;
2730
2731 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2732
2733 if (tcp_in_cwnd_reduction(sk))
2734 tp->prr_out += tcp_skb_pcount(skb);
2735
2736 if (skb == tcp_write_queue_head(sk))
2737 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2738 inet_csk(sk)->icsk_rto,
2739 TCP_RTO_MAX);
2740 }
2741}
2742
/* Forced variant of sk_mem_schedule(): charge @size bytes worth of
 * pages to the socket's forward allocation and the protocol memory
 * counter without checking memory limits, so that critical segments
 * (such as a FIN) can still be queued under memory pressure.
 */
2749void sk_forced_mem_schedule(struct sock *sk, int size)
2750{
2751 int amt, status;
2752
2753 if (size <= sk->sk_forward_alloc)
2754 return;
2755 amt = sk_mem_pages(size);
2756 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2757 sk_memory_allocated_add(sk, amt, &status);
2758}
2759
/* Send a FIN. The caller locks the socket for us. This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */
2763void tcp_send_fin(struct sock *sk)
2764{
2765 struct tcp_sock *tp = tcp_sk(sk);
2766 struct sk_buff *skb = tcp_write_queue_tail(sk);
2767 int mss_now;
2768
/* Optimization, tack the FIN onto the tail skb if we have a queue
 * of unsent frames. But be careful about outgoing SACKs and IP
 * options.
 */
2773 mss_now = tcp_current_mss(sk);
2774
2775 if (tcp_send_head(sk) != NULL) {
2776 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2777 TCP_SKB_CB(skb)->end_seq++;
2778 tp->write_seq++;
2779 } else {
2780
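/* Socket is locked, keep trying until memory is available. */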
2781 for (;;) {
2782 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2783 sk->sk_allocation);
2784 if (skb)
2785 break;
2786 yield();
2787 }
2788
2789
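/* Reserve space for headers and prepare control bits. */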
2790 skb_reserve(skb, MAX_TCP_HEADER);
2791
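/* FIN eats a sequence byte, write_seq is advanced by tcp_queue_skb(). */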
2792 tcp_init_nondata_skb(skb, tp->write_seq,
2793 TCPHDR_ACK | TCPHDR_FIN);
2794 tcp_queue_skb(sk, skb);
2795 }
2796 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2797}
2798
/* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
 * was unread data in the receive queue.  This behavior is recommended
 * by RFC 2525, section 2.17.  -DaveM
 */
2804void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2805{
2806 struct sk_buff *skb;
2807
2808
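/* NOTE: No TCP options attached and we never retransmit this. */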
2809 skb = alloc_skb(MAX_TCP_HEADER, priority);
2810 if (!skb) {
2811 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2812 return;
2813 }
2814
2815
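/* Reserve space for headers and prepare control bits. */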
2816 skb_reserve(skb, MAX_TCP_HEADER);
2817 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2818 TCPHDR_ACK | TCPHDR_RST);
2819
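/* Send it off. */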
2820 if (tcp_transmit_skb(sk, skb, 0, priority))
2821 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2822
2823 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2824}
2825
/* Send a crossed SYN-ACK during socket establishment.
 * WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */
2832int tcp_send_synack(struct sock *sk)
2833{
2834 struct sk_buff *skb;
2835
2836 skb = tcp_write_queue_head(sk);
2837 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2838 pr_debug("%s: wrong queue state\n", __func__);
2839 return -EFAULT;
2840 }
2841 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2842 if (skb_cloned(skb)) {
2843 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2844 if (nskb == NULL)
2845 return -ENOMEM;
2846 tcp_unlink_write_queue(skb, sk);
2847 __skb_header_release(nskb);
2848 __tcp_add_write_queue_head(sk, nskb);
2849 sk_wmem_free_skb(sk, skb);
2850 sk->sk_wmem_queued += nskb->truesize;
2851 sk_mem_charge(sk, nskb->truesize);
2852 skb = nskb;
2853 }
2854
2855 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2856 TCP_ECN_send_synack(sk, skb);
2857 }
2858 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2859}
2860
/**
 * tcp_make_synack - Prepare a SYN-ACK.
 * sk: listener socket
 * dst: dst entry attached to the SYNACK
 * req: request_sock pointer
 * foc: TCP Fast Open cookie, if any
 *
 * Allocate one skb and build a SYNACK packet.
 * @dst is consumed : Caller should not use it again.
 */
2870struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2871 struct request_sock *req,
2872 struct tcp_fastopen_cookie *foc)
2873{
2874 struct tcp_out_options opts;
2875 struct inet_request_sock *ireq = inet_rsk(req);
2876 struct tcp_sock *tp = tcp_sk(sk);
2877 struct tcphdr *th;
2878 struct sk_buff *skb;
2879 struct tcp_md5sig_key *md5;
2880 int tcp_header_size;
2881 int mss;
2882
2883 skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
2884 if (unlikely(!skb)) {
2885 dst_release(dst);
2886 return NULL;
2887 }
2888
2889 skb_reserve(skb, MAX_TCP_HEADER);
2890
2891 skb_dst_set(skb, dst);
2892 security_skb_owned_by(skb, sk);
2893
2894 mss = dst_metric_advmss(dst);
2895 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2896 mss = tp->rx_opt.user_mss;
2897
2898 memset(&opts, 0, sizeof(opts));
2899#ifdef CONFIG_SYN_COOKIES
2900 if (unlikely(req->cookie_ts))
2901 skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
2902 else
2903#endif
2904 skb_mstamp_get(&skb->skb_mstamp);
2905 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
2906 foc) + sizeof(*th);
2907
2908 skb_push(skb, tcp_header_size);
2909 skb_reset_transport_header(skb);
2910
2911 th = tcp_hdr(skb);
2912 memset(th, 0, sizeof(struct tcphdr));
2913 th->syn = 1;
2914 th->ack = 1;
2915 TCP_ECN_make_synack(req, th, sk);
2916 th->source = htons(ireq->ir_num);
2917 th->dest = ireq->ir_rmt_port;
2918
2919
2920
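/* Setting of flags is superfluous here for callers (and ECE is
 * not transparent to them), but this is not a problem.
 */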
2921 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2922 TCPHDR_SYN | TCPHDR_ACK);
2923
2924 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2925
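/* XXX data is queued and acked as is. No buffer/window check */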
2926 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
2927
2928
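/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */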
2929 th->window = htons(min(req->rcv_wnd, 65535U));
2930 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2931 th->doff = (tcp_header_size >> 2);
2932 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
2933
2934#ifdef CONFIG_TCP_MD5SIG
2935
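/* Okay, we have all we need - do the md5 hash if needed */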
2936 if (md5) {
2937 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
2938 md5, NULL, req, skb);
2939 }
2940#endif
2941
2942
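/* Do not fool tcpdump (if any), clean our debris */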
2943 skb->tstamp.tv64 = 0;
2944 return skb;
2945}
2946EXPORT_SYMBOL(tcp_make_synack);
2947
2948static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
2949{
2950 struct inet_connection_sock *icsk = inet_csk(sk);
2951 const struct tcp_congestion_ops *ca;
2952 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
2953
2954 if (ca_key == TCP_CA_UNSPEC)
2955 return;
2956
2957 rcu_read_lock();
2958 ca = tcp_ca_find_key(ca_key);
2959 if (likely(ca && try_module_get(ca->owner))) {
2960 module_put(icsk->icsk_ca_ops->owner);
2961 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
2962 icsk->icsk_ca_ops = ca;
2963 }
2964 rcu_read_unlock();
2965}
2966
2967
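/* Do all connect socket setups that can be done AF independent. */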
2968static void tcp_connect_init(struct sock *sk)
2969{
2970 const struct dst_entry *dst = __sk_dst_get(sk);
2971 struct tcp_sock *tp = tcp_sk(sk);
2972 __u8 rcv_wscale;
2973
/* We'll fix this up when we get a response from the other end.
 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
 */
2977 tp->tcp_header_len = sizeof(struct tcphdr) +
2978 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2979
2980#ifdef CONFIG_TCP_MD5SIG
2981 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2982 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2983#endif
2984
2985
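/* If user gave his TCP_MAXSEG, record it to clamp */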
2986 if (tp->rx_opt.user_mss)
2987 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2988 tp->max_window = 0;
2989 tcp_mtup_init(sk);
2990 tcp_sync_mss(sk, dst_mtu(dst));
2991
2992 tcp_ca_dst_init(sk, dst);
2993
2994 if (!tp->window_clamp)
2995 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2996 tp->advmss = dst_metric_advmss(dst);
2997 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
2998 tp->advmss = tp->rx_opt.user_mss;
2999
3000 tcp_initialize_rcv_mss(sk);
3001
3002
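/* limit the window selection if the user enforces a smaller rx buffer */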
3003 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3004 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3005 tp->window_clamp = tcp_full_space(sk);
3006
3007 tcp_select_initial_window(tcp_full_space(sk),
3008 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3009 &tp->rcv_wnd,
3010 &tp->window_clamp,
3011 sysctl_tcp_window_scaling,
3012 &rcv_wscale,
3013 dst_metric(dst, RTAX_INITRWND));
3014
3015 tp->rx_opt.rcv_wscale = rcv_wscale;
3016 tp->rcv_ssthresh = tp->rcv_wnd;
3017
3018 sk->sk_err = 0;
3019 sock_reset_flag(sk, SOCK_DONE);
3020 tp->snd_wnd = 0;
3021 tcp_init_wl(tp, 0);
3022 tp->snd_una = tp->write_seq;
3023 tp->snd_sml = tp->write_seq;
3024 tp->snd_up = tp->write_seq;
3025 tp->snd_nxt = tp->write_seq;
3026
3027 if (likely(!tp->repair))
3028 tp->rcv_nxt = 0;
3029 else
3030 tp->rcv_tstamp = tcp_time_stamp;
3031 tp->rcv_wup = tp->rcv_nxt;
3032 tp->copied_seq = tp->rcv_nxt;
3033
3034 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
3035 inet_csk(sk)->icsk_retransmits = 0;
3036 tcp_clear_retrans(tp);
3037}
3038
3039static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3040{
3041 struct tcp_sock *tp = tcp_sk(sk);
3042 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3043
3044 tcb->end_seq += skb->len;
3045 __skb_header_release(skb);
3046 __tcp_add_write_queue_tail(sk, skb);
3047 sk->sk_wmem_queued += skb->truesize;
3048 sk_mem_charge(sk, skb->truesize);
3049 tp->write_seq = tcb->end_seq;
3050 tp->packets_out += tcp_skb_pcount(skb);
3051}
3052
/* Build and send a SYN with data and (cached) Fast Open cookie. However,
 * queue a data-only packet after the regular SYN, such that regular SYNs
 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
 * only the SYN sequence, the data are retransmitted in the first ACK.
 * If the cookie is not cached or another error occurs, fall back to sending
 * a regular SYN with a Fast Open cookie request option.
 */
3060static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3061{
3062 struct tcp_sock *tp = tcp_sk(sk);
3063 struct tcp_fastopen_request *fo = tp->fastopen_req;
3064 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
3065 struct sk_buff *syn_data = NULL, *data;
3066 unsigned long last_syn_loss = 0;
3067
3068 tp->rx_opt.mss_clamp = tp->advmss;
3069 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
3070 &syn_loss, &last_syn_loss);
3071
3072 if (syn_loss > 1 &&
3073 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
3074 fo->cookie.len = -1;
3075 goto fallback;
3076 }
3077
3078 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
3079 fo->cookie.len = -1;
3080 else if (fo->cookie.len <= 0)
3081 goto fallback;
3082
/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
 * user-MSS. Reserve maximum option space for middleboxes that add
 * private TCP options or risk getting a MSS update.
 */
3087 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
3088 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3089 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3090 MAX_TCP_OPTION_SPACE;
3091
3092 space = min_t(size_t, space, fo->size);
3093
3094
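/* limit to order-0 allocations */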
3095 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3096
3097 syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
3098 sk->sk_allocation);
3099 if (syn_data == NULL)
3100 goto fallback;
3101
3102 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
3103 struct iovec *iov = &fo->data->msg_iov[i];
3104 unsigned char __user *from = iov->iov_base;
3105 int len = iov->iov_len;
3106
3107 if (syn_data->len + len > space)
3108 len = space - syn_data->len;
3109 else if (i + 1 == iovlen)
3110
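/* No more data pending in inet_wait_for_connect() */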
3111 fo->data = NULL;
3112
3113 if (skb_add_data(syn_data, from, len))
3114 goto fallback;
3115 }
3116
3117
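/* Queue a data-only packet after the regular SYN for retransmission */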
3118 data = pskb_copy(syn_data, sk->sk_allocation);
3119 if (data == NULL)
3120 goto fallback;
3121 TCP_SKB_CB(data)->seq++;
3122 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
3123 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
3124 tcp_connect_queue_skb(sk, data);
3125 fo->copied = data->len;

/* syn_data is about to be sent, we need to take current time stamps
 * for the packets that are in the write queue: the SYN packet and DATA.
 */
3130 skb_mstamp_get(&syn->skb_mstamp);
3131 data->skb_mstamp = syn->skb_mstamp;
3132
3133 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
3134 tp->syn_data = (fo->copied > 0);
3135 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3136 goto done;
3137 }
3138 syn_data = NULL;
3139
3140fallback:
3141
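/* Send a regular SYN with Fast Open cookie request option */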
3142 if (fo->cookie.len > 0)
3143 fo->cookie.len = 0;
3144 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3145 if (err)
3146 tp->syn_fastopen = 0;
3147 kfree_skb(syn_data);
3148done:
3149 fo->cookie.len = -1;
3150 return err;
3151}
3152
3153
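/* Build a SYN and send it off. */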
3154int tcp_connect(struct sock *sk)
3155{
3156 struct tcp_sock *tp = tcp_sk(sk);
3157 struct sk_buff *buff;
3158 int err;
3159
3160 tcp_connect_init(sk);
3161
3162 if (unlikely(tp->repair)) {
3163 tcp_finish_connect(sk, NULL);
3164 return 0;
3165 }
3166
3167 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
3168 if (unlikely(buff == NULL))
3169 return -ENOBUFS;
3170
3171
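/* Reserve space for headers. */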
3172 skb_reserve(buff, MAX_TCP_HEADER);
3173
3174 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3175 tp->retrans_stamp = tcp_time_stamp;
3176 tcp_connect_queue_skb(sk, buff);
3177 TCP_ECN_send_syn(sk, buff);
3178
3179
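/* Send off SYN; include data in Fast Open. */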
3180 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3181 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3182 if (err == -ECONNREFUSED)
3183 return err;

/* We change tp->snd_nxt after the tcp_transmit_skb() call
 * in order to make this packet get counted in tcpOutSegs.
 */
3188 tp->snd_nxt = tp->write_seq;
3189 tp->pushed_seq = tp->write_seq;
3190 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3191
3192
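/* Timer for repeating the SYN until an answer. */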
3193 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3194 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3195 return 0;
3196}
3197EXPORT_SYMBOL(tcp_connect);
3198
/* Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
 * for details.
 */
3203void tcp_send_delayed_ack(struct sock *sk)
3204{
3205 struct inet_connection_sock *icsk = inet_csk(sk);
3206 int ato = icsk->icsk_ack.ato;
3207 unsigned long timeout;
3208
3209 tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
3210
3211 if (ato > TCP_DELACK_MIN) {
3212 const struct tcp_sock *tp = tcp_sk(sk);
3213 int max_ato = HZ / 2;
3214
3215 if (icsk->icsk_ack.pingpong ||
3216 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3217 max_ato = TCP_DELACK_MAX;

/* Slow path, intersegment interval is "high". */

/* If some rtt estimate is known, use it to bound delayed ack.
 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt
 * measurements directly.
 */
3225 if (tp->srtt_us) {
3226 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3227 TCP_DELACK_MIN);
3228
3229 if (rtt < max_ato)
3230 max_ato = rtt;
3231 }
3232
3233 ato = min(ato, max_ato);
3234 }
3235
3236
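/* Stay within the limit we were given */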
3237 timeout = jiffies + ato;
3238
3239
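/* Use new timeout only if there hasn't been a reserved one. */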
3240 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
/* If the delack timer was blocked or is about to expire,
 * send the ACK now.
 */
3244 if (icsk->icsk_ack.blocked ||
3245 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3246 tcp_send_ack(sk);
3247 return;
3248 }
3249
3250 if (!time_before(timeout, icsk->icsk_ack.timeout))
3251 timeout = icsk->icsk_ack.timeout;
3252 }
3253 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3254 icsk->icsk_ack.timeout = timeout;
3255 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3256}
3257
3258
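/* This routine sends an ack and also updates the window. */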
3259void tcp_send_ack(struct sock *sk)
3260{
3261 struct sk_buff *buff;
3262
3263
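/* If we have been reset, we may not send again. */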
3264 if (sk->sk_state == TCP_CLOSE)
3265 return;
3266
3267 tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);

/* We are not putting this on the write queue, so
 * tcp_transmit_skb() will set the ownership to this
 * sock.
 */
3273 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3274 if (buff == NULL) {
3275 inet_csk_schedule_ack(sk);
3276 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3277 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3278 TCP_DELACK_MAX, TCP_RTO_MAX);
3279 return;
3280 }
3281
3282
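/* Reserve space for headers and prepare control bits. */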
3283 skb_reserve(buff, MAX_TCP_HEADER);
3284 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3285
/* We do not want pure acks influencing TCP Small Queues or fq/pacing
 * too much.
 * We also avoid tcp_wfree() overhead (cache line miss accessing
 * tp->tsq_flags) by using regular sock_wfree()
 */
3292 skb_set_tcp_pure_ack(buff);
3293
3294
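/* Send it off, this clears delayed acks for us. */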
3295 skb_mstamp_get(&buff->skb_mstamp);
3296 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3297}
3298EXPORT_SYMBOL_GPL(tcp_send_ack);
3299
/* This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 *
 * Question: what should we make while urgent mode?
 * 4.4BSD forces sending single byte of data. We cannot send
 * out of window data, because we have SND.NXT==SND.MAX...
 *
 * Current solution: to send TWO zero-length segments in urgent mode:
 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
 * out-of-date with SND.UNA-1 to probe window.
 */
3311static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3312{
3313 struct tcp_sock *tp = tcp_sk(sk);
3314 struct sk_buff *skb;
3315
3316
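/* We don't queue it, tcp_transmit_skb() sets ownership. */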
3317 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3318 if (skb == NULL)
3319 return -1;
3320
3321
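/* Reserve space for headers and set control bits. */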
3322 skb_reserve(skb, MAX_TCP_HEADER);
/* Use a previous sequence.  This should cause the other
 * end to send an ack.  Don't queue or clone SKB, just
 * send it.
 */
3327 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3328 skb_mstamp_get(&skb->skb_mstamp);
3329 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3330}
3331
3332void tcp_send_window_probe(struct sock *sk)
3333{
3334 if (sk->sk_state == TCP_ESTABLISHED) {
3335 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3336 tcp_xmit_probe_skb(sk, 0);
3337 }
3338}
3339
3340
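/* Initiate keepalive or window probe from timer. */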
3341int tcp_write_wakeup(struct sock *sk)
3342{
3343 struct tcp_sock *tp = tcp_sk(sk);
3344 struct sk_buff *skb;
3345
3346 if (sk->sk_state == TCP_CLOSE)
3347 return -1;
3348
3349 if ((skb = tcp_send_head(sk)) != NULL &&
3350 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3351 int err;
3352 unsigned int mss = tcp_current_mss(sk);
3353 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3354
3355 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3356 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;

/* We are probing the opening of a window
 * but the window size is != 0
 * must have been a result of SWS avoidance (sender)
 */
3362 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3363 skb->len > mss) {
3364 seg_size = min(seg_size, mss);
3365 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3366 if (tcp_fragment(sk, skb, seg_size, mss))
3367 return -1;
3368 } else if (!tcp_skb_pcount(skb))
3369 tcp_set_skb_tso_segs(sk, skb, mss);
3370
3371 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3372 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3373 if (!err)
3374 tcp_event_new_data_sent(sk, skb);
3375 return err;
3376 } else {
3377 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3378 tcp_xmit_probe_skb(sk, 1);
3379 return tcp_xmit_probe_skb(sk, 0);
3380 }
3381}
3382
/* A window probe timeout has occurred.  If window is not closed send
 * a partial packet else a zero probe.
 */
3386void tcp_send_probe0(struct sock *sk)
3387{
3388 struct inet_connection_sock *icsk = inet_csk(sk);
3389 struct tcp_sock *tp = tcp_sk(sk);
3390 unsigned long probe_max;
3391 int err;
3392
3393 err = tcp_write_wakeup(sk);
3394
3395 if (tp->packets_out || !tcp_send_head(sk)) {
3396
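/* Cancel probe timer, if it is not required. */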
3397 icsk->icsk_probes_out = 0;
3398 icsk->icsk_backoff = 0;
3399 return;
3400 }
3401
3402 if (err <= 0) {
3403 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3404 icsk->icsk_backoff++;
3405 icsk->icsk_probes_out++;
3406 probe_max = TCP_RTO_MAX;
3407 } else {
/* If packet was not sent due to local congestion,
 * do not backoff and do not remember icsk_probes_out.
 * Let local senders fight for local resources.
 *
 * Use accumulated backoff yet.
 */
3414 if (!icsk->icsk_probes_out)
3415 icsk->icsk_probes_out = 1;
3416 probe_max = TCP_RESOURCE_PROBE_INTERVAL;
3417 }
3418 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3419 inet_csk_rto_backoff(icsk, probe_max),
3420 TCP_RTO_MAX);
3421}
3422
3423int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
3424{
3425 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3426 struct flowi fl;
3427 int res;
3428
3429 res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
3430 if (!res) {
3431 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
3432 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3433 }
3434 return res;
3435}
3436EXPORT_SYMBOL(tcp_rtx_synack);
3437