1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/gfp.h>
43#include <linux/module.h>
44
45
46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47
48
49
50
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52
53
54int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
55
56
57
58
59
60int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61
62int sysctl_tcp_mtu_probing __read_mostly = 0;
63int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
64
65
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67
68unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
69EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
70
71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp);
73
74
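/* Account for new data that has been sent to the network: advance the
 * send head, update snd_nxt/packets_out and re-arm the RTO/TLP timer
 * when this is the first packet in flight.
 */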
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76{
77 struct inet_connection_sock *icsk = inet_csk(sk);
78 struct tcp_sock *tp = tcp_sk(sk);
79 unsigned int prior_packets = tp->packets_out;
80
81 tcp_advance_send_head(sk, skb);
82 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
83
84 tp->packets_out += tcp_skb_pcount(skb);
85 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
86 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
87 tcp_rearm_rto(sk);
88 }
89
90 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
91 tcp_skb_pcount(skb));
92}
93
94
95
96
97
98
99
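/* SND.NXT, if the window was not shrunk.  If the window has been shrunk,
 * SND.NXT may lie beyond the right edge, so fall back to the edge of the
 * send window instead.
 */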
100static inline __u32 tcp_acceptable_seq(const struct sock *sk)
101{
102 const struct tcp_sock *tp = tcp_sk(sk);
103
104 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
105 return tp->snd_nxt;
106 else
107 return tcp_wnd_end(tp);
108}
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
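/* Calculate the MSS to advertise in our SYN: the user-supplied advmss,
 * capped by the advertised-MSS metric cached on the route, if any.
 */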
124static __u16 tcp_advertise_mss(struct sock *sk)
125{
126 struct tcp_sock *tp = tcp_sk(sk);
127 const struct dst_entry *dst = __sk_dst_get(sk);
128 int mss = tp->advmss;
129
130 if (dst) {
131 unsigned int metric = dst_metric_advmss(dst);
132
133 if (metric < mss) {
134 mss = metric;
135 tp->advmss = mss;
136 }
137 }
138
139 return (__u16)mss;
140}
141
142
143
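/* RFC2861: reset the congestion window after an idle period longer than
 * the RTO, halving cwnd once per RTO elapsed, down to the restart window.
 */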
144static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
145{
146 struct tcp_sock *tp = tcp_sk(sk);
147 s32 delta = tcp_time_stamp - tp->lsndtime;
148 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
149 u32 cwnd = tp->snd_cwnd;
150
151 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
152
153 tp->snd_ssthresh = tcp_current_ssthresh(sk);
154 restart_cwnd = min(restart_cwnd, cwnd);
155
156 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
157 cwnd >>= 1;
158 tp->snd_cwnd = max(cwnd, restart_cwnd);
159 tp->snd_cwnd_stamp = tcp_time_stamp;
160 tp->snd_cwnd_used = 0;
161}
162
163
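/* Congestion state accounting after a packet has been sent. */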
164static void tcp_event_data_sent(struct tcp_sock *tp,
165 struct sock *sk)
166{
167 struct inet_connection_sock *icsk = inet_csk(sk);
168 const u32 now = tcp_time_stamp;
169
170 if (sysctl_tcp_slow_start_after_idle &&
171 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
172 tcp_cwnd_restart(sk, __sk_dst_get(sk));
173
174 tp->lsndtime = now;
175
176
177
178
179 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
180 icsk->icsk_ack.pingpong = 1;
181}
182
183
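/* Account for an ACK we sent. */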
184static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
185{
186 tcp_dec_quickack_mode(sk, pkts);
187 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
188}
189
190
191u32 tcp_default_init_rwnd(u32 mss)
192{
193
194
195
196
197
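	/* Initial receive window should be twice of TCP_INIT_CWND to
	 * enable proper sending of new unsent data during fast recovery
	 * (RFC 3517, Section 4, NextSeg() rule (2)).  Limit it further
	 * when mss is larger than 1460.
	 */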
198 u32 init_rwnd = TCP_INIT_CWND * 2;
199
200 if (mss > 1460)
201 init_rwnd = max((1460 * init_rwnd) / mss, 2U);
202 return init_rwnd;
203}
204
205
206
207
208
209
210
211
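/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered.  Store the results in the tp structure.
 * NOTE: for smooth operation initial space offering should
 * be a multiple of mss if possible.  We assume here that mss >= 1.
 */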
212void tcp_select_initial_window(int __space, __u32 mss,
213 __u32 *rcv_wnd, __u32 *window_clamp,
214 int wscale_ok, __u8 *rcv_wscale,
215 __u32 init_rcv_wnd)
216{
217 unsigned int space = (__space < 0 ? 0 : __space);
218
219
220 if (*window_clamp == 0)
221 (*window_clamp) = (65535 << 14);
222 space = min(*window_clamp, space);
223
224
225 if (space > mss)
226 space = (space / mss) * mss;
227
228
229
230
231
232
233
234
235
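	/* NOTE: offering an initial window larger than 32767 will break
	 * some buggy TCP stacks that treat the window field as a signed
	 * quantity.  When the workaround sysctl is enabled, cap the
	 * initial offer at MAX_TCP_WINDOW (32767) bytes.
	 */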
236 if (sysctl_tcp_workaround_signed_windows)
237 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
238 else
239 (*rcv_wnd) = space;
240
241 (*rcv_wscale) = 0;
242 if (wscale_ok) {
243
244
245
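		/* Set window scaling on the max possible window; see RFC1323
		 * for the limit of 14.  For example, a 1 MB buffer needs
		 * rcv_wscale = 5 before the scaled window fits in 16 bits.
		 */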
246 space = max_t(u32, space, sysctl_tcp_rmem[2]);
247 space = max_t(u32, space, sysctl_rmem_max);
248 space = min_t(u32, space, *window_clamp);
249 while (space > 65535 && (*rcv_wscale) < 14) {
250 space >>= 1;
251 (*rcv_wscale)++;
252 }
253 }
254
255 if (mss > (1 << *rcv_wscale)) {
256 if (!init_rcv_wnd)
257 init_rcv_wnd = tcp_default_init_rwnd(mss);
258 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
259 }
260
261
262 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
263}
264EXPORT_SYMBOL(tcp_select_initial_window);
265
266
267
268
269
270
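/* Chose a new window to advertise, update state in tcp_sock for the
 * socket, and return the result with RFC1323 scaling applied.  The
 * return value can be stuffed directly into th->window for an
 * outgoing frame.
 */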
271static u16 tcp_select_window(struct sock *sk)
272{
273 struct tcp_sock *tp = tcp_sk(sk);
274 u32 old_win = tp->rcv_wnd;
275 u32 cur_win = tcp_receive_window(tp);
276 u32 new_win = __tcp_select_window(sk);
277
278
279 if (new_win < cur_win) {
280
281
282
283
284
285
286
287 if (new_win == 0)
288 NET_INC_STATS(sock_net(sk),
289 LINUX_MIB_TCPWANTZEROWINDOWADV);
290 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
291 }
292 tp->rcv_wnd = new_win;
293 tp->rcv_wup = tp->rcv_nxt;
294
295
296
297
298 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
299 new_win = min(new_win, MAX_TCP_WINDOW);
300 else
301 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
302
303
304 new_win >>= tp->rx_opt.rcv_wscale;
305
306
307 if (new_win == 0) {
308 tp->pred_flags = 0;
309 if (old_win)
310 NET_INC_STATS(sock_net(sk),
311 LINUX_MIB_TCPTOZEROWINDOWADV);
312 } else if (old_win == 0) {
313 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
314 }
315
316 return new_win;
317}
318
319
320static inline void TCP_ECN_send_synack(struct sock *sk, struct sk_buff *skb)
321{
322 const struct tcp_sock *tp = tcp_sk(sk);
323
324 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
325 if (!(tp->ecn_flags & TCP_ECN_OK))
326 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
327 else if (tcp_ca_needs_ecn(sk))
328 INET_ECN_xmit(sk);
329}
330
331
332static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
333{
334 struct tcp_sock *tp = tcp_sk(sk);
335 bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
336 tcp_ca_needs_ecn(sk);
337
338 if (!use_ecn) {
339 const struct dst_entry *dst = __sk_dst_get(sk);
340
341 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
342 use_ecn = true;
343 }
344
345 tp->ecn_flags = 0;
346
347 if (use_ecn) {
348 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
349 tp->ecn_flags = TCP_ECN_OK;
350 if (tcp_ca_needs_ecn(sk))
351 INET_ECN_xmit(sk);
352 }
353}
354
355static __inline__ void
356TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th,
357 struct sock *sk)
358{
359 if (inet_rsk(req)->ecn_ok) {
360 th->ece = 1;
361 if (tcp_ca_needs_ecn(sk))
362 INET_ECN_xmit(sk);
363 }
364}
365
366
367
368
369static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
370 int tcp_header_len)
371{
372 struct tcp_sock *tp = tcp_sk(sk);
373
374 if (tp->ecn_flags & TCP_ECN_OK) {
375
376 if (skb->len != tcp_header_len &&
377 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
378 INET_ECN_xmit(sk);
379 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
380 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
381 tcp_hdr(skb)->cwr = 1;
382 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
383 }
384 } else if (!tcp_ca_needs_ecn(sk)) {
385
386 INET_ECN_dontxmit(sk);
387 }
388 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
389 tcp_hdr(skb)->ece = 1;
390 }
391}
392
393
394
395
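/* Constructs common control bits of a non-data skb.  If SYN/FIN is
 * present, auto increment end seqno.
 */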
396static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
397{
398 struct skb_shared_info *shinfo = skb_shinfo(skb);
399
400 skb->ip_summed = CHECKSUM_PARTIAL;
401 skb->csum = 0;
402
403 TCP_SKB_CB(skb)->tcp_flags = flags;
404 TCP_SKB_CB(skb)->sacked = 0;
405
406 shinfo->gso_segs = 1;
407 shinfo->gso_size = 0;
408 shinfo->gso_type = 0;
409
410 TCP_SKB_CB(skb)->seq = seq;
411 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
412 seq++;
413 TCP_SKB_CB(skb)->end_seq = seq;
414}
415
416static inline bool tcp_urg_mode(const struct tcp_sock *tp)
417{
418 return tp->snd_una != tp->snd_up;
419}
420
421#define OPTION_SACK_ADVERTISE (1 << 0)
422#define OPTION_TS (1 << 1)
423#define OPTION_MD5 (1 << 2)
424#define OPTION_WSCALE (1 << 3)
425#define OPTION_FAST_OPEN_COOKIE (1 << 8)
426
427struct tcp_out_options {
428 u16 options;
429 u16 mss;
430 u8 ws;
431 u8 num_sack_blocks;
432 u8 hash_size;
433 __u8 *hash_location;
434 __u32 tsval, tsecr;
435 struct tcp_fastopen_cookie *fastopen_cookie;
436};
437
438
439
440
441
442
443
444
445
446
447
448
449
450
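/* Write previously computed TCP options to the packet.
 *
 * Beware: something in the Internet is very sensitive to the ordering
 * of TCP options, so the layout written below (MD5, MSS, timestamps,
 * SACK permitted, window scale, SACK blocks, Fast Open) should not be
 * reshuffled casually.
 */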
451static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
452 struct tcp_out_options *opts)
453{
454 u16 options = opts->options;
455
456 if (unlikely(OPTION_MD5 & options)) {
457 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
458 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
459
460 opts->hash_location = (__u8 *)ptr;
461 ptr += 4;
462 }
463
464 if (unlikely(opts->mss)) {
465 *ptr++ = htonl((TCPOPT_MSS << 24) |
466 (TCPOLEN_MSS << 16) |
467 opts->mss);
468 }
469
470 if (likely(OPTION_TS & options)) {
471 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
472 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
473 (TCPOLEN_SACK_PERM << 16) |
474 (TCPOPT_TIMESTAMP << 8) |
475 TCPOLEN_TIMESTAMP);
476 options &= ~OPTION_SACK_ADVERTISE;
477 } else {
478 *ptr++ = htonl((TCPOPT_NOP << 24) |
479 (TCPOPT_NOP << 16) |
480 (TCPOPT_TIMESTAMP << 8) |
481 TCPOLEN_TIMESTAMP);
482 }
483 *ptr++ = htonl(opts->tsval);
484 *ptr++ = htonl(opts->tsecr);
485 }
486
487 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
488 *ptr++ = htonl((TCPOPT_NOP << 24) |
489 (TCPOPT_NOP << 16) |
490 (TCPOPT_SACK_PERM << 8) |
491 TCPOLEN_SACK_PERM);
492 }
493
494 if (unlikely(OPTION_WSCALE & options)) {
495 *ptr++ = htonl((TCPOPT_NOP << 24) |
496 (TCPOPT_WINDOW << 16) |
497 (TCPOLEN_WINDOW << 8) |
498 opts->ws);
499 }
500
501 if (unlikely(opts->num_sack_blocks)) {
502 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
503 tp->duplicate_sack : tp->selective_acks;
504 int this_sack;
505
506 *ptr++ = htonl((TCPOPT_NOP << 24) |
507 (TCPOPT_NOP << 16) |
508 (TCPOPT_SACK << 8) |
509 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
510 TCPOLEN_SACK_PERBLOCK)));
511
512 for (this_sack = 0; this_sack < opts->num_sack_blocks;
513 ++this_sack) {
514 *ptr++ = htonl(sp[this_sack].start_seq);
515 *ptr++ = htonl(sp[this_sack].end_seq);
516 }
517
518 tp->rx_opt.dsack = 0;
519 }
520
521 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
522 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
523 u8 *p = (u8 *)ptr;
524 u32 len;
525
526 if (foc->exp) {
527 len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
528 *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
529 TCPOPT_FASTOPEN_MAGIC);
530 p += TCPOLEN_EXP_FASTOPEN_BASE;
531 } else {
532 len = TCPOLEN_FASTOPEN_BASE + foc->len;
533 *p++ = TCPOPT_FASTOPEN;
534 *p++ = len;
535 }
536
537 memcpy(p, foc->val, foc->len);
538 if ((len & 3) == 2) {
539 p[foc->len] = TCPOPT_NOP;
540 p[foc->len + 1] = TCPOPT_NOP;
541 }
542 ptr += (len + 3) >> 2;
543 }
544}
545
546
547
548
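/* Compute TCP options for SYN packets.  This is not the final
 * network wire format yet.
 */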
549static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
550 struct tcp_out_options *opts,
551 struct tcp_md5sig_key **md5)
552{
553 struct tcp_sock *tp = tcp_sk(sk);
554 unsigned int remaining = MAX_TCP_OPTION_SPACE;
555 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
556
557#ifdef CONFIG_TCP_MD5SIG
558 *md5 = tp->af_specific->md5_lookup(sk, sk);
559 if (*md5) {
560 opts->options |= OPTION_MD5;
561 remaining -= TCPOLEN_MD5SIG_ALIGNED;
562 }
563#else
564 *md5 = NULL;
565#endif
566
567
568
569
570
571
572
573
574
575
576 opts->mss = tcp_advertise_mss(sk);
577 remaining -= TCPOLEN_MSS_ALIGNED;
578
579 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
580 opts->options |= OPTION_TS;
581 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
582 opts->tsecr = tp->rx_opt.ts_recent;
583 remaining -= TCPOLEN_TSTAMP_ALIGNED;
584 }
585 if (likely(sysctl_tcp_window_scaling)) {
586 opts->ws = tp->rx_opt.rcv_wscale;
587 opts->options |= OPTION_WSCALE;
588 remaining -= TCPOLEN_WSCALE_ALIGNED;
589 }
590 if (likely(sysctl_tcp_sack)) {
591 opts->options |= OPTION_SACK_ADVERTISE;
592 if (unlikely(!(OPTION_TS & opts->options)))
593 remaining -= TCPOLEN_SACKPERM_ALIGNED;
594 }
595
596 if (fastopen && fastopen->cookie.len >= 0) {
597 u32 need = fastopen->cookie.len;
598
599 need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
600 TCPOLEN_FASTOPEN_BASE;
601 need = (need + 3) & ~3U;
602 if (remaining >= need) {
603 opts->options |= OPTION_FAST_OPEN_COOKIE;
604 opts->fastopen_cookie = &fastopen->cookie;
605 remaining -= need;
606 tp->syn_fastopen = 1;
607 tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
608 }
609 }
610
611 return MAX_TCP_OPTION_SPACE - remaining;
612}
613
614
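/* Set up TCP options for SYN-ACKs. */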
615static unsigned int tcp_synack_options(struct sock *sk,
616 struct request_sock *req,
617 unsigned int mss, struct sk_buff *skb,
618 struct tcp_out_options *opts,
619 struct tcp_md5sig_key **md5,
620 struct tcp_fastopen_cookie *foc)
621{
622 struct inet_request_sock *ireq = inet_rsk(req);
623 unsigned int remaining = MAX_TCP_OPTION_SPACE;
624
625#ifdef CONFIG_TCP_MD5SIG
626 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
627 if (*md5) {
628 opts->options |= OPTION_MD5;
629 remaining -= TCPOLEN_MD5SIG_ALIGNED;
630
631
632
633
634
635
636 ireq->tstamp_ok &= !ireq->sack_ok;
637 }
638#else
639 *md5 = NULL;
640#endif
641
642
643 opts->mss = mss;
644 remaining -= TCPOLEN_MSS_ALIGNED;
645
646 if (likely(ireq->wscale_ok)) {
647 opts->ws = ireq->rcv_wscale;
648 opts->options |= OPTION_WSCALE;
649 remaining -= TCPOLEN_WSCALE_ALIGNED;
650 }
651 if (likely(ireq->tstamp_ok)) {
652 opts->options |= OPTION_TS;
653 opts->tsval = tcp_skb_timestamp(skb);
654 opts->tsecr = req->ts_recent;
655 remaining -= TCPOLEN_TSTAMP_ALIGNED;
656 }
657 if (likely(ireq->sack_ok)) {
658 opts->options |= OPTION_SACK_ADVERTISE;
659 if (unlikely(!ireq->tstamp_ok))
660 remaining -= TCPOLEN_SACKPERM_ALIGNED;
661 }
662 if (foc != NULL && foc->len >= 0) {
663 u32 need = foc->len;
664
665 need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
666 TCPOLEN_FASTOPEN_BASE;
667 need = (need + 3) & ~3U;
668 if (remaining >= need) {
669 opts->options |= OPTION_FAST_OPEN_COOKIE;
670 opts->fastopen_cookie = foc;
671 remaining -= need;
672 }
673 }
674
675 return MAX_TCP_OPTION_SPACE - remaining;
676}
677
678
679
680
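/* Compute TCP options for ESTABLISHED sockets.  This is not the
 * final wire format yet.
 */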
681static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
682 struct tcp_out_options *opts,
683 struct tcp_md5sig_key **md5)
684{
685 struct tcp_sock *tp = tcp_sk(sk);
686 unsigned int size = 0;
687 unsigned int eff_sacks;
688
689 opts->options = 0;
690
691#ifdef CONFIG_TCP_MD5SIG
692 *md5 = tp->af_specific->md5_lookup(sk, sk);
693 if (unlikely(*md5)) {
694 opts->options |= OPTION_MD5;
695 size += TCPOLEN_MD5SIG_ALIGNED;
696 }
697#else
698 *md5 = NULL;
699#endif
700
701 if (likely(tp->rx_opt.tstamp_ok)) {
702 opts->options |= OPTION_TS;
703 opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
704 opts->tsecr = tp->rx_opt.ts_recent;
705 size += TCPOLEN_TSTAMP_ALIGNED;
706 }
707
708 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
709 if (unlikely(eff_sacks)) {
710 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
711 opts->num_sack_blocks =
712 min_t(unsigned int, eff_sacks,
713 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
714 TCPOLEN_SACK_PERBLOCK);
715 size += TCPOLEN_SACK_BASE_ALIGNED +
716 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
717 }
718
719 return size;
720}
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
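/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ goal is to keep a small amount of skbs per tcp flow, to reduce
 * RTT and bufferbloat.  We do this using a special skb destructor
 * (tcp_wfree).  Per-cpu tasklets pick up throttled sockets and push
 * pending frames once enough bytes have left the qdisc/device queues.
 */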
737struct tsq_tasklet {
738 struct tasklet_struct tasklet;
739 struct list_head head;
740};
741static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
742
743static void tcp_tsq_handler(struct sock *sk)
744{
745 if ((1 << sk->sk_state) &
746 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
747 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
748 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
749 0, GFP_ATOMIC);
750}
751
752
753
754
755
756
757static void tcp_tasklet_func(unsigned long data)
758{
759 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
760 LIST_HEAD(list);
761 unsigned long flags;
762 struct list_head *q, *n;
763 struct tcp_sock *tp;
764 struct sock *sk;
765
766 local_irq_save(flags);
767 list_splice_init(&tsq->head, &list);
768 local_irq_restore(flags);
769
770 list_for_each_safe(q, n, &list) {
771 tp = list_entry(q, struct tcp_sock, tsq_node);
772 list_del(&tp->tsq_node);
773
774 sk = (struct sock *)tp;
775 bh_lock_sock(sk);
776
777 if (!sock_owned_by_user(sk)) {
778 tcp_tsq_handler(sk);
779 } else {
780
781 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
782 }
783 bh_unlock_sock(sk);
784
785 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
786 sk_free(sk);
787 }
788}
789
790#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
791 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
792 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
793 (1UL << TCP_MTU_REDUCED_DEFERRED))
794
795
796
797
798
799
800
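/* tcp_release_cb - tcp release_sock() callback
 * @sk: socket
 *
 * Called from release_sock() to perform protocol-dependent actions
 * that were deferred while the socket was owned by user context.
 */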
801void tcp_release_cb(struct sock *sk)
802{
803 struct tcp_sock *tp = tcp_sk(sk);
804 unsigned long flags, nflags;
805
806
807 do {
808 flags = tp->tsq_flags;
809 if (!(flags & TCP_DEFERRED_ALL))
810 return;
811 nflags = flags & ~TCP_DEFERRED_ALL;
812 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
813
814 if (flags & (1UL << TCP_TSQ_DEFERRED))
815 tcp_tsq_handler(sk);
816
817
818
819
820
821
822
823
824
825
826 sock_release_ownership(sk);
827
828 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
829 tcp_write_timer_handler(sk);
830 __sock_put(sk);
831 }
832 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
833 tcp_delack_timer_handler(sk);
834 __sock_put(sk);
835 }
836 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
837 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
838 __sock_put(sk);
839 }
840}
841EXPORT_SYMBOL(tcp_release_cb);
842
843void __init tcp_tasklet_init(void)
844{
845 int i;
846
847 for_each_possible_cpu(i) {
848 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
849
850 INIT_LIST_HEAD(&tsq->head);
851 tasklet_init(&tsq->tasklet,
852 tcp_tasklet_func,
853 (unsigned long)tsq);
854 }
855}
856
857
858
859
860
861
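/* Write buffer destructor automatically called from kfree_skb.
 * We can't xmit new skbs from this context, as we might already
 * hold qdisc lock; instead, hand the socket to the per-cpu TSQ tasklet.
 */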
862void tcp_wfree(struct sk_buff *skb)
863{
864 struct sock *sk = skb->sk;
865 struct tcp_sock *tp = tcp_sk(sk);
866
867 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
868 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
869 unsigned long flags;
870 struct tsq_tasklet *tsq;
871
872
873
874
875 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
876
877
878 local_irq_save(flags);
879 tsq = &__get_cpu_var(tsq_tasklet);
880 list_add(&tp->tsq_node, &tsq->head);
881 tasklet_schedule(&tsq->tasklet);
882 local_irq_restore(flags);
883 } else {
884 sock_wfree(skb);
885 }
886}
887
888
889
890
891
892
893
894
895
896
897
898
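/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg().  This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless.  It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 */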
899static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
900 gfp_t gfp_mask)
901{
902 const struct inet_connection_sock *icsk = inet_csk(sk);
903 struct inet_sock *inet;
904 struct tcp_sock *tp;
905 struct tcp_skb_cb *tcb;
906 struct tcp_out_options opts;
907 unsigned int tcp_options_size, tcp_header_size;
908 struct tcp_md5sig_key *md5;
909 struct tcphdr *th;
910 int err;
911
912 BUG_ON(!skb || !tcp_skb_pcount(skb));
913
914 if (clone_it) {
915 skb_mstamp_get(&skb->skb_mstamp);
916
917 if (unlikely(skb_cloned(skb)))
918 skb = pskb_copy(skb, gfp_mask);
919 else
920 skb = skb_clone(skb, gfp_mask);
921 if (unlikely(!skb))
922 return -ENOBUFS;
923 }
924
925 inet = inet_sk(sk);
926 tp = tcp_sk(sk);
927 tcb = TCP_SKB_CB(skb);
928 memset(&opts, 0, sizeof(opts));
929
930 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
931 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
932 else
933 tcp_options_size = tcp_established_options(sk, skb, &opts,
934 &md5);
935 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
936
937 if (tcp_packets_in_flight(tp) == 0)
938 tcp_ca_event(sk, CA_EVENT_TX_START);
939
940
941
942
943 skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
944
945 skb_push(skb, tcp_header_size);
946 skb_reset_transport_header(skb);
947
948 skb_orphan(skb);
949 skb->sk = sk;
950 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
951 skb_set_hash_from_sk(skb, sk);
952 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
953
954 skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
955
956
957 th = tcp_hdr(skb);
958 th->source = inet->inet_sport;
959 th->dest = inet->inet_dport;
960 th->seq = htonl(tcb->seq);
961 th->ack_seq = htonl(tp->rcv_nxt);
962 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
963 tcb->tcp_flags);
964
965 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
966
967
968
969 th->window = htons(min(tp->rcv_wnd, 65535U));
970 } else {
971 th->window = htons(tcp_select_window(sk));
972 }
973 th->check = 0;
974 th->urg_ptr = 0;
975
976
977 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
978 if (before(tp->snd_up, tcb->seq + 0x10000)) {
979 th->urg_ptr = htons(tp->snd_up - tcb->seq);
980 th->urg = 1;
981 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
982 th->urg_ptr = htons(0xFFFF);
983 th->urg = 1;
984 }
985 }
986
987 tcp_options_write((__be32 *)(th + 1), tp, &opts);
988 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
989 TCP_ECN_send(sk, skb, tcp_header_size);
990
991#ifdef CONFIG_TCP_MD5SIG
992
993 if (md5) {
994 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
995 tp->af_specific->calc_md5_hash(opts.hash_location,
996 md5, sk, NULL, skb);
997 }
998#endif
999
1000 icsk->icsk_af_ops->send_check(sk, skb);
1001
1002 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1003 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1004
1005 if (skb->len != tcp_header_size)
1006 tcp_event_data_sent(tp, sk);
1007
1008 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1009 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
1010 tcp_skb_pcount(skb));
1011
1012 tp->segs_out += tcp_skb_pcount(skb);
1013
1014 skb->tstamp.tv64 = 0;
1015 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
1016
1017 if (likely(err <= 0))
1018 return err;
1019
1020 tcp_enter_cwr(sk);
1021
1022 return net_xmit_eval(err);
1023}
1024
1025
1026
1027
1028
1029
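/* This routine just queues the buffer for sending.
 *
 * It advances write_seq and charges the skb's memory to the socket;
 * the segment is transmitted later by tcp_write_xmit().
 */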
1030static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1031{
1032 struct tcp_sock *tp = tcp_sk(sk);
1033
1034
1035 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
1036 __skb_header_release(skb);
1037 tcp_add_write_queue_tail(sk, skb);
1038 sk->sk_wmem_queued += skb->truesize;
1039 sk_mem_charge(sk, skb->truesize);
1040}
1041
1042
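/* Initialize TSO segments for a packet. */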
1043static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
1044 unsigned int mss_now)
1045{
1046 struct skb_shared_info *shinfo = skb_shinfo(skb);
1047
1048
1049 WARN_ON_ONCE(skb_cloned(skb));
1050
1051 if (skb->len <= mss_now || !sk_can_gso(sk) ||
1052 skb->ip_summed == CHECKSUM_NONE) {
1053
1054
1055
1056 shinfo->gso_segs = 1;
1057 shinfo->gso_size = 0;
1058 shinfo->gso_type = 0;
1059 } else {
1060 shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1061 shinfo->gso_size = mss_now;
1062 shinfo->gso_type = sk->sk_gso_type;
1063 }
1064}
1065
1066
1067
1068
1069static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1070 int decr)
1071{
1072 struct tcp_sock *tp = tcp_sk(sk);
1073
1074 if (!tp->sacked_out || tcp_is_reno(tp))
1075 return;
1076
1077 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1078 tp->fackets_out -= decr;
1079}
1080
1081
1082
1083
1084static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1085{
1086 struct tcp_sock *tp = tcp_sk(sk);
1087
1088 tp->packets_out -= decr;
1089
1090 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1091 tp->sacked_out -= decr;
1092 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1093 tp->retrans_out -= decr;
1094 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1095 tp->lost_out -= decr;
1096
1097
1098 if (tcp_is_reno(tp) && decr > 0)
1099 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1100
1101 tcp_adjust_fackets_out(sk, skb, decr);
1102
1103 if (tp->lost_skb_hint &&
1104 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1105 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1106 tp->lost_cnt_hint -= decr;
1107
1108 tcp_verify_left_out(tp);
1109}
1110
1111
1112
1113
1114
1115
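/* Function to create two new TCP segments.  Shrinks the given segment
 * to the specified size and puts the remaining data onto the queue
 * right after it.  This is used when a segment is bigger than the
 * current MSS, or when only part of it needs to be retransmitted.
 */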
1116int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1117 struct sk_buff *skb, u32 len,
1118 unsigned int mss_now)
1119{
1120 struct tcp_sock *tp = tcp_sk(sk);
1121 struct sk_buff *buff;
1122 int nsize, old_factor;
1123 long limit;
1124 int nlen;
1125 u8 flags;
1126
1127 if (WARN_ON(len > skb->len))
1128 return -EINVAL;
1129
1130 nsize = skb_headlen(skb) - len;
1131 if (nsize < 0)
1132 nsize = 0;
1133
1134
1135
1136
1137
1138
1139 limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
1140 if (unlikely(((sk->sk_wmem_queued >> 1) > limit ||
1141 skb_queue_len(&sk->sk_write_queue) > 2048) &&
1142 tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
1143 skb != tcp_write_queue_head(sk) &&
1144 skb != tcp_rtx_queue_tail(sk))) {
1145 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
1146 return -ENOMEM;
1147 }
1148
1149 if (skb_unclone(skb, GFP_ATOMIC))
1150 return -ENOMEM;
1151
1152
1153 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
1154 if (buff == NULL)
1155 return -ENOMEM;
1156
1157 sk->sk_wmem_queued += buff->truesize;
1158 sk_mem_charge(sk, buff->truesize);
1159 nlen = skb->len - len - nsize;
1160 buff->truesize += nlen;
1161 skb->truesize -= nlen;
1162
1163
1164 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1165 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1166 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1167
1168
1169 flags = TCP_SKB_CB(skb)->tcp_flags;
1170 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1171 TCP_SKB_CB(buff)->tcp_flags = flags;
1172 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1173
1174 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1175
1176 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1177 skb_put(buff, nsize),
1178 nsize, 0);
1179
1180 skb_trim(skb, len);
1181
1182 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1183 } else {
1184 skb->ip_summed = CHECKSUM_PARTIAL;
1185 skb_split(skb, buff, len);
1186 }
1187
1188 buff->ip_summed = skb->ip_summed;
1189
1190 buff->tstamp = skb->tstamp;
1191
1192 old_factor = tcp_skb_pcount(skb);
1193
1194
1195 tcp_set_skb_tso_segs(sk, skb, mss_now);
1196 tcp_set_skb_tso_segs(sk, buff, mss_now);
1197
1198
1199
1200
1201 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1202 int diff = old_factor - tcp_skb_pcount(skb) -
1203 tcp_skb_pcount(buff);
1204
1205 if (diff)
1206 tcp_adjust_pcount(sk, skb, diff);
1207 }
1208
1209
1210 __skb_header_release(buff);
1211 tcp_insert_write_queue_after(skb, buff, sk);
1212
1213 return 0;
1214}
1215
1216
1217
1218
1219
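/* This is similar to __pskb_pull_tail().  The difference is that pulled
 * data is not copied, but immediately discarded.
 */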
1220static int __pskb_trim_head(struct sk_buff *skb, int len)
1221{
1222 struct skb_shared_info *shinfo;
1223 int i, k, eat;
1224
1225 eat = min_t(int, len, skb_headlen(skb));
1226 if (eat) {
1227 __skb_pull(skb, eat);
1228 len -= eat;
1229 if (!len)
1230 return 0;
1231 }
1232 eat = len;
1233 k = 0;
1234 shinfo = skb_shinfo(skb);
1235 for (i = 0; i < shinfo->nr_frags; i++) {
1236 int size = skb_frag_size(&shinfo->frags[i]);
1237
1238 if (size <= eat) {
1239 skb_frag_unref(skb, i);
1240 eat -= size;
1241 } else {
1242 shinfo->frags[k] = shinfo->frags[i];
1243 if (eat) {
1244 shinfo->frags[k].page_offset += eat;
1245 skb_frag_size_sub(&shinfo->frags[k], eat);
1246 eat = 0;
1247 }
1248 k++;
1249 }
1250 }
1251 shinfo->nr_frags = k;
1252
1253 skb_reset_tail_pointer(skb);
1254 skb->data_len -= len;
1255 skb->len = skb->data_len;
1256 return len;
1257}
1258
1259
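/* Remove acked data from a packet in the transmit queue. */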
1260int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1261{
1262 u32 delta_truesize;
1263
1264 if (skb_unclone(skb, GFP_ATOMIC))
1265 return -ENOMEM;
1266
1267 delta_truesize = __pskb_trim_head(skb, len);
1268
1269 TCP_SKB_CB(skb)->seq += len;
1270 skb->ip_summed = CHECKSUM_PARTIAL;
1271
1272 if (delta_truesize) {
1273 skb->truesize -= delta_truesize;
1274 sk->sk_wmem_queued -= delta_truesize;
1275 sk_mem_uncharge(sk, delta_truesize);
1276 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1277 }
1278
1279
1280 if (tcp_skb_pcount(skb) > 1)
1281 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1282
1283 return 0;
1284}
1285
1286
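/* Calculate MSS not accounting any TCP options. */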
1287static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1288{
1289 const struct tcp_sock *tp = tcp_sk(sk);
1290 const struct inet_connection_sock *icsk = inet_csk(sk);
1291 int mss_now;
1292
1293
1294
1295
1296 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1297
1298
1299 if (icsk->icsk_af_ops->net_frag_header_len) {
1300 const struct dst_entry *dst = __sk_dst_get(sk);
1301
1302 if (dst && dst_allfrag(dst))
1303 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1304 }
1305
1306
1307 if (mss_now > tp->rx_opt.mss_clamp)
1308 mss_now = tp->rx_opt.mss_clamp;
1309
1310
1311 mss_now -= icsk->icsk_ext_hdr_len;
1312
1313
1314	mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
1315 return mss_now;
1316}
1317
1318
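/* Calculate MSS, also accounting for the fixed option space included in
 * tcp_header_len (but not for variable SACK blocks).
 */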
1319int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1320{
1321
1322 return __tcp_mtu_to_mss(sk, pmtu) -
1323 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1324}
1325
1326
1327int tcp_mss_to_mtu(struct sock *sk, int mss)
1328{
1329 const struct tcp_sock *tp = tcp_sk(sk);
1330 const struct inet_connection_sock *icsk = inet_csk(sk);
1331 int mtu;
1332
1333 mtu = mss +
1334 tp->tcp_header_len +
1335 icsk->icsk_ext_hdr_len +
1336 icsk->icsk_af_ops->net_header_len;
1337
1338
1339 if (icsk->icsk_af_ops->net_frag_header_len) {
1340 const struct dst_entry *dst = __sk_dst_get(sk);
1341
1342 if (dst && dst_allfrag(dst))
1343 mtu += icsk->icsk_af_ops->net_frag_header_len;
1344 }
1345 return mtu;
1346}
1347
1348
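/* MTU probing init per socket */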
1349void tcp_mtup_init(struct sock *sk)
1350{
1351 struct tcp_sock *tp = tcp_sk(sk);
1352 struct inet_connection_sock *icsk = inet_csk(sk);
1353
1354 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
1355 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1356 icsk->icsk_af_ops->net_header_len;
1357 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1358 icsk->icsk_mtup.probe_size = 0;
1359}
1360EXPORT_SYMBOL(tcp_mtup_init);
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
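/* This function synchronizes snd mss to the current pmtu/exthdr set.
 *
 * tp->rx_opt.mss_clamp is the mss negotiated at connection setup and
 * cannot be exceeded.  icsk->icsk_pmtu_cookie caches the last pmtu seen
 * here, and tp->mss_cache becomes the current effective sending mss.
 */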
1384unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1385{
1386 struct tcp_sock *tp = tcp_sk(sk);
1387 struct inet_connection_sock *icsk = inet_csk(sk);
1388 int mss_now;
1389
1390 if (icsk->icsk_mtup.search_high > pmtu)
1391 icsk->icsk_mtup.search_high = pmtu;
1392
1393 mss_now = tcp_mtu_to_mss(sk, pmtu);
1394 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1395
1396
1397 icsk->icsk_pmtu_cookie = pmtu;
1398 if (icsk->icsk_mtup.enabled)
1399 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1400 tp->mss_cache = mss_now;
1401
1402 return mss_now;
1403}
1404EXPORT_SYMBOL(tcp_sync_mss);
1405
1406
1407
1408
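/* Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 */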
1409unsigned int tcp_current_mss(struct sock *sk)
1410{
1411 const struct tcp_sock *tp = tcp_sk(sk);
1412 const struct dst_entry *dst = __sk_dst_get(sk);
1413 u32 mss_now;
1414 unsigned int header_len;
1415 struct tcp_out_options opts;
1416 struct tcp_md5sig_key *md5;
1417
1418 mss_now = tp->mss_cache;
1419
1420 if (dst) {
1421 u32 mtu = dst_mtu(dst);
1422 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1423 mss_now = tcp_sync_mss(sk, mtu);
1424 }
1425
1426 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1427 sizeof(struct tcphdr);
1428
1429
1430
1431
1432 if (header_len != tp->tcp_header_len) {
1433 int delta = (int) header_len - tp->tcp_header_len;
1434 mss_now -= delta;
1435 }
1436
1437 return mss_now;
1438}
1439
1440
1441
1442
1443
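/* RFC2861: if the connection has been limited by the application rather
 * than the network, shrink an under-used congestion window toward what
 * was actually used.
 */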
1444static void tcp_cwnd_application_limited(struct sock *sk)
1445{
1446 struct tcp_sock *tp = tcp_sk(sk);
1447
1448 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1449 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1450
1451 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1452 u32 win_used = max(tp->snd_cwnd_used, init_win);
1453 if (win_used < tp->snd_cwnd) {
1454 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1455 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1456 }
1457 tp->snd_cwnd_used = 0;
1458 }
1459 tp->snd_cwnd_stamp = tcp_time_stamp;
1460}
1461
1462static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1463{
1464 struct tcp_sock *tp = tcp_sk(sk);
1465
1466
1467
1468
1469 if (!before(tp->snd_una, tp->max_packets_seq) ||
1470 tp->packets_out > tp->max_packets_out) {
1471 tp->max_packets_out = tp->packets_out;
1472 tp->max_packets_seq = tp->snd_nxt;
1473 tp->is_cwnd_limited = is_cwnd_limited;
1474 }
1475
1476 if (tcp_is_cwnd_limited(sk)) {
1477
1478 tp->snd_cwnd_used = 0;
1479 tp->snd_cwnd_stamp = tcp_time_stamp;
1480 } else {
1481
1482 if (tp->packets_out > tp->snd_cwnd_used)
1483 tp->snd_cwnd_used = tp->packets_out;
1484
1485 if (sysctl_tcp_slow_start_after_idle &&
1486 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1487 tcp_cwnd_application_limited(sk);
1488 }
1489}
1490
1491
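/* Minshall's variant of the Nagle send check: true if a previously sent,
 * not-yet-acked small (sub-MSS) segment is still outstanding.
 */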
1492static bool tcp_minshall_check(const struct tcp_sock *tp)
1493{
1494 return after(tp->snd_sml, tp->snd_una) &&
1495 !after(tp->snd_sml, tp->snd_nxt);
1496}
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1507 const struct sk_buff *skb)
1508{
1509 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1510 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1511}
1512
1513
1514
1515
1516
1517
1518
1519
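/* Return true if the Nagle/Minshall rules forbid sending this partial
 * (sub-MSS) amount right now: either TCP_CORK is set, or Nagle is
 * enabled and small segments are still unacknowledged.
 */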
1520static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1521 int nonagle)
1522{
1523 return partial &&
1524 ((nonagle & TCP_NAGLE_CORK) ||
1525 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1526}
1527
1528
1529
1530
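/* Return how many segments we would like in a TSO packet: roughly the
 * number of bytes the pacing rate allows per millisecond, bounded by
 * the GSO limits and a configurable minimum.
 */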
1531static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
1532{
1533 u32 bytes, segs;
1534
1535 bytes = min(sk->sk_pacing_rate >> 10,
1536 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1537
1538
1539
1540
1541
1542
1543 segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
1544
1545 return min_t(u32, segs, sk->sk_gso_max_segs);
1546}
1547
1548
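/* Returns the portion of skb which can be sent right away, respecting
 * the send window, the TSO segment budget and the Nagle/Minshall rules
 * for a trailing sub-MSS remainder.
 */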
1549static unsigned int tcp_mss_split_point(const struct sock *sk,
1550 const struct sk_buff *skb,
1551 unsigned int mss_now,
1552 unsigned int max_segs,
1553 int nonagle)
1554{
1555 const struct tcp_sock *tp = tcp_sk(sk);
1556 u32 partial, needed, window, max_len;
1557
1558 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1559 max_len = mss_now * max_segs;
1560
1561 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1562 return max_len;
1563
1564 needed = min(skb->len, window);
1565
1566 if (max_len <= needed)
1567 return max_len;
1568
1569 partial = needed % mss_now;
1570
1571
1572
1573
1574 if (tcp_nagle_check(partial != 0, tp, nonagle))
1575 return needed - partial;
1576
1577 return needed;
1578}
1579
1580
1581
1582
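/* Can at least one segment of SKB be sent right now, according to the
 * congestion window rules?  If so, return how many segments are allowed.
 */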
1583static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1584 const struct sk_buff *skb)
1585{
1586 u32 in_flight, cwnd, halfcwnd;
1587
1588
1589 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1590 tcp_skb_pcount(skb) == 1)
1591 return 1;
1592
1593 in_flight = tcp_packets_in_flight(tp);
1594 cwnd = tp->snd_cwnd;
1595 if (in_flight >= cwnd)
1596 return 0;
1597
1598
1599
1600
1601 halfcwnd = max(cwnd >> 1, 1U);
1602 return min(halfcwnd, cwnd - in_flight);
1603}
1604
1605
1606
1607
1608
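/* Initialize TSO state of a skb.
 * This must be invoked the first time we consider transmitting
 * SKB onto the wire.
 */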
1609static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1610 unsigned int mss_now)
1611{
1612 int tso_segs = tcp_skb_pcount(skb);
1613
1614 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1615 tcp_set_skb_tso_segs(sk, skb, mss_now);
1616 tso_segs = tcp_skb_pcount(skb);
1617 }
1618 return tso_segs;
1619}
1620
1621
1622
1623
1624
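/* Return true if the Nagle test allows this packet to be
 * sent now.
 */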
1625static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1626 unsigned int cur_mss, int nonagle)
1627{
1628
1629
1630
1631
1632
1633
1634 if (nonagle & TCP_NAGLE_PUSH)
1635 return true;
1636
1637
1638 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1639 return true;
1640
1641 if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
1642 return true;
1643
1644 return false;
1645}
1646
1647
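/* Does at least the first segment of SKB fit into the send window? */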
1648static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1649 const struct sk_buff *skb,
1650 unsigned int cur_mss)
1651{
1652 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1653
1654 if (skb->len > cur_mss)
1655 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1656
1657 return !after(end_seq, tcp_wnd_end(tp));
1658}
1659
1660
1661
1662
1663
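/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
 * should be put on the wire right now.  If so, it returns the number of
 * packets allowed by the congestion window.
 */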
1664static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1665 unsigned int cur_mss, int nonagle)
1666{
1667 const struct tcp_sock *tp = tcp_sk(sk);
1668 unsigned int cwnd_quota;
1669
1670 tcp_init_tso_segs(sk, skb, cur_mss);
1671
1672 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1673 return 0;
1674
1675 cwnd_quota = tcp_cwnd_test(tp, skb);
1676 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1677 cwnd_quota = 0;
1678
1679 return cwnd_quota;
1680}
1681
1682
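/* Test if sending is allowed right now. */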
1683bool tcp_may_send_now(struct sock *sk)
1684{
1685 const struct tcp_sock *tp = tcp_sk(sk);
1686 struct sk_buff *skb = tcp_send_head(sk);
1687
1688 return skb &&
1689 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1690 (tcp_skb_is_last(sk, skb) ?
1691 tp->nonagle : TCP_NAGLE_PUSH));
1692}
1693
1694
1695
1696
1697
1698
1699
1700
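/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
 * which is placed in the write queue right after it.  This is a cheaper
 * variant of tcp_fragment() for data that has not yet been transmitted.
 */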
1701static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1702 unsigned int mss_now, gfp_t gfp)
1703{
1704 struct sk_buff *buff;
1705 int nlen = skb->len - len;
1706 u8 flags;
1707
1708
1709 if (skb->len != skb->data_len)
1710 return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
1711 skb, len, mss_now);
1712
1713 buff = sk_stream_alloc_skb(sk, 0, gfp);
1714 if (unlikely(buff == NULL))
1715 return -ENOMEM;
1716
1717 sk->sk_wmem_queued += buff->truesize;
1718 sk_mem_charge(sk, buff->truesize);
1719 buff->truesize += nlen;
1720 skb->truesize -= nlen;
1721
1722
1723 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1724 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1725 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1726
1727
1728 flags = TCP_SKB_CB(skb)->tcp_flags;
1729 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1730 TCP_SKB_CB(buff)->tcp_flags = flags;
1731
1732
1733 TCP_SKB_CB(buff)->sacked = 0;
1734
1735 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1736 skb_split(skb, buff, len);
1737
1738
1739 tcp_set_skb_tso_segs(sk, skb, mss_now);
1740 tcp_set_skb_tso_segs(sk, buff, mss_now);
1741
1742
1743 __skb_header_release(buff);
1744 tcp_insert_write_queue_after(skb, buff, sk);
1745
1746 return 0;
1747}
1748
1749
1750
1751
1752
1753
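/* Try to defer sending, if possible, in order to minimize the amount
 * of TSO splitting we do.  View it as a kind of TSO Nagle test.
 *
 * This algorithm is from John Heffner.
 */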
1754static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1755 bool *is_cwnd_limited, u32 max_segs)
1756{
1757 struct tcp_sock *tp = tcp_sk(sk);
1758 const struct inet_connection_sock *icsk = inet_csk(sk);
1759 u32 send_win, cong_win, limit, in_flight;
1760 int win_divisor;
1761
1762 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1763 goto send_now;
1764
1765 if (icsk->icsk_ca_state != TCP_CA_Open)
1766 goto send_now;
1767
1768
1769 if (tp->tso_deferred &&
1770 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1771 goto send_now;
1772
1773 in_flight = tcp_packets_in_flight(tp);
1774
1775 BUG_ON(tcp_skb_pcount(skb) <= 1);
1776 BUG_ON(tp->snd_cwnd <= in_flight);
1777
1778 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1779
1780
1781 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1782
1783 limit = min(send_win, cong_win);
1784
1785
1786 if (limit >= max_segs * tp->mss_cache)
1787 goto send_now;
1788
1789
1790 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1791 goto send_now;
1792
1793 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1794 if (win_divisor) {
1795 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1796
1797
1798
1799
1800 chunk /= win_divisor;
1801 if (limit >= chunk)
1802 goto send_now;
1803 } else {
1804
1805
1806
1807
1808
1809 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1810 goto send_now;
1811 }
1812
1813
1814
1815
1816 if (!tp->tso_deferred)
1817 tp->tso_deferred = 1 | (jiffies << 1);
1818
1819 if (cong_win < send_win && cong_win < skb->len)
1820 *is_cwnd_limited = true;
1821
1822 return true;
1823
1824send_now:
1825 tp->tso_deferred = 0;
1826 return false;
1827}
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
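/* Create a new MTU probe if we are ready.
 * MTU probing deliberately sends a larger-than-MSS segment to discover
 * whether the path supports a bigger MTU.
 * Returns 0 if we should wait to probe (no cwnd available),
 *         1 if a probe was sent,
 *         -1 otherwise
 */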
1838static int tcp_mtu_probe(struct sock *sk)
1839{
1840 struct tcp_sock *tp = tcp_sk(sk);
1841 struct inet_connection_sock *icsk = inet_csk(sk);
1842 struct sk_buff *skb, *nskb, *next;
1843 int len;
1844 int probe_size;
1845 int size_needed;
1846 int copy;
1847 int mss_now;
1848
1849
1850
1851
1852
1853 if (!icsk->icsk_mtup.enabled ||
1854 icsk->icsk_mtup.probe_size ||
1855 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1856 tp->snd_cwnd < 11 ||
1857 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1858 return -1;
1859
1860
1861 mss_now = tcp_current_mss(sk);
1862 probe_size = 2 * tp->mss_cache;
1863 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1864 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1865
1866 return -1;
1867 }
1868
1869
1870 if (tp->write_seq - tp->snd_nxt < size_needed)
1871 return -1;
1872
1873 if (tp->snd_wnd < size_needed)
1874 return -1;
1875 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1876 return 0;
1877
1878
1879 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1880 if (!tcp_packets_in_flight(tp))
1881 return -1;
1882 else
1883 return 0;
1884 }
1885
1886
1887 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1888 return -1;
1889 sk->sk_wmem_queued += nskb->truesize;
1890 sk_mem_charge(sk, nskb->truesize);
1891
1892 skb = tcp_send_head(sk);
1893
1894 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1895 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1896 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1897 TCP_SKB_CB(nskb)->sacked = 0;
1898 nskb->csum = 0;
1899 nskb->ip_summed = skb->ip_summed;
1900
1901 tcp_insert_write_queue_before(nskb, skb, sk);
1902 tcp_highest_sack_replace(sk, skb, nskb);
1903
1904 len = 0;
1905 tcp_for_write_queue_from_safe(skb, next, sk) {
1906 copy = min_t(int, skb->len, probe_size - len);
1907 if (nskb->ip_summed)
1908 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1909 else
1910 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1911 skb_put(nskb, copy),
1912 copy, nskb->csum);
1913
1914 if (skb->len <= copy) {
1915
1916
1917 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1918 tcp_unlink_write_queue(skb, sk);
1919 sk_wmem_free_skb(sk, skb);
1920 } else {
1921 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1922 ~(TCPHDR_FIN|TCPHDR_PSH);
1923 if (!skb_shinfo(skb)->nr_frags) {
1924 skb_pull(skb, copy);
1925 if (skb->ip_summed != CHECKSUM_PARTIAL)
1926 skb->csum = csum_partial(skb->data,
1927 skb->len, 0);
1928 } else {
1929 __pskb_trim_head(skb, copy);
1930 tcp_set_skb_tso_segs(sk, skb, mss_now);
1931 }
1932 TCP_SKB_CB(skb)->seq += copy;
1933 }
1934
1935 len += copy;
1936
1937 if (len >= probe_size)
1938 break;
1939 }
1940 tcp_init_tso_segs(sk, nskb, nskb->len);
1941
1942
1943
1944
1945 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1946
1947
1948 tp->snd_cwnd--;
1949 tcp_event_new_data_sent(sk, nskb);
1950
1951 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1952 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1953 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1954
1955 return 1;
1956 }
1957
1958 return -1;
1959}
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
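/* This routine writes packets to the network.  It advances the
 * send_head.  This happens as incoming acks open up the remote
 * window for us.
 *
 * Returns true, if no segments are in flight and we have queued segments,
 * but cannot send anything now because of SWS or another problem.
 */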
1975static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1976 int push_one, gfp_t gfp)
1977{
1978 struct tcp_sock *tp = tcp_sk(sk);
1979 struct sk_buff *skb;
1980 unsigned int tso_segs, sent_pkts;
1981 int cwnd_quota;
1982 int result;
1983 bool is_cwnd_limited = false;
1984 u32 max_segs;
1985
1986 sent_pkts = 0;
1987
1988 if (!push_one) {
1989
1990 result = tcp_mtu_probe(sk);
1991 if (!result) {
1992 return false;
1993 } else if (result > 0) {
1994 sent_pkts = 1;
1995 }
1996 }
1997
1998 max_segs = tcp_tso_autosize(sk, mss_now);
1999 while ((skb = tcp_send_head(sk))) {
2000 unsigned int limit;
2001
2002 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
2003 BUG_ON(!tso_segs);
2004
2005 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2006
2007 skb_mstamp_get(&skb->skb_mstamp);
2008 goto repair;
2009 }
2010
2011 cwnd_quota = tcp_cwnd_test(tp, skb);
2012 if (!cwnd_quota) {
2013 is_cwnd_limited = true;
2014 if (push_one == 2)
2015
2016 cwnd_quota = 1;
2017 else
2018 break;
2019 }
2020
2021 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
2022 break;
2023
2024 if (tso_segs == 1 || !max_segs) {
2025 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
2026 (tcp_skb_is_last(sk, skb) ?
2027 nonagle : TCP_NAGLE_PUSH))))
2028 break;
2029 } else {
2030 if (!push_one &&
2031 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2032 max_segs))
2033 break;
2034 }
2035
2036 limit = mss_now;
2037 if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
2038 limit = tcp_mss_split_point(sk, skb, mss_now,
2039 min_t(unsigned int,
2040 cwnd_quota,
2041 max_segs),
2042 nonagle);
2043
2044 if (skb->len > limit &&
2045 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2046 break;
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
2059 limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
2060
2061 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
2062 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
2063
2064
2065
2066
2067
2068
2069 smp_mb__after_clear_bit();
2070 if (atomic_read(&sk->sk_wmem_alloc) > limit)
2071 break;
2072 }
2073
2074 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2075 break;
2076
2077repair:
2078
2079
2080
2081 tcp_event_new_data_sent(sk, skb);
2082
2083 tcp_minshall_update(tp, mss_now, skb);
2084 sent_pkts += tcp_skb_pcount(skb);
2085
2086 if (push_one)
2087 break;
2088 }
2089
2090 if (likely(sent_pkts)) {
2091 if (tcp_in_cwnd_reduction(sk))
2092 tp->prr_out += sent_pkts;
2093
2094
2095 if (push_one != 2)
2096 tcp_schedule_loss_probe(sk);
2097 tcp_cwnd_validate(sk, is_cwnd_limited);
2098 return false;
2099 }
2100 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
2101}
2102
2103bool tcp_schedule_loss_probe(struct sock *sk)
2104{
2105 struct inet_connection_sock *icsk = inet_csk(sk);
2106 struct tcp_sock *tp = tcp_sk(sk);
2107 u32 timeout, tlp_time_stamp, rto_time_stamp;
2108 u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
2109
2110 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
2111 return false;
2112
2113 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
2114 tcp_rearm_rto(sk);
2115 return false;
2116 }
2117
2118
2119
2120 if (sk->sk_state == TCP_SYN_RECV)
2121 return false;
2122
2123
2124 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
2125 return false;
2126
2127
2128
2129
2130 if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
2131 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2132 return false;
2133
2134 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2135 tcp_send_head(sk))
2136 return false;
2137
2138
2139
2140
2141 timeout = rtt << 1;
2142 if (tp->packets_out == 1)
2143 timeout = max_t(u32, timeout,
2144 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
2145 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
2146
2147
2148 tlp_time_stamp = tcp_time_stamp + timeout;
2149 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
2150 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
2151 s32 delta = rto_time_stamp - tcp_time_stamp;
2152 if (delta > 0)
2153 timeout = delta;
2154 }
2155
2156 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
2157 TCP_RTO_MAX);
2158 return true;
2159}
2160
2161
2162
2163
2164
2165
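/* Thanks to skb fast clones, we can detect if a prior transmit of
 * a packet is still in a qdisc or driver queue.
 * In this case, there is very little point doing a retransmit !
 */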
2166static bool skb_still_in_host_queue(const struct sock *sk,
2167 const struct sk_buff *skb)
2168{
2169 if (unlikely(skb_fclone_busy(sk, skb))) {
2170 NET_INC_STATS_BH(sock_net(sk),
2171 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2172 return true;
2173 }
2174 return false;
2175}
2176
2177
2178
2179
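/* When the probe timeout (PTO) fires, try to send one new segment if
 * possible, else retransmit the last segment already in the write queue.
 */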
2180void tcp_send_loss_probe(struct sock *sk)
2181{
2182 struct tcp_sock *tp = tcp_sk(sk);
2183 struct sk_buff *skb;
2184 int pcount;
2185 int mss = tcp_current_mss(sk);
2186 int err = -1;
2187
2188 if (tcp_send_head(sk) != NULL) {
2189 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2190 goto rearm_timer;
2191 }
2192
2193
2194 if (tp->tlp_high_seq)
2195 goto rearm_timer;
2196
2197
2198 skb = tcp_write_queue_tail(sk);
2199 if (WARN_ON(!skb))
2200 goto rearm_timer;
2201
2202 if (skb_still_in_host_queue(sk, skb))
2203 goto rearm_timer;
2204
2205 pcount = tcp_skb_pcount(skb);
2206 if (WARN_ON(!pcount))
2207 goto rearm_timer;
2208
2209 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2210 if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2211 (pcount - 1) * mss, mss)))
2212 goto rearm_timer;
2213 skb = tcp_write_queue_tail(sk);
2214 }
2215
2216 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2217 goto rearm_timer;
2218
2219 err = __tcp_retransmit_skb(sk, skb);
2220
2221
2222 if (likely(!err))
2223 tp->tlp_high_seq = tp->snd_nxt;
2224
2225rearm_timer:
2226 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2227 inet_csk(sk)->icsk_rto,
2228 TCP_RTO_MAX);
2229
2230 if (likely(!err))
2231 NET_INC_STATS_BH(sock_net(sk),
2232 LINUX_MIB_TCPLOSSPROBES);
2233 return;
2234}
2235
2236
2237
2238
2239
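/* Push out any pending frames which were held back due to
 * TCP_CORK or attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 */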
2240void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2241 int nonagle)
2242{
2243
2244
2245
2246
2247 if (unlikely(sk->sk_state == TCP_CLOSE))
2248 return;
2249
2250 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2251 sk_gfp_atomic(sk, GFP_ATOMIC)))
2252 tcp_check_probe_timer(sk);
2253}
2254
2255
2256
2257
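/* Send _single_ skb sitting at the send head.  This function requires
 * true push pending frames to setup probe timer etc.
 */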
2258void tcp_push_one(struct sock *sk, unsigned int mss_now)
2259{
2260 struct sk_buff *skb = tcp_send_head(sk);
2261
2262 BUG_ON(!skb || skb->len < mss_now);
2263
2264 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2265}
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
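/* This function returns the amount of receive window we can advertise.
 *
 * The advertisement trades memory use against silly window syndrome:
 * we try to keep the window an exact multiple of the window scale and
 * of the receiver's MSS, avoid shrinking a window we already offered,
 * and collapse it to zero when free receive space gets too small.
 */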
2319u32 __tcp_select_window(struct sock *sk)
2320{
2321 struct inet_connection_sock *icsk = inet_csk(sk);
2322 struct tcp_sock *tp = tcp_sk(sk);
2323
2324
2325
2326
2327
2328
2329 int mss = icsk->icsk_ack.rcv_mss;
2330 int free_space = tcp_space(sk);
2331 int allowed_space = tcp_full_space(sk);
2332 int full_space = min_t(int, tp->window_clamp, allowed_space);
2333 int window;
2334
2335 if (unlikely(mss > full_space)) {
2336 mss = full_space;
2337 if (mss <= 0)
2338 return 0;
2339 }
2340 if (free_space < (full_space >> 1)) {
2341 icsk->icsk_ack.quick = 0;
2342
2343 if (tcp_under_memory_pressure(sk))
2344 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2345 4U * tp->advmss);
2346
2347
2348
2349
2350 free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
2351
2352
2353
2354
2355
2356
2357
2358
2359 if (free_space < (allowed_space >> 4) || free_space < mss)
2360 return 0;
2361 }
2362
2363 if (free_space > tp->rcv_ssthresh)
2364 free_space = tp->rcv_ssthresh;
2365
2366
2367
2368
2369 window = tp->rcv_wnd;
2370 if (tp->rx_opt.rcv_wscale) {
2371 window = free_space;
2372
2373
2374
2375
2376
2377 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2378 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2379 << tp->rx_opt.rcv_wscale);
2380 } else {
2381
2382
2383
2384
2385
2386
2387
2388
2389 if (window <= free_space - mss || window > free_space)
2390 window = (free_space / mss) * mss;
2391 else if (mss == full_space &&
2392 free_space > window + (full_space >> 1))
2393 window = free_space;
2394 }
2395
2396 return window;
2397}
2398
2399
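/* Collapses two adjacent SKB's during retransmission. */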
2400static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2401{
2402 struct tcp_sock *tp = tcp_sk(sk);
2403 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2404 int skb_size, next_skb_size;
2405
2406 skb_size = skb->len;
2407 next_skb_size = next_skb->len;
2408
2409 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2410
2411 tcp_highest_sack_replace(sk, next_skb, skb);
2412
2413 tcp_unlink_write_queue(next_skb, sk);
2414
2415 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2416 next_skb_size);
2417
2418 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2419 skb->ip_summed = CHECKSUM_PARTIAL;
2420
2421 if (skb->ip_summed != CHECKSUM_PARTIAL)
2422 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2423
2424
2425 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2426
2427
2428 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2429
2430
2431
2432
2433 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2434
2435
2436 tcp_clear_retrans_hints_partial(tp);
2437 if (next_skb == tp->retransmit_skb_hint)
2438 tp->retransmit_skb_hint = skb;
2439
2440 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2441
2442 sk_wmem_free_skb(sk, next_skb);
2443}
2444
2445
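/* Check if coalescing SKBs is legal. */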
2446static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2447{
2448 if (tcp_skb_pcount(skb) > 1)
2449 return false;
2450
2451 if (skb_shinfo(skb)->nr_frags != 0)
2452 return false;
2453 if (skb_cloned(skb))
2454 return false;
2455 if (skb == tcp_send_head(sk))
2456 return false;
2457
2458 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2459 return false;
2460
2461 return true;
2462}
2463
2464
2465
2466
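/* Collapse packets in the retransmit queue to create fewer packets on
 * the wire.  This is only done on retransmission.
 */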
2467static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2468 int space)
2469{
2470 struct tcp_sock *tp = tcp_sk(sk);
2471 struct sk_buff *skb = to, *tmp;
2472 bool first = true;
2473
2474 if (!sysctl_tcp_retrans_collapse)
2475 return;
2476 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2477 return;
2478
2479 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2480 if (!tcp_can_collapse(sk, skb))
2481 break;
2482
2483 space -= skb->len;
2484
2485 if (first) {
2486 first = false;
2487 continue;
2488 }
2489
2490 if (space < 0)
2491 break;
2492
2493
2494
2495 if (skb->len > skb_availroom(to))
2496 break;
2497
2498 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2499 break;
2500
2501 tcp_collapse_retrans(sk, to);
2502 }
2503}
2504
2505
2506
2507
2508
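/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller (tcp_retransmit_skb).
 */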
2509int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2510{
2511 struct tcp_sock *tp = tcp_sk(sk);
2512 struct inet_connection_sock *icsk = inet_csk(sk);
2513 unsigned int cur_mss;
2514 int err;
2515
2516
2517 if (icsk->icsk_mtup.probe_size) {
2518 icsk->icsk_mtup.probe_size = 0;
2519 }
2520
2521
2522
2523
2524 if (atomic_read(&sk->sk_wmem_alloc) >
2525 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2526 return -EAGAIN;
2527
2528 if (skb_still_in_host_queue(sk, skb))
2529 return -EBUSY;
2530
2531 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2532 if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
2533 WARN_ON_ONCE(1);
2534 return -EINVAL;
2535 }
2536 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2537 return -ENOMEM;
2538 }
2539
2540 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2541 return -EHOSTUNREACH;
2542
2543 cur_mss = tcp_current_mss(sk);
2544
2545
2546
2547
2548
2549
2550 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2551 TCP_SKB_CB(skb)->seq != tp->snd_una)
2552 return -EAGAIN;
2553
2554 if (skb->len > cur_mss) {
2555 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2556 cur_mss, cur_mss))
2557 return -ENOMEM;
2558 } else {
2559 int oldpcount = tcp_skb_pcount(skb);
2560
2561 if (unlikely(oldpcount > 1)) {
2562 if (skb_unclone(skb, GFP_ATOMIC))
2563 return -ENOMEM;
2564 tcp_init_tso_segs(sk, skb, cur_mss);
2565 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2566 }
2567 }
2568
2569 tcp_retrans_try_collapse(sk, skb, cur_mss);
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2580 skb_headroom(skb) >= 0xFFFF)) {
2581 struct sk_buff *nskb;
2582
2583 skb_mstamp_get(&skb->skb_mstamp);
2584 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2585 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2586 -ENOBUFS;
2587 } else {
2588 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2589 }
2590
2591 if (likely(!err)) {
2592 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2593
2594 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2595 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2596 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2597 tp->total_retrans++;
2598 }
2599 return err;
2600}
2601
2602int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2603{
2604 struct tcp_sock *tp = tcp_sk(sk);
2605 int err = __tcp_retransmit_skb(sk, skb);
2606
2607 if (err == 0) {
2608#if FASTRETRANS_DEBUG > 0
2609 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2610 net_dbg_ratelimited("retrans_out leaked\n");
2611 }
2612#endif
2613 if (!tp->retrans_out)
2614 tp->lost_retrans_low = tp->snd_nxt;
2615 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2616 tp->retrans_out += tcp_skb_pcount(skb);
2617
2618
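		/* Save stamp of the first retransmit. */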
2619 if (!tp->retrans_stamp)
2620 tp->retrans_stamp = tcp_skb_timestamp(skb);
2621
2622
2623
2624
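		/* snd_nxt is stored to detect loss of the retransmitted
		 * segment, see tcp_input.c tcp_sacktag_write_queue().
		 */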
2625 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2626 } else if (err != -EBUSY) {
2627 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2628 }
2629
2630 if (tp->undo_retrans < 0)
2631 tp->undo_retrans = 0;
2632 tp->undo_retrans += tcp_skb_pcount(skb);
2633 return err;
2634}
2635
2636
2637
2638
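/* Check if forward retransmits are possible in the current window. */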
2639static bool tcp_can_forward_retransmit(struct sock *sk)
2640{
2641 const struct inet_connection_sock *icsk = inet_csk(sk);
2642 const struct tcp_sock *tp = tcp_sk(sk);
2643
2644
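	/* Forward retransmissions are possible only during Recovery. */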
2645 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2646 return false;
2647
2648
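	/* No forward retransmissions in Reno are possible. */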
2649 if (tcp_is_reno(tp))
2650 return false;
2651
2652
2653
2654
2655
2656
2657
2658
2659
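	/* Choose between forward transmission and retransmission: do not
	 * forward-retransmit anything while we still have new segments to
	 * send. Otherwise, follow rule 3 for NextSeg() specified in RFC 3517.
	 */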
2660 if (tcp_may_send_now(sk))
2661 return false;
2662
2663 return true;
2664}
2665
2666
2667
2668
2669
2670
2671
2672
2673
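/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 */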
2674void tcp_xmit_retransmit_queue(struct sock *sk)
2675{
2676 const struct inet_connection_sock *icsk = inet_csk(sk);
2677 struct tcp_sock *tp = tcp_sk(sk);
2678 struct sk_buff *skb;
2679 struct sk_buff *hole = NULL;
2680 u32 last_lost;
2681 int mib_idx;
2682 int fwd_rexmitting = 0;
2683
2684 if (!tp->packets_out)
2685 return;
2686
2687 if (!tp->lost_out)
2688 tp->retransmit_high = tp->snd_una;
2689
2690 if (tp->retransmit_skb_hint) {
2691 skb = tp->retransmit_skb_hint;
2692 last_lost = TCP_SKB_CB(skb)->end_seq;
2693 if (after(last_lost, tp->retransmit_high))
2694 last_lost = tp->retransmit_high;
2695 } else {
2696 skb = tcp_write_queue_head(sk);
2697 last_lost = tp->snd_una;
2698 }
2699
2700 tcp_for_write_queue_from(skb, sk) {
2701 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2702
2703 if (skb == tcp_send_head(sk))
2704 break;
2705
2706 if (hole == NULL)
2707 tp->retransmit_skb_hint = skb;
2708
2709
2710
2711
2712
2713
2714
2715
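		/* Assume this retransmit will generate
		 * only one packet for congestion window
		 * calculation purposes.  This works because
		 * tcp_retransmit_skb() will chop up the
		 * packet to be MSS sized and all the
		 * packet counting works out.
		 */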
2716 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2717 return;
2718
2719 if (fwd_rexmitting) {
2720begin_fwd:
2721 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2722 break;
2723 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2724
2725 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2726 tp->retransmit_high = last_lost;
2727 if (!tcp_can_forward_retransmit(sk))
2728 break;
2729
2730 if (hole != NULL) {
2731 skb = hole;
2732 hole = NULL;
2733 }
2734 fwd_rexmitting = 1;
2735 goto begin_fwd;
2736
2737 } else if (!(sacked & TCPCB_LOST)) {
2738 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2739 hole = skb;
2740 continue;
2741
2742 } else {
2743 last_lost = TCP_SKB_CB(skb)->end_seq;
2744 if (icsk->icsk_ca_state != TCP_CA_Loss)
2745 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2746 else
2747 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2748 }
2749
2750 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2751 continue;
2752
2753 if (tcp_retransmit_skb(sk, skb))
2754 return;
2755
2756 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2757
2758 if (tcp_in_cwnd_reduction(sk))
2759 tp->prr_out += tcp_skb_pcount(skb);
2760
2761 if (skb == tcp_write_queue_head(sk))
2762 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2763 inet_csk(sk)->icsk_rto,
2764 TCP_RTO_MAX);
2765 }
2766}
2767
2768
2769
2770
2771
2772
2773
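/* Unconditionally charge @size bytes of forward allocation to the socket,
 * bypassing the usual memory accounting limits.
 */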
2774void sk_forced_mem_schedule(struct sock *sk, int size)
2775{
2776 int amt, status;
2777
2778 if (size <= sk->sk_forward_alloc)
2779 return;
2780 amt = sk_mem_pages(size);
2781 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2782 sk_memory_allocated_add(sk, amt, &status);
2783}
2784
2785
2786
2787
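/* Send a FIN. The caller locks the socket for us. This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */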
2788void tcp_send_fin(struct sock *sk)
2789{
2790 struct tcp_sock *tp = tcp_sk(sk);
2791 struct sk_buff *skb = tcp_write_queue_tail(sk);
2792 int mss_now;
2793
2794
2795
2796
2797
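	/* Optimization: tack the FIN onto the tail skb if we have a queue
	 * of unsent frames.  But be careful about outgoing SACKs
	 * and IP options.
	 */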
2798 mss_now = tcp_current_mss(sk);
2799
2800 if (tcp_send_head(sk) != NULL) {
2801 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2802 TCP_SKB_CB(skb)->end_seq++;
2803 tp->write_seq++;
2804 } else {
2805
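		/* Socket is locked, keep trying until memory is available. */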
2806 for (;;) {
2807 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2808 sk->sk_allocation);
2809 if (skb)
2810 break;
2811 yield();
2812 }
2813
2814
2815 skb_reserve(skb, MAX_TCP_HEADER);
2816
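		/* FIN eats a sequence byte; write_seq is advanced
		 * by tcp_queue_skb().
		 */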
2817 tcp_init_nondata_skb(skb, tp->write_seq,
2818 TCPHDR_ACK | TCPHDR_FIN);
2819 tcp_queue_skb(sk, skb);
2820 }
2821 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2822}
2823
2824
2825
2826
2827
2828
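/* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
 * was unread data in the receive queue.  This behavior is recommended
 * by RFC 2525, section 2.17.
 */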
2829void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2830{
2831 struct sk_buff *skb;
2832
2833
2834 skb = alloc_skb(MAX_TCP_HEADER, priority);
2835 if (!skb) {
2836 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2837 return;
2838 }
2839
2840
2841 skb_reserve(skb, MAX_TCP_HEADER);
2842 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2843 TCPHDR_ACK | TCPHDR_RST);
2844 skb_mstamp_get(&skb->skb_mstamp);
2845
2846 if (tcp_transmit_skb(sk, skb, 0, priority))
2847 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2848
2849 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2850}
2851
2852
2853
2854
2855
2856
2857
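/* Send a crossed SYN-ACK during socket establishment.
 * WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */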
2858int tcp_send_synack(struct sock *sk)
2859{
2860 struct sk_buff *skb;
2861
2862 skb = tcp_write_queue_head(sk);
2863 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2864 pr_debug("%s: wrong queue state\n", __func__);
2865 return -EFAULT;
2866 }
2867 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2868 if (skb_cloned(skb)) {
2869 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2870 if (nskb == NULL)
2871 return -ENOMEM;
2872 tcp_unlink_write_queue(skb, sk);
2873 __skb_header_release(nskb);
2874 __tcp_add_write_queue_head(sk, nskb);
2875 sk_wmem_free_skb(sk, skb);
2876 sk->sk_wmem_queued += nskb->truesize;
2877 sk_mem_charge(sk, nskb->truesize);
2878 skb = nskb;
2879 }
2880
2881 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2882 TCP_ECN_send_synack(sk, skb);
2883 }
2884 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2885}
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
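/* tcp_make_synack - Prepare a SYN-ACK.
 * sk: listener socket
 * dst: dst entry attached to the SYNACK
 * req: request_sock pointer
 * foc: TCP Fast Open cookie, if any
 *
 * Allocate one skb and build a SYNACK packet.
 * @dst is consumed: the caller should not use it again.
 */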
2896struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2897 struct request_sock *req,
2898 struct tcp_fastopen_cookie *foc)
2899{
2900 struct tcp_out_options opts;
2901 struct inet_request_sock *ireq = inet_rsk(req);
2902 struct tcp_sock *tp = tcp_sk(sk);
2903 struct tcphdr *th;
2904 struct sk_buff *skb;
2905 struct tcp_md5sig_key *md5;
2906 int tcp_header_size;
2907 int mss;
2908
2909 skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
2910 if (unlikely(!skb)) {
2911 dst_release(dst);
2912 return NULL;
2913 }
2914
2915 skb_reserve(skb, MAX_TCP_HEADER);
2916
2917 skb_dst_set(skb, dst);
2918 security_skb_owned_by(skb, sk);
2919
2920 mss = dst_metric_advmss(dst);
2921 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2922 mss = tp->rx_opt.user_mss;
2923
2924 memset(&opts, 0, sizeof(opts));
2925#ifdef CONFIG_SYN_COOKIES
2926 if (unlikely(req->cookie_ts))
2927 skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
2928 else
2929#endif
2930 skb_mstamp_get(&skb->skb_mstamp);
2931 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
2932 foc) + sizeof(*th);
2933
2934 skb_push(skb, tcp_header_size);
2935 skb_reset_transport_header(skb);
2936
2937 th = tcp_hdr(skb);
2938 memset(th, 0, sizeof(struct tcphdr));
2939 th->syn = 1;
2940 th->ack = 1;
2941 TCP_ECN_make_synack(req, th, sk);
2942 th->source = htons(ireq->ir_num);
2943 th->dest = ireq->ir_rmt_port;
2944
2945
2946
2947 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2948 TCPHDR_SYN | TCPHDR_ACK);
2949
2950 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2951
2952 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
2953
2954
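	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */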
2955 th->window = htons(min(req->rcv_wnd, 65535U));
2956 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2957 th->doff = (tcp_header_size >> 2);
2958 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
2959
2960#ifdef CONFIG_TCP_MD5SIG
2961
2962 if (md5) {
2963 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
2964 md5, NULL, req, skb);
2965 }
2966#endif
2967
2968
2969 skb->tstamp.tv64 = 0;
2970 return skb;
2971}
2972EXPORT_SYMBOL(tcp_make_synack);
2973
2974static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
2975{
2976 struct inet_connection_sock *icsk = inet_csk(sk);
2977 const struct tcp_congestion_ops *ca;
2978 u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
2979
2980 if (ca_key == TCP_CA_UNSPEC)
2981 return;
2982
2983 rcu_read_lock();
2984 ca = tcp_ca_find_key(ca_key);
2985 if (likely(ca && try_module_get(ca->owner))) {
2986 module_put(icsk->icsk_ca_ops->owner);
2987 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
2988 icsk->icsk_ca_ops = ca;
2989 }
2990 rcu_read_unlock();
2991}
2992
2993
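/* Do all connect socket setups that can be done AF independent. */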
2994static void tcp_connect_init(struct sock *sk)
2995{
2996 const struct dst_entry *dst = __sk_dst_get(sk);
2997 struct tcp_sock *tp = tcp_sk(sk);
2998 __u8 rcv_wscale;
2999
3000
3001
3002
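	/* We'll fix this up when we get a response from the other end.
	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
	 */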
3003 tp->tcp_header_len = sizeof(struct tcphdr) +
3004 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
3005
3006#ifdef CONFIG_TCP_MD5SIG
3007 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
3008 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
3009#endif
3010
3011
3012 if (tp->rx_opt.user_mss)
3013 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3014 tp->max_window = 0;
3015 tcp_mtup_init(sk);
3016 tcp_sync_mss(sk, dst_mtu(dst));
3017
3018 tcp_ca_dst_init(sk, dst);
3019
3020 if (!tp->window_clamp)
3021 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
3022 tp->advmss = dst_metric_advmss(dst);
3023 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
3024 tp->advmss = tp->rx_opt.user_mss;
3025
3026 tcp_initialize_rcv_mss(sk);
3027
3028
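	/* Limit the window selection if the user enforces a smaller rx buffer. */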
3029 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
3030 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
3031 tp->window_clamp = tcp_full_space(sk);
3032
3033 tcp_select_initial_window(tcp_full_space(sk),
3034 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3035 &tp->rcv_wnd,
3036 &tp->window_clamp,
3037 sysctl_tcp_window_scaling,
3038 &rcv_wscale,
3039 dst_metric(dst, RTAX_INITRWND));
3040
3041 tp->rx_opt.rcv_wscale = rcv_wscale;
3042 tp->rcv_ssthresh = tp->rcv_wnd;
3043
3044 sk->sk_err = 0;
3045 sock_reset_flag(sk, SOCK_DONE);
3046 tp->snd_wnd = 0;
3047 tcp_init_wl(tp, 0);
3048 tcp_write_queue_purge(sk);
3049 tp->snd_una = tp->write_seq;
3050 tp->snd_sml = tp->write_seq;
3051 tp->snd_up = tp->write_seq;
3052 tp->snd_nxt = tp->write_seq;
3053
3054 if (likely(!tp->repair))
3055 tp->rcv_nxt = 0;
3056 else
3057 tp->rcv_tstamp = tcp_time_stamp;
3058 tp->rcv_wup = tp->rcv_nxt;
3059 tp->copied_seq = tp->rcv_nxt;
3060
3061 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
3062 inet_csk(sk)->icsk_retransmits = 0;
3063 tcp_clear_retrans(tp);
3064}
3065
3066static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3067{
3068 struct tcp_sock *tp = tcp_sk(sk);
3069 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
3070
3071 tcb->end_seq += skb->len;
3072 __skb_header_release(skb);
3073 __tcp_add_write_queue_tail(sk, skb);
3074 sk->sk_wmem_queued += skb->truesize;
3075 sk_mem_charge(sk, skb->truesize);
3076 tp->write_seq = tcb->end_seq;
3077 tp->packets_out += tcp_skb_pcount(skb);
3078}
3079
3080
3081
3082
3083
3084
3085
3086
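/* Build and send a SYN with data and a (cached) Fast Open cookie. However,
 * queue a data-only packet after the regular SYN, such that regular SYNs
 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
 * only the SYN sequence, the data are retransmitted in the first ACK.
 * If the cookie is not cached or another error occurs, fall back to sending
 * a regular SYN with a Fast Open cookie request option.
 */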
3087static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3088{
3089 struct tcp_sock *tp = tcp_sk(sk);
3090 struct tcp_fastopen_request *fo = tp->fastopen_req;
3091 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
3092 struct sk_buff *syn_data = NULL, *data;
3093 unsigned long last_syn_loss = 0;
3094
3095 tp->rx_opt.mss_clamp = tp->advmss;
3096 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
3097 &syn_loss, &last_syn_loss);
3098
3099 if (syn_loss > 1 &&
3100 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
3101 fo->cookie.len = -1;
3102 goto fallback;
3103 }
3104
3105 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
3106 fo->cookie.len = -1;
3107 else if (fo->cookie.len <= 0)
3108 goto fallback;
3109
3110
3111
3112
3113
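	/* MSS for SYN-data is based on the cached MSS and bounded by PMTU and
	 * user-MSS. Reserve maximum option space for middleboxes that add
	 * private TCP options or anything else about the original SYN.
	 */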
3114 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
3115 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
3116 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
3117 MAX_TCP_OPTION_SPACE;
3118
3119 space = min_t(size_t, space, fo->size);
3120
3121
3122 space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
3123
3124 syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
3125 sk->sk_allocation);
3126 if (syn_data == NULL)
3127 goto fallback;
3128
3129 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
3130 struct iovec *iov = &fo->data->msg_iov[i];
3131 unsigned char __user *from = iov->iov_base;
3132 int len = iov->iov_len;
3133
3134 if (syn_data->len + len > space)
3135 len = space - syn_data->len;
3136 else if (i + 1 == iovlen)
3137
3138 fo->data = NULL;
3139
3140 if (skb_add_data(syn_data, from, len))
3141 goto fallback;
3142 }
3143
3144
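	/* Queue a data-only packet after the regular SYN for retransmission */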
3145 data = pskb_copy(syn_data, sk->sk_allocation);
3146 if (data == NULL)
3147 goto fallback;
3148 TCP_SKB_CB(data)->seq++;
3149 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
3150 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
3151 tcp_connect_queue_skb(sk, data);
3152 fo->copied = data->len;
3153
3154
3155
3156
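	/* syn_data is about to be sent; take current time stamps
	 * for the packets that are in the write queue: SYN packet and DATA.
	 */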
3157 skb_mstamp_get(&syn->skb_mstamp);
3158 data->skb_mstamp = syn->skb_mstamp;
3159
3160 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
3161 tp->syn_data = (fo->copied > 0);
3162 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3163 goto done;
3164 }
3165
3166
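	/* data was not sent, this is our new send_head */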
3167 sk->sk_send_head = data;
3168 tp->packets_out -= tcp_skb_pcount(data);
3169
3170 syn_data = NULL;
3171
3172fallback:
3173
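	/* Send a regular SYN with Fast Open cookie request option */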
3174 if (fo->cookie.len > 0)
3175 fo->cookie.len = 0;
3176 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
3177 if (err)
3178 tp->syn_fastopen = 0;
3179 kfree_skb(syn_data);
3180done:
3181 fo->cookie.len = -1;
3182 return err;
3183}
3184
3185
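/* Build a SYN and send it off. */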
3186int tcp_connect(struct sock *sk)
3187{
3188 struct tcp_sock *tp = tcp_sk(sk);
3189 struct sk_buff *buff;
3190 int err;
3191
3192 tcp_connect_init(sk);
3193
3194 if (unlikely(tp->repair)) {
3195 tcp_finish_connect(sk, NULL);
3196 return 0;
3197 }
3198
3199 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
3200 if (unlikely(buff == NULL))
3201 return -ENOBUFS;
3202
3203
3204 skb_reserve(buff, MAX_TCP_HEADER);
3205
3206 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3207 tp->retrans_stamp = tcp_time_stamp;
3208 tcp_connect_queue_skb(sk, buff);
3209 TCP_ECN_send_syn(sk, buff);
3210
3211
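	/* Send off SYN; include data in Fast Open. */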
3212 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3213 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3214 if (err == -ECONNREFUSED)
3215 return err;
3216
3217
3218
3219
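	/* We change tp->snd_nxt only after the tcp_transmit_skb() call
	 * so that this packet gets counted in tcpOutSegs.
	 */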
3220 tp->snd_nxt = tp->write_seq;
3221 tp->pushed_seq = tp->write_seq;
3222 buff = tcp_send_head(sk);
3223 if (unlikely(buff)) {
3224 tp->snd_nxt = TCP_SKB_CB(buff)->seq;
3225 tp->pushed_seq = TCP_SKB_CB(buff)->seq;
3226 }
3227 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
3228
3229
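	/* Timer for repeating the SYN until an answer. */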
3230 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3231 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
3232 return 0;
3233}
3234EXPORT_SYMBOL(tcp_connect);
3235
3236
3237
3238
3239
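/* Send out a delayed ack; the caller does the policy checking
 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
 * for the rationale behind this.
 */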
3240void tcp_send_delayed_ack(struct sock *sk)
3241{
3242 struct inet_connection_sock *icsk = inet_csk(sk);
3243 int ato = icsk->icsk_ack.ato;
3244 unsigned long timeout;
3245
3246 tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
3247
3248 if (ato > TCP_DELACK_MIN) {
3249 const struct tcp_sock *tp = tcp_sk(sk);
3250 int max_ato = HZ / 2;
3251
3252 if (icsk->icsk_ack.pingpong ||
3253 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
3254 max_ato = TCP_DELACK_MAX;
3255
3256
3257
3258
3259
3260
3261
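		/* Slow path, intertwine with the delack machinery.
		 * If some rtt estimate is known, use it to bound the delayed
		 * ack; do not use icsk_rto here, use the rtt measurements
		 * directly.
		 */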
3262 if (tp->srtt_us) {
3263 int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
3264 TCP_DELACK_MIN);
3265
3266 if (rtt < max_ato)
3267 max_ato = rtt;
3268 }
3269
3270 ato = min(ato, max_ato);
3271 }
3272
3273
3274 timeout = jiffies + ato;
3275
3276
3277 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
3278
3279
3280
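		/* If the delack timer was blocked or is about to expire,
		 * send the ACK now.
		 */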
3281 if (icsk->icsk_ack.blocked ||
3282 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
3283 tcp_send_ack(sk);
3284 return;
3285 }
3286
3287 if (!time_before(timeout, icsk->icsk_ack.timeout))
3288 timeout = icsk->icsk_ack.timeout;
3289 }
3290 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3291 icsk->icsk_ack.timeout = timeout;
3292 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
3293}
3294
3295
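/* This routine sends an ack and also updates the window. */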
3296void tcp_send_ack(struct sock *sk)
3297{
3298 struct sk_buff *buff;
3299
3300
3301 if (sk->sk_state == TCP_CLOSE)
3302 return;
3303
3304 tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
3305
3306
3307
3308
3309
3310 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3311 if (buff == NULL) {
3312 inet_csk_schedule_ack(sk);
3313 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3314 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3315 TCP_DELACK_MAX, TCP_RTO_MAX);
3316 return;
3317 }
3318
3319
3320 skb_reserve(buff, MAX_TCP_HEADER);
3321 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
3322
3323
3324
3325
3326
3327
3328
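	/* We do not want pure acks influencing TCP Small Queues
	 * or fq/pacing too much.
	 */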
3329 skb_set_tcp_pure_ack(buff);
3330
3331
3332 skb_mstamp_get(&buff->skb_mstamp);
3333 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
3334}
3335EXPORT_SYMBOL_GPL(tcp_send_ack);
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
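/* This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 *
 * In urgent mode the caller sends TWO zero-length segments: one with
 * SEG.SEQ = SND.UNA to deliver the urgent pointer, and one out-of-date
 * with SND.UNA - 1 to probe the window (see tcp_write_wakeup()).
 */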
3348static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3349{
3350 struct tcp_sock *tp = tcp_sk(sk);
3351 struct sk_buff *skb;
3352
3353
3354 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3355 if (skb == NULL)
3356 return -1;
3357
3358
3359 skb_reserve(skb, MAX_TCP_HEADER);
3360
3361
3362
3363
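	/* Use a previous sequence.  This should cause the other
	 * end to send an ack.  Don't queue or clone the SKB, just send it.
	 */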
3364 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3365 skb_mstamp_get(&skb->skb_mstamp);
3366 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3367}
3368
3369void tcp_send_window_probe(struct sock *sk)
3370{
3371 if (sk->sk_state == TCP_ESTABLISHED) {
3372 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3373 tcp_xmit_probe_skb(sk, 0);
3374 }
3375}
3376
3377
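/* Initiate keepalive or window probe from timer. */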
3378int tcp_write_wakeup(struct sock *sk)
3379{
3380 struct tcp_sock *tp = tcp_sk(sk);
3381 struct sk_buff *skb;
3382
3383 if (sk->sk_state == TCP_CLOSE)
3384 return -1;
3385
3386 if ((skb = tcp_send_head(sk)) != NULL &&
3387 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
3388 int err;
3389 unsigned int mss = tcp_current_mss(sk);
3390 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
3391
3392 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
3393 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
3394
3395
3396
3397
3398
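		/* We are probing the opening of a window but the window
		 * size is != 0; this must have been the result of
		 * sender-side SWS avoidance.
		 */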
3399 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3400 skb->len > mss) {
3401 seg_size = min(seg_size, mss);
3402 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3403 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
3404 skb, seg_size, mss))
3405 return -1;
3406 } else if (!tcp_skb_pcount(skb))
3407 tcp_set_skb_tso_segs(sk, skb, mss);
3408
3409 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3410 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3411 if (!err)
3412 tcp_event_new_data_sent(sk, skb);
3413 return err;
3414 } else {
3415 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3416 tcp_xmit_probe_skb(sk, 1);
3417 return tcp_xmit_probe_skb(sk, 0);
3418 }
3419}
3420
3421
3422
3423
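/* A window probe timeout has occurred.  If the window is not closed,
 * send a partial packet, else send a zero probe.
 */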
3424void tcp_send_probe0(struct sock *sk)
3425{
3426 struct inet_connection_sock *icsk = inet_csk(sk);
3427 struct tcp_sock *tp = tcp_sk(sk);
3428 unsigned long probe_max;
3429 int err;
3430
3431 err = tcp_write_wakeup(sk);
3432
3433 if (tp->packets_out || !tcp_send_head(sk)) {
3434
3435 icsk->icsk_probes_out = 0;
3436 icsk->icsk_backoff = 0;
3437 return;
3438 }
3439
3440 if (err <= 0) {
3441 if (icsk->icsk_backoff < sysctl_tcp_retries2)
3442 icsk->icsk_backoff++;
3443 icsk->icsk_probes_out++;
3444 probe_max = TCP_RTO_MAX;
3445 } else {
3446
3447
3448
3449
3450
3451
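		/* If the packet was not sent due to local congestion,
		 * do not back off and do not remember icsk_probes_out.
		 * Let local senders fight for local resources.
		 * Keep the accumulated backoff, though.
		 */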
3452 if (!icsk->icsk_probes_out)
3453 icsk->icsk_probes_out = 1;
3454 probe_max = TCP_RESOURCE_PROBE_INTERVAL;
3455 }
3456 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3457 inet_csk_rto_backoff(icsk, probe_max),
3458 TCP_RTO_MAX);
3459}
3460
3461int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
3462{
3463 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3464 struct flowi fl;
3465 int res;
3466
3467 res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
3468 if (!res) {
3469 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
3470 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3471 }
3472 return res;
3473}
3474EXPORT_SYMBOL(tcp_rtx_synack);
3475