37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h>
40
41#include <linux/compiler.h>
42#include <linux/gfp.h>
43#include <linux/module.h>
44
45
46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47
48
49
50
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52
53
54int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
55
56
57
58
59
60int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61
62int sysctl_tcp_mtu_probing __read_mostly = 0;
63int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
64
65
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67
68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
69 int push_one, gfp_t gfp);
70
71
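/* Account for new data that has just been sent for the first time:
 * advance the send head, push snd_nxt past this skb and, when this is
 * the first packet in flight or an early-retransmit/loss-probe timer
 * was pending, (re)arm the retransmission timer.
 */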
72static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
73{
74 struct inet_connection_sock *icsk = inet_csk(sk);
75 struct tcp_sock *tp = tcp_sk(sk);
76 unsigned int prior_packets = tp->packets_out;
77
78 tcp_advance_send_head(sk, skb);
79 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
80
81 tp->packets_out += tcp_skb_pcount(skb);
82 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
83 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
84 tcp_rearm_rto(sk);
85 }
86}
87
88
89
90
91
92
93
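/* SND.NXT, if the send window permits it; otherwise the highest
 * sequence number the peer will currently accept (used for bare ACKs
 * and RSTs).
 */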
94static inline __u32 tcp_acceptable_seq(const struct sock *sk)
95{
96 const struct tcp_sock *tp = tcp_sk(sk);
97
98 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
99 return tp->snd_nxt;
100 else
101 return tcp_wnd_end(tp);
102}
103
117
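/* Compute the MSS to advertise in our SYN, clamping tp->advmss to the
 * advertised-MSS metric cached on the destination entry, if any.
 */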
118static __u16 tcp_advertise_mss(struct sock *sk)
119{
120 struct tcp_sock *tp = tcp_sk(sk);
121 const struct dst_entry *dst = __sk_dst_get(sk);
122 int mss = tp->advmss;
123
124 if (dst) {
125 unsigned int metric = dst_metric_advmss(dst);
126
127 if (metric < mss) {
128 mss = metric;
129 tp->advmss = mss;
130 }
131 }
132
133 return (__u16)mss;
134}
135
136
137
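/* Congestion window restart after an idle period (RFC 2861 style):
 * halve the congestion window for every RTO that elapsed while the
 * connection was idle, but never go below the initial window for this
 * destination.
 */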
138static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
139{
140 struct tcp_sock *tp = tcp_sk(sk);
141 s32 delta = tcp_time_stamp - tp->lsndtime;
142 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
143 u32 cwnd = tp->snd_cwnd;
144
145 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
146
147 tp->snd_ssthresh = tcp_current_ssthresh(sk);
148 restart_cwnd = min(restart_cwnd, cwnd);
149
150 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
151 cwnd >>= 1;
152 tp->snd_cwnd = max(cwnd, restart_cwnd);
153 tp->snd_cwnd_stamp = tcp_time_stamp;
154 tp->snd_cwnd_used = 0;
155}
156
157
158static void tcp_event_data_sent(struct tcp_sock *tp,
159 struct sock *sk)
160{
161 struct inet_connection_sock *icsk = inet_csk(sk);
162 const u32 now = tcp_time_stamp;
163
164 if (sysctl_tcp_slow_start_after_idle &&
165 (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
166 tcp_cwnd_restart(sk, __sk_dst_get(sk));
167
168 tp->lsndtime = now;
169
170
171
172
173 if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
174 icsk->icsk_ack.pingpong = 1;
175}
176
177
178static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
179{
180 tcp_dec_quickack_mode(sk, pkts);
181 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
182}
183
184
185
186
187
188
189
190
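/* Determine the initial receive window to advertise at connection
 * setup, together with the window clamp and the receive window scale.
 * "__space" is the available receive buffer space; init_rcv_wnd, when
 * non-zero, caps the initial window at that many segments.
 */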
191void tcp_select_initial_window(int __space, __u32 mss,
192 __u32 *rcv_wnd, __u32 *window_clamp,
193 int wscale_ok, __u8 *rcv_wscale,
194 __u32 init_rcv_wnd)
195{
196 unsigned int space = (__space < 0 ? 0 : __space);
197
198
199 if (*window_clamp == 0)
200 (*window_clamp) = (65535 << 14);
201 space = min(*window_clamp, space);
202
203
204 if (space > mss)
205 space = (space / mss) * mss;
206
207
208
209
210
211
212
213
214
215 if (sysctl_tcp_workaround_signed_windows)
216 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
217 else
218 (*rcv_wnd) = space;
219
220 (*rcv_wscale) = 0;
221 if (wscale_ok) {
222
223
224
225 space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
226 space = min_t(u32, space, *window_clamp);
227 while (space > 65535 && (*rcv_wscale) < 14) {
228 space >>= 1;
229 (*rcv_wscale)++;
230 }
231 }
232
233
234
235
236
237 if (mss > (1 << *rcv_wscale)) {
238 int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
239 if (mss > 1460)
240 init_cwnd =
241 max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
242
243
244
245 if (init_rcv_wnd)
246 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
247 else
248 *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
249 }
250
251
252 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
253}
254EXPORT_SYMBOL(tcp_select_initial_window);
255
256
257
258
259
260
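/* Choose the window to advertise on an outgoing segment.  The offer is
 * never allowed to shrink below what was previously advertised, and
 * the result is scaled down by rcv_wscale for the 16-bit header field.
 */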
261static u16 tcp_select_window(struct sock *sk)
262{
263 struct tcp_sock *tp = tcp_sk(sk);
264 u32 cur_win = tcp_receive_window(tp);
265 u32 new_win = __tcp_select_window(sk);
266
267
268 if (new_win < cur_win) {
269
270
271
272
273
274
275
276 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
277 }
278 tp->rcv_wnd = new_win;
279 tp->rcv_wup = tp->rcv_nxt;
280
281
282
283
284 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
285 new_win = min(new_win, MAX_TCP_WINDOW);
286 else
287 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
288
289
290 new_win >>= tp->rx_opt.rcv_wscale;
291
292
293 if (new_win == 0)
294 tp->pred_flags = 0;
295
296 return new_win;
297}
298
299
300static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
301{
302 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
303 if (!(tp->ecn_flags & TCP_ECN_OK))
304 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
305}
306
307
308static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
309{
310 struct tcp_sock *tp = tcp_sk(sk);
311
312 tp->ecn_flags = 0;
313 if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) {
314 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
315 tp->ecn_flags = TCP_ECN_OK;
316 }
317}
318
319static __inline__ void
320TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
321{
322 if (inet_rsk(req)->ecn_ok)
323 th->ece = 1;
324}
325
326
327
328
329static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
330 int tcp_header_len)
331{
332 struct tcp_sock *tp = tcp_sk(sk);
333
334 if (tp->ecn_flags & TCP_ECN_OK) {
335
336 if (skb->len != tcp_header_len &&
337 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
338 INET_ECN_xmit(sk);
339 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
340 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
341 tcp_hdr(skb)->cwr = 1;
342 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
343 }
344 } else {
345
346 INET_ECN_dontxmit(sk);
347 }
348 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
349 tcp_hdr(skb)->ece = 1;
350 }
351}
352
353
354
355
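/* Initialise a control skb that carries no payload.  SYN and FIN
 * consume one sequence number, so end_seq is bumped by one for those
 * flags.
 */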
356static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
357{
358 skb->ip_summed = CHECKSUM_PARTIAL;
359 skb->csum = 0;
360
361 TCP_SKB_CB(skb)->tcp_flags = flags;
362 TCP_SKB_CB(skb)->sacked = 0;
363
364 skb_shinfo(skb)->gso_segs = 1;
365 skb_shinfo(skb)->gso_size = 0;
366 skb_shinfo(skb)->gso_type = 0;
367
368 TCP_SKB_CB(skb)->seq = seq;
369 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
370 seq++;
371 TCP_SKB_CB(skb)->end_seq = seq;
372}
373
374static inline bool tcp_urg_mode(const struct tcp_sock *tp)
375{
376 return tp->snd_una != tp->snd_up;
377}
378
379#define OPTION_SACK_ADVERTISE (1 << 0)
380#define OPTION_TS (1 << 1)
381#define OPTION_MD5 (1 << 2)
382#define OPTION_WSCALE (1 << 3)
383#define OPTION_FAST_OPEN_COOKIE (1 << 8)
384
385struct tcp_out_options {
386 u16 options;
387 u16 mss;
388 u8 ws;
389 u8 num_sack_blocks;
390 u8 hash_size;
391 __u8 *hash_location;
392 __u32 tsval, tsecr;
393 struct tcp_fastopen_cookie *fastopen_cookie;
394};
395
408
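/* Write the previously computed TCP options into the segment at *ptr,
 * in a fixed order (MD5, MSS, timestamps/SACK-permitted, window scale,
 * SACK blocks, Fast Open cookie), padding with NOPs to preserve 32-bit
 * alignment.  The sizes must match what the tcp_*_options() helpers
 * accounted for.
 */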
409static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
410 struct tcp_out_options *opts)
411{
412 u16 options = opts->options;
413
414 if (unlikely(OPTION_MD5 & options)) {
415 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
416 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
417
418 opts->hash_location = (__u8 *)ptr;
419 ptr += 4;
420 }
421
422 if (unlikely(opts->mss)) {
423 *ptr++ = htonl((TCPOPT_MSS << 24) |
424 (TCPOLEN_MSS << 16) |
425 opts->mss);
426 }
427
428 if (likely(OPTION_TS & options)) {
429 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
430 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
431 (TCPOLEN_SACK_PERM << 16) |
432 (TCPOPT_TIMESTAMP << 8) |
433 TCPOLEN_TIMESTAMP);
434 options &= ~OPTION_SACK_ADVERTISE;
435 } else {
436 *ptr++ = htonl((TCPOPT_NOP << 24) |
437 (TCPOPT_NOP << 16) |
438 (TCPOPT_TIMESTAMP << 8) |
439 TCPOLEN_TIMESTAMP);
440 }
441 *ptr++ = htonl(opts->tsval);
442 *ptr++ = htonl(opts->tsecr);
443 }
444
445 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
446 *ptr++ = htonl((TCPOPT_NOP << 24) |
447 (TCPOPT_NOP << 16) |
448 (TCPOPT_SACK_PERM << 8) |
449 TCPOLEN_SACK_PERM);
450 }
451
452 if (unlikely(OPTION_WSCALE & options)) {
453 *ptr++ = htonl((TCPOPT_NOP << 24) |
454 (TCPOPT_WINDOW << 16) |
455 (TCPOLEN_WINDOW << 8) |
456 opts->ws);
457 }
458
459 if (unlikely(opts->num_sack_blocks)) {
460 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
461 tp->duplicate_sack : tp->selective_acks;
462 int this_sack;
463
464 *ptr++ = htonl((TCPOPT_NOP << 24) |
465 (TCPOPT_NOP << 16) |
466 (TCPOPT_SACK << 8) |
467 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
468 TCPOLEN_SACK_PERBLOCK)));
469
470 for (this_sack = 0; this_sack < opts->num_sack_blocks;
471 ++this_sack) {
472 *ptr++ = htonl(sp[this_sack].start_seq);
473 *ptr++ = htonl(sp[this_sack].end_seq);
474 }
475
476 tp->rx_opt.dsack = 0;
477 }
478
479 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
480 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
481
482 *ptr++ = htonl((TCPOPT_EXP << 24) |
483 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
484 TCPOPT_FASTOPEN_MAGIC);
485
486 memcpy(ptr, foc->val, foc->len);
487 if ((foc->len & 3) == 2) {
488 u8 *align = ((u8 *)ptr) + foc->len;
489 align[0] = align[1] = TCPOPT_NOP;
490 }
491 ptr += (foc->len + 3) >> 2;
492 }
493}
494
495
496
497
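/* Compute the options carried on a SYN: MSS always, timestamps unless
 * an MD5 key is configured, window scale, SACK-permitted and, when
 * requested, a Fast Open cookie, all sized against
 * MAX_TCP_OPTION_SPACE.  Returns the number of option bytes used.
 */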
498static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
499 struct tcp_out_options *opts,
500 struct tcp_md5sig_key **md5)
501{
502 struct tcp_sock *tp = tcp_sk(sk);
503 unsigned int remaining = MAX_TCP_OPTION_SPACE;
504 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
505
506#ifdef CONFIG_TCP_MD5SIG
507 *md5 = tp->af_specific->md5_lookup(sk, sk);
508 if (*md5) {
509 opts->options |= OPTION_MD5;
510 remaining -= TCPOLEN_MD5SIG_ALIGNED;
511 }
512#else
513 *md5 = NULL;
514#endif
515
516
517
518
519
520
521
522
523
524
525 opts->mss = tcp_advertise_mss(sk);
526 remaining -= TCPOLEN_MSS_ALIGNED;
527
528 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
529 opts->options |= OPTION_TS;
530 opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
531 opts->tsecr = tp->rx_opt.ts_recent;
532 remaining -= TCPOLEN_TSTAMP_ALIGNED;
533 }
534 if (likely(sysctl_tcp_window_scaling)) {
535 opts->ws = tp->rx_opt.rcv_wscale;
536 opts->options |= OPTION_WSCALE;
537 remaining -= TCPOLEN_WSCALE_ALIGNED;
538 }
539 if (likely(sysctl_tcp_sack)) {
540 opts->options |= OPTION_SACK_ADVERTISE;
541 if (unlikely(!(OPTION_TS & opts->options)))
542 remaining -= TCPOLEN_SACKPERM_ALIGNED;
543 }
544
545 if (fastopen && fastopen->cookie.len >= 0) {
546 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
547 need = (need + 3) & ~3U;
548 if (remaining >= need) {
549 opts->options |= OPTION_FAST_OPEN_COOKIE;
550 opts->fastopen_cookie = &fastopen->cookie;
551 remaining -= need;
552 tp->syn_fastopen = 1;
553 }
554 }
555
556 return MAX_TCP_OPTION_SPACE - remaining;
557}
558
559
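/* Same as tcp_syn_options(), but for a SYN-ACK built from a request
 * sock.  When an MD5 signature is in use, timestamps are only kept if
 * SACK is not also enabled, since all three do not fit together.
 */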
560static unsigned int tcp_synack_options(struct sock *sk,
561 struct request_sock *req,
562 unsigned int mss, struct sk_buff *skb,
563 struct tcp_out_options *opts,
564 struct tcp_md5sig_key **md5,
565 struct tcp_fastopen_cookie *foc)
566{
567 struct inet_request_sock *ireq = inet_rsk(req);
568 unsigned int remaining = MAX_TCP_OPTION_SPACE;
569
570#ifdef CONFIG_TCP_MD5SIG
571 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
572 if (*md5) {
573 opts->options |= OPTION_MD5;
574 remaining -= TCPOLEN_MD5SIG_ALIGNED;
575
576
577
578
579
580
581 ireq->tstamp_ok &= !ireq->sack_ok;
582 }
583#else
584 *md5 = NULL;
585#endif
586
587
588 opts->mss = mss;
589 remaining -= TCPOLEN_MSS_ALIGNED;
590
591 if (likely(ireq->wscale_ok)) {
592 opts->ws = ireq->rcv_wscale;
593 opts->options |= OPTION_WSCALE;
594 remaining -= TCPOLEN_WSCALE_ALIGNED;
595 }
596 if (likely(ireq->tstamp_ok)) {
597 opts->options |= OPTION_TS;
598 opts->tsval = TCP_SKB_CB(skb)->when;
599 opts->tsecr = req->ts_recent;
600 remaining -= TCPOLEN_TSTAMP_ALIGNED;
601 }
602 if (likely(ireq->sack_ok)) {
603 opts->options |= OPTION_SACK_ADVERTISE;
604 if (unlikely(!ireq->tstamp_ok))
605 remaining -= TCPOLEN_SACKPERM_ALIGNED;
606 }
607 if (foc != NULL) {
608 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
609 need = (need + 3) & ~3U;
610 if (remaining >= need) {
611 opts->options |= OPTION_FAST_OPEN_COOKIE;
612 opts->fastopen_cookie = foc;
613 remaining -= need;
614 }
615 }
616
617 return MAX_TCP_OPTION_SPACE - remaining;
618}
619
620
621
622
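/* Compute the options for an established-state segment: an MD5
 * signature when configured, timestamps if negotiated, and as many
 * SACK blocks as fit in the remaining option space.  Returns the total
 * option size in bytes.
 */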
623static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
624 struct tcp_out_options *opts,
625 struct tcp_md5sig_key **md5)
626{
627 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
628 struct tcp_sock *tp = tcp_sk(sk);
629 unsigned int size = 0;
630 unsigned int eff_sacks;
631
632#ifdef CONFIG_TCP_MD5SIG
633 *md5 = tp->af_specific->md5_lookup(sk, sk);
634 if (unlikely(*md5)) {
635 opts->options |= OPTION_MD5;
636 size += TCPOLEN_MD5SIG_ALIGNED;
637 }
638#else
639 *md5 = NULL;
640#endif
641
642 if (likely(tp->rx_opt.tstamp_ok)) {
643 opts->options |= OPTION_TS;
644 opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
645 opts->tsecr = tp->rx_opt.ts_recent;
646 size += TCPOLEN_TSTAMP_ALIGNED;
647 }
648
649 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
650 if (unlikely(eff_sacks)) {
651 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
652 opts->num_sack_blocks =
653 min_t(unsigned int, eff_sacks,
654 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
655 TCPOLEN_SACK_PERBLOCK);
656 size += TCPOLEN_SACK_BASE_ALIGNED +
657 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
658 }
659
660 return size;
661}
677
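/* TCP Small Queues (TSQ) machinery: once a socket has too many bytes
 * queued below the stack (qdisc/NIC), its skb destructor defers
 * further transmission to a per-cpu tasklet instead of queueing more
 * packets, keeping per-flow queues short.
 */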
678struct tsq_tasklet {
679 struct tasklet_struct tasklet;
680 struct list_head head;
681};
682static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
683
684static void tcp_tsq_handler(struct sock *sk)
685{
686 if ((1 << sk->sk_state) &
687 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
688 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
689 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
690}
691
692
693
694
695
696
697static void tcp_tasklet_func(unsigned long data)
698{
699 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
700 LIST_HEAD(list);
701 unsigned long flags;
702 struct list_head *q, *n;
703 struct tcp_sock *tp;
704 struct sock *sk;
705
706 local_irq_save(flags);
707 list_splice_init(&tsq->head, &list);
708 local_irq_restore(flags);
709
710 list_for_each_safe(q, n, &list) {
711 tp = list_entry(q, struct tcp_sock, tsq_node);
712 list_del(&tp->tsq_node);
713
714 sk = (struct sock *)tp;
715 bh_lock_sock(sk);
716
717 if (!sock_owned_by_user(sk)) {
718 tcp_tsq_handler(sk);
719 } else {
720
721 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
722 }
723 bh_unlock_sock(sk);
724
725 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
726 sk_free(sk);
727 }
728}
729
730#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
731 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
732 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
733 (1UL << TCP_MTU_REDUCED_DEFERRED))
734
735
736
737
738
739
740
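/* Called from release_sock(): run the actions (TSQ transmit, write
 * timer, delayed-ACK timer, MTU reduction) that had to be deferred
 * while the socket was owned by user context.
 */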
741void tcp_release_cb(struct sock *sk)
742{
743 struct tcp_sock *tp = tcp_sk(sk);
744 unsigned long flags, nflags;
745
746
747 do {
748 flags = tp->tsq_flags;
749 if (!(flags & TCP_DEFERRED_ALL))
750 return;
751 nflags = flags & ~TCP_DEFERRED_ALL;
752 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
753
754 if (flags & (1UL << TCP_TSQ_DEFERRED))
755 tcp_tsq_handler(sk);
756
757 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
758 tcp_write_timer_handler(sk);
759 __sock_put(sk);
760 }
761 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
762 tcp_delack_timer_handler(sk);
763 __sock_put(sk);
764 }
765 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
766 sk->sk_prot->mtu_reduced(sk);
767 __sock_put(sk);
768 }
769}
770EXPORT_SYMBOL(tcp_release_cb);
771
772void __init tcp_tasklet_init(void)
773{
774 int i;
775
776 for_each_possible_cpu(i) {
777 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
778
779 INIT_LIST_HEAD(&tsq->head);
780 tasklet_init(&tsq->tasklet,
781 tcp_tasklet_func,
782 (unsigned long)tsq);
783 }
784}
785
786
787
788
789
790
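/* Write-buffer destructor used when TSQ is active.  If the socket was
 * throttled and not yet queued, keep one reference and queue it on
 * this cpu's tasklet so more packets can be sent once the skb is
 * freed; otherwise fall back to plain sock_wfree().
 */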
791void tcp_wfree(struct sk_buff *skb)
792{
793 struct sock *sk = skb->sk;
794 struct tcp_sock *tp = tcp_sk(sk);
795
796 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
797 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
798 unsigned long flags;
799 struct tsq_tasklet *tsq;
800
801
802
803
804 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
805
806
807 local_irq_save(flags);
808 tsq = &__get_cpu_var(tsq_tasklet);
809 list_add(&tp->tsq_node, &tsq->head);
810 tasklet_schedule(&tsq->tasklet);
811 local_irq_restore(flags);
812 } else {
813 sock_wfree(skb);
814 }
815}
816
817
818
819
820
821
822
823
824
825
826
827
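/* Build the TCP header for an skb and hand it to the IP layer.  The
 * skb is normally cloned first so that the original stays on the write
 * queue for possible retransmission; clone_it == 0 is used for packets
 * that are never retransmitted (e.g. a RST).
 */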
828static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
829 gfp_t gfp_mask)
830{
831 const struct inet_connection_sock *icsk = inet_csk(sk);
832 struct inet_sock *inet;
833 struct tcp_sock *tp;
834 struct tcp_skb_cb *tcb;
835 struct tcp_out_options opts;
836 unsigned int tcp_options_size, tcp_header_size;
837 struct tcp_md5sig_key *md5;
838 struct tcphdr *th;
839 int err;
840
841 BUG_ON(!skb || !tcp_skb_pcount(skb));
842
843
844
845
846 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
847 __net_timestamp(skb);
848
849 if (likely(clone_it)) {
850 const struct sk_buff *fclone = skb + 1;
851
852 if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
853 fclone->fclone == SKB_FCLONE_CLONE))
854 NET_INC_STATS_BH(sock_net(sk),
855 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
856
857 if (unlikely(skb_cloned(skb)))
858 skb = pskb_copy(skb, gfp_mask);
859 else
860 skb = skb_clone(skb, gfp_mask);
861 if (unlikely(!skb))
862 return -ENOBUFS;
863 }
864
865 inet = inet_sk(sk);
866 tp = tcp_sk(sk);
867 tcb = TCP_SKB_CB(skb);
868 memset(&opts, 0, sizeof(opts));
869
870 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
871 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
872 else
873 tcp_options_size = tcp_established_options(sk, skb, &opts,
874 &md5);
875 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
876
877 if (tcp_packets_in_flight(tp) == 0)
878 tcp_ca_event(sk, CA_EVENT_TX_START);
879
880
881
882
883 skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
884
885 skb_push(skb, tcp_header_size);
886 skb_reset_transport_header(skb);
887
888 skb_orphan(skb);
889 skb->sk = sk;
890 skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
891 tcp_wfree : sock_wfree;
892 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
893
894
895 th = tcp_hdr(skb);
896 th->source = inet->inet_sport;
897 th->dest = inet->inet_dport;
898 th->seq = htonl(tcb->seq);
899 th->ack_seq = htonl(tp->rcv_nxt);
900 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
901 tcb->tcp_flags);
902
903 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
904
905
906
907 th->window = htons(min(tp->rcv_wnd, 65535U));
908 } else {
909 th->window = htons(tcp_select_window(sk));
910 }
911 th->check = 0;
912 th->urg_ptr = 0;
913
914
915 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
916 if (before(tp->snd_up, tcb->seq + 0x10000)) {
917 th->urg_ptr = htons(tp->snd_up - tcb->seq);
918 th->urg = 1;
919 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
920 th->urg_ptr = htons(0xFFFF);
921 th->urg = 1;
922 }
923 }
924
925 tcp_options_write((__be32 *)(th + 1), tp, &opts);
926 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
927 TCP_ECN_send(sk, skb, tcp_header_size);
928
929#ifdef CONFIG_TCP_MD5SIG
930
931 if (md5) {
932 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
933 tp->af_specific->calc_md5_hash(opts.hash_location,
934 md5, sk, NULL, skb);
935 }
936#endif
937
938 icsk->icsk_af_ops->send_check(sk, skb);
939
940 if (likely(tcb->tcp_flags & TCPHDR_ACK))
941 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
942
943 if (skb->len != tcp_header_size)
944 tcp_event_data_sent(tp, sk);
945
946 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
947 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
948 tcp_skb_pcount(skb));
949
950 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
951 if (likely(err <= 0))
952 return err;
953
954 tcp_enter_cwr(sk, 1);
955
956 return net_xmit_eval(err);
957}
958
959
960
961
962
963
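/* Queue new data on the write queue and advance write_seq; the data is
 * transmitted later by tcp_write_xmit() or an explicit push.
 */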
964static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
965{
966 struct tcp_sock *tp = tcp_sk(sk);
967
968
969 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
970 skb_header_release(skb);
971 tcp_add_write_queue_tail(sk, skb);
972 sk->sk_wmem_queued += skb->truesize;
973 sk_mem_charge(sk, skb->truesize);
974}
975
976
977static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
978 unsigned int mss_now)
979{
980 if (skb->len <= mss_now || !sk_can_gso(sk) ||
981 skb->ip_summed == CHECKSUM_NONE) {
982
983
984
985 skb_shinfo(skb)->gso_segs = 1;
986 skb_shinfo(skb)->gso_size = 0;
987 skb_shinfo(skb)->gso_type = 0;
988 } else {
989 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
990 skb_shinfo(skb)->gso_size = mss_now;
991 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
992 }
993}
994
995
996
997
998static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
999 int decr)
1000{
1001 struct tcp_sock *tp = tcp_sk(sk);
1002
1003 if (!tp->sacked_out || tcp_is_reno(tp))
1004 return;
1005
1006 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1007 tp->fackets_out -= decr;
1008}
1009
1010
1011
1012
1013static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
1014{
1015 struct tcp_sock *tp = tcp_sk(sk);
1016
1017 tp->packets_out -= decr;
1018
1019 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1020 tp->sacked_out -= decr;
1021 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1022 tp->retrans_out -= decr;
1023 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
1024 tp->lost_out -= decr;
1025
1026
1027 if (tcp_is_reno(tp) && decr > 0)
1028 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1029
1030 tcp_adjust_fackets_out(sk, skb, decr);
1031
1032 if (tp->lost_skb_hint &&
1033 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1034 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
1035 tp->lost_cnt_hint -= decr;
1036
1037 tcp_verify_left_out(tp);
1038}
1039
1040
1041
1042
1043
1044
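/* Split an skb on the write queue at "len" bytes into two packets.
 * The second half inherits the flags and SACK state, and the per-skb
 * packet counts are adjusted if the original had already been
 * transmitted.
 */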
1045int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1046 unsigned int mss_now)
1047{
1048 struct tcp_sock *tp = tcp_sk(sk);
1049 struct sk_buff *buff;
1050 int nsize, old_factor;
1051 int nlen;
1052 u8 flags;
1053
1054 if (WARN_ON(len > skb->len))
1055 return -EINVAL;
1056
1057 nsize = skb_headlen(skb) - len;
1058 if (nsize < 0)
1059 nsize = 0;
1060
1061 if (skb_cloned(skb) &&
1062 skb_is_nonlinear(skb) &&
1063 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1064 return -ENOMEM;
1065
1066
1067 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
1068 if (buff == NULL)
1069 return -ENOMEM;
1070
1071 sk->sk_wmem_queued += buff->truesize;
1072 sk_mem_charge(sk, buff->truesize);
1073 nlen = skb->len - len - nsize;
1074 buff->truesize += nlen;
1075 skb->truesize -= nlen;
1076
1077
1078 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1079 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1080 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1081
1082
1083 flags = TCP_SKB_CB(skb)->tcp_flags;
1084 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1085 TCP_SKB_CB(buff)->tcp_flags = flags;
1086 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1087
1088 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1089
1090 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1091 skb_put(buff, nsize),
1092 nsize, 0);
1093
1094 skb_trim(skb, len);
1095
1096 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1097 } else {
1098 skb->ip_summed = CHECKSUM_PARTIAL;
1099 skb_split(skb, buff, len);
1100 }
1101
1102 buff->ip_summed = skb->ip_summed;
1103
1104
1105
1106
1107 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
1108 buff->tstamp = skb->tstamp;
1109
1110 old_factor = tcp_skb_pcount(skb);
1111
1112
1113 tcp_set_skb_tso_segs(sk, skb, mss_now);
1114 tcp_set_skb_tso_segs(sk, buff, mss_now);
1115
1116
1117
1118
1119 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1120 int diff = old_factor - tcp_skb_pcount(skb) -
1121 tcp_skb_pcount(buff);
1122
1123 if (diff)
1124 tcp_adjust_pcount(sk, skb, diff);
1125 }
1126
1127
1128 skb_header_release(buff);
1129 tcp_insert_write_queue_after(skb, buff, sk);
1130
1131 return 0;
1132}
1133
1134
1135
1136
1137
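/* Remove "len" bytes from the front of an skb, eating from the linear
 * head first and then from page fragments, without reallocating.
 */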
1138static void __pskb_trim_head(struct sk_buff *skb, int len)
1139{
1140 int i, k, eat;
1141
1142 eat = min_t(int, len, skb_headlen(skb));
1143 if (eat) {
1144 __skb_pull(skb, eat);
1145 len -= eat;
1146 if (!len)
1147 return;
1148 }
1149 eat = len;
1150 k = 0;
1151 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1152 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1153
1154 if (size <= eat) {
1155 skb_frag_unref(skb, i);
1156 eat -= size;
1157 } else {
1158 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1159 if (eat) {
1160 skb_shinfo(skb)->frags[k].page_offset += eat;
1161 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
1162 eat = 0;
1163 }
1164 k++;
1165 }
1166 }
1167 skb_shinfo(skb)->nr_frags = k;
1168
1169 skb_reset_tail_pointer(skb);
1170 skb->data_len -= len;
1171 skb->len = skb->data_len;
1172}
1173
1174
1175int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1176{
1177 if (skb_unclone(skb, GFP_ATOMIC))
1178 return -ENOMEM;
1179
1180 __pskb_trim_head(skb, len);
1181
1182 TCP_SKB_CB(skb)->seq += len;
1183 skb->ip_summed = CHECKSUM_PARTIAL;
1184
1185 skb->truesize -= len;
1186 sk->sk_wmem_queued -= len;
1187 sk_mem_uncharge(sk, len);
1188 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1189
1190
1191 if (tcp_skb_pcount(skb) > 1)
1192 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1193
1194 return 0;
1195}
1196
1197
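/* Convert a path MTU into an MSS: subtract the network and TCP header
 * sizes plus any extension headers, honour the peer's mss_clamp and
 * never return less than 48 bytes.
 */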
1198static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
1199{
1200 const struct tcp_sock *tp = tcp_sk(sk);
1201 const struct inet_connection_sock *icsk = inet_csk(sk);
1202 int mss_now;
1203
1204
1205
1206
1207 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1208
1209
1210 if (icsk->icsk_af_ops->net_frag_header_len) {
1211 const struct dst_entry *dst = __sk_dst_get(sk);
1212
1213 if (dst && dst_allfrag(dst))
1214 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1215 }
1216
1217
1218 if (mss_now > tp->rx_opt.mss_clamp)
1219 mss_now = tp->rx_opt.mss_clamp;
1220
1221
1222 mss_now -= icsk->icsk_ext_hdr_len;
1223
1224
1225 if (mss_now < 48)
1226 mss_now = 48;
1227 return mss_now;
1228}
1229
1230
1231int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1232{
1233
1234 return __tcp_mtu_to_mss(sk, pmtu) -
1235 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
1236}
1237
1238
1239int tcp_mss_to_mtu(struct sock *sk, int mss)
1240{
1241 const struct tcp_sock *tp = tcp_sk(sk);
1242 const struct inet_connection_sock *icsk = inet_csk(sk);
1243 int mtu;
1244
1245 mtu = mss +
1246 tp->tcp_header_len +
1247 icsk->icsk_ext_hdr_len +
1248 icsk->icsk_af_ops->net_header_len;
1249
1250
1251 if (icsk->icsk_af_ops->net_frag_header_len) {
1252 const struct dst_entry *dst = __sk_dst_get(sk);
1253
1254 if (dst && dst_allfrag(dst))
1255 mtu += icsk->icsk_af_ops->net_frag_header_len;
1256 }
1257 return mtu;
1258}
1259
1260
1261void tcp_mtup_init(struct sock *sk)
1262{
1263 struct tcp_sock *tp = tcp_sk(sk);
1264 struct inet_connection_sock *icsk = inet_csk(sk);
1265
1266 icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
1267 icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
1268 icsk->icsk_af_ops->net_header_len;
1269 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1270 icsk->icsk_mtup.probe_size = 0;
1271}
1272EXPORT_SYMBOL(tcp_mtup_init);
1273
1295
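/* Recompute mss_cache for a new path MTU.  The result is clamped by
 * tcp_bound_to_half_wnd() and, when MTU probing is enabled, by the
 * current probe search_low.  The pmtu value is cached in
 * icsk_pmtu_cookie so tcp_current_mss() can detect changes.
 */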
1296unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1297{
1298 struct tcp_sock *tp = tcp_sk(sk);
1299 struct inet_connection_sock *icsk = inet_csk(sk);
1300 int mss_now;
1301
1302 if (icsk->icsk_mtup.search_high > pmtu)
1303 icsk->icsk_mtup.search_high = pmtu;
1304
1305 mss_now = tcp_mtu_to_mss(sk, pmtu);
1306 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
1307
1308
1309 icsk->icsk_pmtu_cookie = pmtu;
1310 if (icsk->icsk_mtup.enabled)
1311 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
1312 tp->mss_cache = mss_now;
1313
1314 return mss_now;
1315}
1316EXPORT_SYMBOL(tcp_sync_mss);
1317
1318
1319
1320
1321unsigned int tcp_current_mss(struct sock *sk)
1322{
1323 const struct tcp_sock *tp = tcp_sk(sk);
1324 const struct dst_entry *dst = __sk_dst_get(sk);
1325 u32 mss_now;
1326 unsigned int header_len;
1327 struct tcp_out_options opts;
1328 struct tcp_md5sig_key *md5;
1329
1330 mss_now = tp->mss_cache;
1331
1332 if (dst) {
1333 u32 mtu = dst_mtu(dst);
1334 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1335 mss_now = tcp_sync_mss(sk, mtu);
1336 }
1337
1338 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1339 sizeof(struct tcphdr);
1340
1341
1342
1343
1344 if (header_len != tp->tcp_header_len) {
1345 int delta = (int) header_len - tp->tcp_header_len;
1346 mss_now -= delta;
1347 }
1348
1349 return mss_now;
1350}
1351
1352
1353static void tcp_cwnd_validate(struct sock *sk)
1354{
1355 struct tcp_sock *tp = tcp_sk(sk);
1356
1357 if (tp->packets_out >= tp->snd_cwnd) {
1358
1359 tp->snd_cwnd_used = 0;
1360 tp->snd_cwnd_stamp = tcp_time_stamp;
1361 } else {
1362
1363 if (tp->packets_out > tp->snd_cwnd_used)
1364 tp->snd_cwnd_used = tp->packets_out;
1365
1366 if (sysctl_tcp_slow_start_after_idle &&
1367 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1368 tcp_cwnd_application_limited(sk);
1369 }
1370}
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
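/* Return how many bytes of this skb may be sent right now as a TSO
 * burst: at most max_segs full-sized segments, limited by the send
 * window, and trimmed down to a multiple of MSS when the skb would
 * have to be split.
 */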
1384static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1385 unsigned int mss_now, unsigned int max_segs)
1386{
1387 const struct tcp_sock *tp = tcp_sk(sk);
1388 u32 needed, window, max_len;
1389
1390 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1391 max_len = mss_now * max_segs;
1392
1393 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
1394 return max_len;
1395
1396 needed = min(skb->len, window);
1397
1398 if (max_len <= needed)
1399 return max_len;
1400
1401 return needed - needed % mss_now;
1402}
1403
1404
1405
1406
1407static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1408 const struct sk_buff *skb)
1409{
1410 u32 in_flight, cwnd;
1411
1412
1413 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1414 tcp_skb_pcount(skb) == 1)
1415 return 1;
1416
1417 in_flight = tcp_packets_in_flight(tp);
1418 cwnd = tp->snd_cwnd;
1419 if (in_flight < cwnd)
1420 return (cwnd - in_flight);
1421
1422 return 0;
1423}
1424
1425
1426
1427
1428
1429static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1430 unsigned int mss_now)
1431{
1432 int tso_segs = tcp_skb_pcount(skb);
1433
1434 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1435 tcp_set_skb_tso_segs(sk, skb, mss_now);
1436 tso_segs = tcp_skb_pcount(skb);
1437 }
1438 return tso_segs;
1439}
1440
1441
1442static inline bool tcp_minshall_check(const struct tcp_sock *tp)
1443{
1444 return after(tp->snd_sml, tp->snd_una) &&
1445 !after(tp->snd_sml, tp->snd_nxt);
1446}
1447
1448
1449
1450
1451
1452
1453
1454
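/* Nagle/Minshall check: a sub-MSS skb must be held back if the user
 * has set TCP_CORK, or if Nagle is enabled and previously sent small
 * data is still unacknowledged.
 */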
1455static inline bool tcp_nagle_check(const struct tcp_sock *tp,
1456 const struct sk_buff *skb,
1457 unsigned int mss_now, int nonagle)
1458{
1459 return skb->len < mss_now &&
1460 ((nonagle & TCP_NAGLE_CORK) ||
1461 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1462}
1463
1464
1465
1466
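/* Return true if this skb may be sent now as far as Nagle is
 * concerned: explicit pushes, urgent data and FINs always go out,
 * otherwise defer to tcp_nagle_check().
 */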
1467static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
1468 unsigned int cur_mss, int nonagle)
1469{
1470
1471
1472
1473
1474
1475
1476 if (nonagle & TCP_NAGLE_PUSH)
1477 return true;
1478
1479
1480 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1481 return true;
1482
1483 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1484 return true;
1485
1486 return false;
1487}
1488
1489
1490static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1491 const struct sk_buff *skb,
1492 unsigned int cur_mss)
1493{
1494 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1495
1496 if (skb->len > cur_mss)
1497 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1498
1499 return !after(end_seq, tcp_wnd_end(tp));
1500}
1501
1502
1503
1504
1505
1506static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1507 unsigned int cur_mss, int nonagle)
1508{
1509 const struct tcp_sock *tp = tcp_sk(sk);
1510 unsigned int cwnd_quota;
1511
1512 tcp_init_tso_segs(sk, skb, cur_mss);
1513
1514 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1515 return 0;
1516
1517 cwnd_quota = tcp_cwnd_test(tp, skb);
1518 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1519 cwnd_quota = 0;
1520
1521 return cwnd_quota;
1522}
1523
1524
1525bool tcp_may_send_now(struct sock *sk)
1526{
1527 const struct tcp_sock *tp = tcp_sk(sk);
1528 struct sk_buff *skb = tcp_send_head(sk);
1529
1530 return skb &&
1531 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1532 (tcp_skb_is_last(sk, skb) ?
1533 tp->nonagle : TCP_NAGLE_PUSH));
1534}
1535
1536
1537
1538
1539
1540
1541
1542
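/* Like tcp_fragment(), but optimised for a fully paged skb on the send
 * head when only part of it fits the current cwnd/window: split it
 * without checksum work so the remainder stays queued.  Falls back to
 * tcp_fragment() if the skb has linear data.
 */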
1543static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1544 unsigned int mss_now, gfp_t gfp)
1545{
1546 struct sk_buff *buff;
1547 int nlen = skb->len - len;
1548 u8 flags;
1549
1550
1551 if (skb->len != skb->data_len)
1552 return tcp_fragment(sk, skb, len, mss_now);
1553
1554 buff = sk_stream_alloc_skb(sk, 0, gfp);
1555 if (unlikely(buff == NULL))
1556 return -ENOMEM;
1557
1558 sk->sk_wmem_queued += buff->truesize;
1559 sk_mem_charge(sk, buff->truesize);
1560 buff->truesize += nlen;
1561 skb->truesize -= nlen;
1562
1563
1564 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1565 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1566 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1567
1568
1569 flags = TCP_SKB_CB(skb)->tcp_flags;
1570 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1571 TCP_SKB_CB(buff)->tcp_flags = flags;
1572
1573
1574 TCP_SKB_CB(buff)->sacked = 0;
1575
1576 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1577 skb_split(skb, buff, len);
1578
1579
1580 tcp_set_skb_tso_segs(sk, skb, mss_now);
1581 tcp_set_skb_tso_segs(sk, buff, mss_now);
1582
1583
1584 skb_header_release(buff);
1585 tcp_insert_write_queue_after(skb, buff, sk);
1586
1587 return 0;
1588}
1589
1590
1591
1592
1593
1594
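/* Decide whether to defer transmission of a TSO skb in the hope of
 * sending a larger burst later.  Deferral is limited to roughly two
 * jiffies and is skipped for FINs, when not in CA_Open, or when the
 * available window already allows a maximal-size burst.
 */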
1595static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1596{
1597 struct tcp_sock *tp = tcp_sk(sk);
1598 const struct inet_connection_sock *icsk = inet_csk(sk);
1599 u32 send_win, cong_win, limit, in_flight;
1600 int win_divisor;
1601
1602 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1603 goto send_now;
1604
1605 if (icsk->icsk_ca_state != TCP_CA_Open)
1606 goto send_now;
1607
1608
1609 if (tp->tso_deferred &&
1610 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1611 goto send_now;
1612
1613 in_flight = tcp_packets_in_flight(tp);
1614
1615 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1616
1617 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1618
1619
1620 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1621
1622 limit = min(send_win, cong_win);
1623
1624
1625 if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
1626 sk->sk_gso_max_segs * tp->mss_cache))
1627 goto send_now;
1628
1629
1630 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1631 goto send_now;
1632
1633 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1634 if (win_divisor) {
1635 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1636
1637
1638
1639
1640 chunk /= win_divisor;
1641 if (limit >= chunk)
1642 goto send_now;
1643 } else {
1644
1645
1646
1647
1648
1649 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1650 goto send_now;
1651 }
1652
1653
1654
1655
1656 if (!tp->tso_deferred)
1657 tp->tso_deferred = 1 | (jiffies << 1);
1658
1659 return true;
1660
1661send_now:
1662 tp->tso_deferred = 0;
1663 return false;
1664}
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
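/* Path MTU probing: when conditions allow, build one probe skb holding
 * 2 * mss_cache worth of queued data and send it; on success the probe
 * size is recorded so an ACK covering it can raise the MSS.  Returns 1
 * if a probe was sent, 0 to hold off transmission for now, -1 to
 * continue transmitting normally.
 */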
1675static int tcp_mtu_probe(struct sock *sk)
1676{
1677 struct tcp_sock *tp = tcp_sk(sk);
1678 struct inet_connection_sock *icsk = inet_csk(sk);
1679 struct sk_buff *skb, *nskb, *next;
1680 int len;
1681 int probe_size;
1682 int size_needed;
1683 int copy;
1684 int mss_now;
1685
1686
1687
1688
1689
1690 if (!icsk->icsk_mtup.enabled ||
1691 icsk->icsk_mtup.probe_size ||
1692 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1693 tp->snd_cwnd < 11 ||
1694 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1695 return -1;
1696
1697
1698 mss_now = tcp_current_mss(sk);
1699 probe_size = 2 * tp->mss_cache;
1700 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1701 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1702
1703 return -1;
1704 }
1705
1706
1707 if (tp->write_seq - tp->snd_nxt < size_needed)
1708 return -1;
1709
1710 if (tp->snd_wnd < size_needed)
1711 return -1;
1712 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1713 return 0;
1714
1715
1716 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1717 if (!tcp_packets_in_flight(tp))
1718 return -1;
1719 else
1720 return 0;
1721 }
1722
1723
1724 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1725 return -1;
1726 sk->sk_wmem_queued += nskb->truesize;
1727 sk_mem_charge(sk, nskb->truesize);
1728
1729 skb = tcp_send_head(sk);
1730
1731 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1732 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1733 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1734 TCP_SKB_CB(nskb)->sacked = 0;
1735 nskb->csum = 0;
1736 nskb->ip_summed = skb->ip_summed;
1737
1738 tcp_insert_write_queue_before(nskb, skb, sk);
1739
1740 len = 0;
1741 tcp_for_write_queue_from_safe(skb, next, sk) {
1742 copy = min_t(int, skb->len, probe_size - len);
1743 if (nskb->ip_summed)
1744 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1745 else
1746 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1747 skb_put(nskb, copy),
1748 copy, nskb->csum);
1749
1750 if (skb->len <= copy) {
1751
1752
1753 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1754 tcp_unlink_write_queue(skb, sk);
1755 sk_wmem_free_skb(sk, skb);
1756 } else {
1757 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1758 ~(TCPHDR_FIN|TCPHDR_PSH);
1759 if (!skb_shinfo(skb)->nr_frags) {
1760 skb_pull(skb, copy);
1761 if (skb->ip_summed != CHECKSUM_PARTIAL)
1762 skb->csum = csum_partial(skb->data,
1763 skb->len, 0);
1764 } else {
1765 __pskb_trim_head(skb, copy);
1766 tcp_set_skb_tso_segs(sk, skb, mss_now);
1767 }
1768 TCP_SKB_CB(skb)->seq += copy;
1769 }
1770
1771 len += copy;
1772
1773 if (len >= probe_size)
1774 break;
1775 }
1776 tcp_init_tso_segs(sk, nskb, nskb->len);
1777
1778
1779
1780 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1781 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1782
1783
1784 tp->snd_cwnd--;
1785 tcp_event_new_data_sent(sk, nskb);
1786
1787 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1788 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1789 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1790
1791 return 1;
1792 }
1793
1794 return -1;
1795}
1796
1810
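/* Main transmit loop: send as much queued data as the congestion
 * window, receive window, Nagle, TSO deferral and TSQ limits allow.
 * push_one == 1 sends a single skb; push_one == 2 is used by the loss
 * probe and may override a zero cwnd quota.  Returns true if nothing
 * was sent even though data remains queued, so the caller can arm the
 * probe timer.
 */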
1811static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1812 int push_one, gfp_t gfp)
1813{
1814 struct tcp_sock *tp = tcp_sk(sk);
1815 struct sk_buff *skb;
1816 unsigned int tso_segs, sent_pkts;
1817 int cwnd_quota;
1818 int result;
1819
1820 sent_pkts = 0;
1821
1822 if (!push_one) {
1823
1824 result = tcp_mtu_probe(sk);
1825 if (!result) {
1826 return false;
1827 } else if (result > 0) {
1828 sent_pkts = 1;
1829 }
1830 }
1831
1832 while ((skb = tcp_send_head(sk))) {
1833 unsigned int limit;
1834
1835
1836 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1837 BUG_ON(!tso_segs);
1838
1839 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
1840 goto repair;
1841
1842 cwnd_quota = tcp_cwnd_test(tp, skb);
1843 if (!cwnd_quota) {
1844 if (push_one == 2)
1845
1846 cwnd_quota = 1;
1847 else
1848 break;
1849 }
1850
1851 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
1852 break;
1853
1854 if (tso_segs == 1) {
1855 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
1856 (tcp_skb_is_last(sk, skb) ?
1857 nonagle : TCP_NAGLE_PUSH))))
1858 break;
1859 } else {
1860 if (!push_one && tcp_tso_should_defer(sk, skb))
1861 break;
1862 }
1863
1864
1865
1866
1867 if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
1868 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
1869 break;
1870 }
1871 limit = mss_now;
1872 if (tso_segs > 1 && !tcp_urg_mode(tp))
1873 limit = tcp_mss_split_point(sk, skb, mss_now,
1874 min_t(unsigned int,
1875 cwnd_quota,
1876 sk->sk_gso_max_segs));
1877
1878 if (skb->len > limit &&
1879 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
1880 break;
1881
1882 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1883
1884 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
1885 break;
1886
1887repair:
1888
1889
1890
1891 tcp_event_new_data_sent(sk, skb);
1892
1893 tcp_minshall_update(tp, mss_now, skb);
1894 sent_pkts += tcp_skb_pcount(skb);
1895
1896 if (push_one)
1897 break;
1898 }
1899
1900 if (likely(sent_pkts)) {
1901 if (tcp_in_cwnd_reduction(sk))
1902 tp->prr_out += sent_pkts;
1903
1904
1905 if (push_one != 2)
1906 tcp_schedule_loss_probe(sk);
1907 tcp_cwnd_validate(sk);
1908 return false;
1909 }
1910 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
1911}
1912
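/* Arm the tail loss probe (TLP) timer: roughly 2*SRTT, stretched when
 * only one packet is in flight to allow for a delayed ACK, with a
 * 10 ms floor, and never firing later than the pending RTO.  Returns
 * true if the TLP timer was armed.
 */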
1913bool tcp_schedule_loss_probe(struct sock *sk)
1914{
1915 struct inet_connection_sock *icsk = inet_csk(sk);
1916 struct tcp_sock *tp = tcp_sk(sk);
1917 u32 timeout, tlp_time_stamp, rto_time_stamp;
1918 u32 rtt = tp->srtt >> 3;
1919
1920 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
1921 return false;
1922
1923 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
1924 tcp_rearm_rto(sk);
1925 return false;
1926 }
1927
1928
1929
1930 if (sk->sk_state == TCP_SYN_RECV)
1931 return false;
1932
1933
1934 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
1935 return false;
1936
1937
1938
1939
1940 if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
1941 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
1942 return false;
1943
1944 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
1945 tcp_send_head(sk))
1946 return false;
1947
1948
1949
1950
1951 timeout = rtt << 1;
1952 if (tp->packets_out == 1)
1953 timeout = max_t(u32, timeout,
1954 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
1955 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
1956
1957
1958 tlp_time_stamp = tcp_time_stamp + timeout;
1959 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
1960 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
1961 s32 delta = rto_time_stamp - tcp_time_stamp;
1962 if (delta > 0)
1963 timeout = delta;
1964 }
1965
1966 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
1967 TCP_RTO_MAX);
1968 return true;
1969}
1970
1971
1972
1973
1974void tcp_send_loss_probe(struct sock *sk)
1975{
1976 struct tcp_sock *tp = tcp_sk(sk);
1977 struct sk_buff *skb;
1978 int pcount;
1979 int mss = tcp_current_mss(sk);
1980 int err = -1;
1981
1982 if (tcp_send_head(sk) != NULL) {
1983 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
1984 goto rearm_timer;
1985 }
1986
1987
1988 if (tp->tlp_high_seq)
1989 goto rearm_timer;
1990
1991
1992 skb = tcp_write_queue_tail(sk);
1993 if (WARN_ON(!skb))
1994 goto rearm_timer;
1995
1996 pcount = tcp_skb_pcount(skb);
1997 if (WARN_ON(!pcount))
1998 goto rearm_timer;
1999
2000 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2001 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
2002 goto rearm_timer;
2003 skb = tcp_write_queue_tail(sk);
2004 }
2005
2006 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2007 goto rearm_timer;
2008
2009
2010 if (skb->len > 0)
2011 err = __tcp_retransmit_skb(sk, skb);
2012
2013
2014 if (likely(!err))
2015 tp->tlp_high_seq = tp->snd_nxt;
2016
2017rearm_timer:
2018 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2019 inet_csk(sk)->icsk_rto,
2020 TCP_RTO_MAX);
2021
2022 if (likely(!err))
2023 NET_INC_STATS_BH(sock_net(sk),
2024 LINUX_MIB_TCPLOSSPROBES);
2025 return;
2026}
2027
2028
2029
2030
2031
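/* Push out any pending frames; if nothing could be sent, arm the
 * zero-window probe timer.  Nothing is done for sockets in TCP_CLOSE,
 * whose write queue has already been purged.
 */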
2032void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2033 int nonagle)
2034{
2035
2036
2037
2038
2039 if (unlikely(sk->sk_state == TCP_CLOSE))
2040 return;
2041
2042 if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
2043 sk_gfp_atomic(sk, GFP_ATOMIC)))
2044 tcp_check_probe_timer(sk);
2045}
2046
2047
2048
2049
2050void tcp_push_one(struct sock *sk, unsigned int mss_now)
2051{
2052 struct sk_buff *skb = tcp_send_head(sk);
2053
2054 BUG_ON(!skb || skb->len < mss_now);
2055
2056 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
2057}
2110
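/* Compute the receive window we are willing to advertise, based on
 * free receive buffer space, rcv_ssthresh and the window clamp.  With
 * window scaling the result is rounded up to a multiple of the scale
 * factor; without it, silly window avoidance keeps the offer a
 * multiple of the receiver MSS.
 */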
2111u32 __tcp_select_window(struct sock *sk)
2112{
2113 struct inet_connection_sock *icsk = inet_csk(sk);
2114 struct tcp_sock *tp = tcp_sk(sk);
2115
2116
2117
2118
2119
2120
2121 int mss = icsk->icsk_ack.rcv_mss;
2122 int free_space = tcp_space(sk);
2123 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
2124 int window;
2125
2126 if (mss > full_space)
2127 mss = full_space;
2128
2129 if (free_space < (full_space >> 1)) {
2130 icsk->icsk_ack.quick = 0;
2131
2132 if (sk_under_memory_pressure(sk))
2133 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2134 4U * tp->advmss);
2135
2136 if (free_space < mss)
2137 return 0;
2138 }
2139
2140 if (free_space > tp->rcv_ssthresh)
2141 free_space = tp->rcv_ssthresh;
2142
2143
2144
2145
2146 window = tp->rcv_wnd;
2147 if (tp->rx_opt.rcv_wscale) {
2148 window = free_space;
2149
2150
2151
2152
2153
2154 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2155 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
2156 << tp->rx_opt.rcv_wscale);
2157 } else {
2158
2159
2160
2161
2162
2163
2164
2165
2166 if (window <= free_space - mss || window > free_space)
2167 window = (free_space / mss) * mss;
2168 else if (mss == full_space &&
2169 free_space > window + (full_space >> 1))
2170 window = free_space;
2171 }
2172
2173 return window;
2174}
2175
2176
2177static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2178{
2179 struct tcp_sock *tp = tcp_sk(sk);
2180 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
2181 int skb_size, next_skb_size;
2182
2183 skb_size = skb->len;
2184 next_skb_size = next_skb->len;
2185
2186 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2187
2188 tcp_highest_sack_combine(sk, next_skb, skb);
2189
2190 tcp_unlink_write_queue(next_skb, sk);
2191
2192 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2193 next_skb_size);
2194
2195 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2196 skb->ip_summed = CHECKSUM_PARTIAL;
2197
2198 if (skb->ip_summed != CHECKSUM_PARTIAL)
2199 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2200
2201
2202 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2203
2204
2205 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2206
2207
2208
2209
2210 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2211
2212
2213 tcp_clear_retrans_hints_partial(tp);
2214 if (next_skb == tp->retransmit_skb_hint)
2215 tp->retransmit_skb_hint = skb;
2216
2217 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2218
2219 sk_wmem_free_skb(sk, next_skb);
2220}
2221
2222
2223static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2224{
2225 if (tcp_skb_pcount(skb) > 1)
2226 return false;
2227
2228 if (skb_shinfo(skb)->nr_frags != 0)
2229 return false;
2230 if (skb_cloned(skb))
2231 return false;
2232 if (skb == tcp_send_head(sk))
2233 return false;
2234
2235 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2236 return false;
2237
2238 return true;
2239}
2240
2241
2242
2243
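/* Before retransmitting "to", try to collapse the following small
 * packets from the write queue into it, as long as they fit the
 * available room, the send window and the "space" budget passed by the
 * caller.
 */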
2244static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2245 int space)
2246{
2247 struct tcp_sock *tp = tcp_sk(sk);
2248 struct sk_buff *skb = to, *tmp;
2249 bool first = true;
2250
2251 if (!sysctl_tcp_retrans_collapse)
2252 return;
2253 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2254 return;
2255
2256 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2257 if (!tcp_can_collapse(sk, skb))
2258 break;
2259
2260 space -= skb->len;
2261
2262 if (first) {
2263 first = false;
2264 continue;
2265 }
2266
2267 if (space < 0)
2268 break;
2269
2270
2271
2272 if (skb->len > skb_availroom(to))
2273 break;
2274
2275 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2276 break;
2277
2278 tcp_collapse_retrans(sk, to);
2279 }
2280}
2281
2282
2283
2284
2285
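/* Retransmit one skb: trim anything already acked, refragment if the
 * MSS shrank, try to collapse neighbouring packets, and resend it via
 * tcp_transmit_skb().  Returns 0 on success or a negative error such
 * as -EAGAIN when the segment no longer fits the window.
 */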
2286int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2287{
2288 struct tcp_sock *tp = tcp_sk(sk);
2289 struct inet_connection_sock *icsk = inet_csk(sk);
2290 unsigned int cur_mss;
2291
2292
2293 if (icsk->icsk_mtup.probe_size) {
2294 icsk->icsk_mtup.probe_size = 0;
2295 }
2296
2297
2298
2299
2300 if (atomic_read(&sk->sk_wmem_alloc) >
2301 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2302 return -EAGAIN;
2303
2304 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2305 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2306 BUG();
2307 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2308 return -ENOMEM;
2309 }
2310
2311 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2312 return -EHOSTUNREACH;
2313
2314 cur_mss = tcp_current_mss(sk);
2315
2316
2317
2318
2319
2320
2321 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2322 TCP_SKB_CB(skb)->seq != tp->snd_una)
2323 return -EAGAIN;
2324
2325 if (skb->len > cur_mss) {
2326 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
2327 return -ENOMEM;
2328 } else {
2329 int oldpcount = tcp_skb_pcount(skb);
2330
2331 if (unlikely(oldpcount > 1)) {
2332 tcp_init_tso_segs(sk, skb, cur_mss);
2333 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2334 }
2335 }
2336
2337 tcp_retrans_try_collapse(sk, skb, cur_mss);
2338
2339
2340
2341
2342
2343 if (skb->len > 0 &&
2344 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
2345 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2346 if (!pskb_trim(skb, 0)) {
2347
2348 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
2349 TCP_SKB_CB(skb)->tcp_flags);
2350 skb->ip_summed = CHECKSUM_NONE;
2351 }
2352 }
2353
2354
2355
2356
2357 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2358
2359
2360
2361
2362
2363 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2364 skb_headroom(skb) >= 0xFFFF)) {
2365 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2366 GFP_ATOMIC);
2367 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2368 -ENOBUFS;
2369 } else {
2370 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2371 }
2372}
2373
2374int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2375{
2376 struct tcp_sock *tp = tcp_sk(sk);
2377 int err = __tcp_retransmit_skb(sk, skb);
2378
2379 if (err == 0) {
2380
2381 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2382
2383 tp->total_retrans++;
2384
2385#if FASTRETRANS_DEBUG > 0
2386 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2387 net_dbg_ratelimited("retrans_out leaked\n");
2388 }
2389#endif
2390 if (!tp->retrans_out)
2391 tp->lost_retrans_low = tp->snd_nxt;
2392 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2393 tp->retrans_out += tcp_skb_pcount(skb);
2394
2395
2396 if (!tp->retrans_stamp)
2397 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2398
2399 tp->undo_retrans += tcp_skb_pcount(skb);
2400
2401
2402
2403
2404 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2405 }
2406 return err;
2407}
2408
2409
2410
2411
2412static bool tcp_can_forward_retransmit(struct sock *sk)
2413{
2414 const struct inet_connection_sock *icsk = inet_csk(sk);
2415 const struct tcp_sock *tp = tcp_sk(sk);
2416
2417
2418 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2419 return false;
2420
2421
2422 if (tcp_is_reno(tp))
2423 return false;
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433 if (tcp_may_send_now(sk))
2434 return false;
2435
2436 return true;
2437}
2438
2439
2440
2441
2442
2443
2444
2445
2446
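/* Walk the retransmit queue and resend packets marked lost; once those
 * are exhausted, forward-retransmit not-yet-SACKed packets up to the
 * highest SACKed sequence, all subject to the congestion window.
 */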
2447void tcp_xmit_retransmit_queue(struct sock *sk)
2448{
2449 const struct inet_connection_sock *icsk = inet_csk(sk);
2450 struct tcp_sock *tp = tcp_sk(sk);
2451 struct sk_buff *skb;
2452 struct sk_buff *hole = NULL;
2453 u32 last_lost;
2454 int mib_idx;
2455 int fwd_rexmitting = 0;
2456
2457 if (!tp->packets_out)
2458 return;
2459
2460 if (!tp->lost_out)
2461 tp->retransmit_high = tp->snd_una;
2462
2463 if (tp->retransmit_skb_hint) {
2464 skb = tp->retransmit_skb_hint;
2465 last_lost = TCP_SKB_CB(skb)->end_seq;
2466 if (after(last_lost, tp->retransmit_high))
2467 last_lost = tp->retransmit_high;
2468 } else {
2469 skb = tcp_write_queue_head(sk);
2470 last_lost = tp->snd_una;
2471 }
2472
2473 tcp_for_write_queue_from(skb, sk) {
2474 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2475
2476 if (skb == tcp_send_head(sk))
2477 break;
2478
2479 if (hole == NULL)
2480 tp->retransmit_skb_hint = skb;
2481
2482
2483
2484
2485
2486
2487
2488
2489 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2490 return;
2491
2492 if (fwd_rexmitting) {
2493begin_fwd:
2494 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2495 break;
2496 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2497
2498 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2499 tp->retransmit_high = last_lost;
2500 if (!tcp_can_forward_retransmit(sk))
2501 break;
2502
2503 if (hole != NULL) {
2504 skb = hole;
2505 hole = NULL;
2506 }
2507 fwd_rexmitting = 1;
2508 goto begin_fwd;
2509
2510 } else if (!(sacked & TCPCB_LOST)) {
2511 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2512 hole = skb;
2513 continue;
2514
2515 } else {
2516 last_lost = TCP_SKB_CB(skb)->end_seq;
2517 if (icsk->icsk_ca_state != TCP_CA_Loss)
2518 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2519 else
2520 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2521 }
2522
2523 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2524 continue;
2525
2526 if (tcp_retransmit_skb(sk, skb)) {
2527 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2528 return;
2529 }
2530 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2531
2532 if (tcp_in_cwnd_reduction(sk))
2533 tp->prr_out += tcp_skb_pcount(skb);
2534
2535 if (skb == tcp_write_queue_head(sk))
2536 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2537 inet_csk(sk)->icsk_rto,
2538 TCP_RTO_MAX);
2539 }
2540}
2541
2542
2543
2544
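/* Send a FIN: piggy-back it on the last queued skb if there is unsent
 * data, otherwise allocate a fresh skb for it (looping until the
 * allocation succeeds) and push the queue.
 */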
2545void tcp_send_fin(struct sock *sk)
2546{
2547 struct tcp_sock *tp = tcp_sk(sk);
2548 struct sk_buff *skb = tcp_write_queue_tail(sk);
2549 int mss_now;
2550
2551
2552
2553
2554
2555 mss_now = tcp_current_mss(sk);
2556
2557 if (tcp_send_head(sk) != NULL) {
2558 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2559 TCP_SKB_CB(skb)->end_seq++;
2560 tp->write_seq++;
2561 } else {
2562
2563 for (;;) {
2564 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2565 sk->sk_allocation);
2566 if (skb)
2567 break;
2568 yield();
2569 }
2570
2571
2572 skb_reserve(skb, MAX_TCP_HEADER);
2573
2574 tcp_init_nondata_skb(skb, tp->write_seq,
2575 TCPHDR_ACK | TCPHDR_FIN);
2576 tcp_queue_skb(sk, skb);
2577 }
2578 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2579}
2580
2581
2582
2583
2584
2585
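/* Send an active RST, e.g. on abortive close.  The skb is built on the
 * fly and transmitted without cloning, since a RST is never
 * retransmitted.
 */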
void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
	struct sk_buff *skb;

	/* NOTE: No TCP options attached and we never retransmit this. */
	skb = alloc_skb(MAX_TCP_HEADER, priority);
	if (!skb) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);
	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
			     TCPHDR_ACK | TCPHDR_RST);
	/* Send it off. */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	if (tcp_transmit_skb(sk, skb, 0, priority))
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);

	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
}

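/* Send a crossed SYN-ACK during socket establishment.
 * WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called.  If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */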
int tcp_send_synack(struct sock *sk)
{
	struct sk_buff *skb;

	skb = tcp_write_queue_head(sk);
	if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
		pr_debug("%s: wrong queue state\n", __func__);
		return -EFAULT;
	}
	if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
		if (skb_cloned(skb)) {
			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
			if (nskb == NULL)
				return -ENOMEM;
			tcp_unlink_write_queue(skb, sk);
			skb_header_release(nskb);
			__tcp_add_write_queue_head(sk, nskb);
			sk_wmem_free_skb(sk, skb);
			sk->sk_wmem_queued += nskb->truesize;
			sk_mem_charge(sk, nskb->truesize);
			skb = nskb;
		}

		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
		TCP_ECN_send_synack(tcp_sk(sk), skb);
	}
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}

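/**
 * tcp_make_synack - Prepare a SYN-ACK.
 * sk: listener socket
 * dst: dst entry attached to the SYNACK
 * req: request_sock pointer
 * foc: Fast Open cookie to include, if any
 *
 * Allocates an skb and builds the SYNACK packet.  The caller is
 * responsible for freeing the returned skb; on allocation failure the
 * dst reference is dropped and NULL is returned.
 */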
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				struct request_sock *req,
				struct tcp_fastopen_cookie *foc)
{
	struct tcp_out_options opts;
	struct inet_request_sock *ireq = inet_rsk(req);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcphdr *th;
	struct sk_buff *skb;
	struct tcp_md5sig_key *md5;
	int tcp_header_size;
	int mss;

	skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
	if (unlikely(!skb)) {
		dst_release(dst);
		return NULL;
	}
	/* Reserve space for headers. */
	skb_reserve(skb, MAX_TCP_HEADER);

	skb_dst_set(skb, dst);
	security_skb_owned_by(skb, sk);

	mss = dst_metric_advmss(dst);
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
		mss = tp->rx_opt.user_mss;

	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
		__u8 rcv_wscale;
		/* Set this up on the first call only. */
		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);

		/* Limit the window selection if the user enforces a smaller rx buffer. */
		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
			req->window_clamp = tcp_full_space(sk);

		/* tcp_full_space because it is guaranteed to be the first packet. */
		tcp_select_initial_window(tcp_full_space(sk),
			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			ireq->wscale_ok,
			&rcv_wscale,
			dst_metric(dst, RTAX_INITRWND));
		ireq->rcv_wscale = rcv_wscale;
	}

	memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
	if (unlikely(req->cookie_ts))
		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
	else
#endif
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
					     foc) + sizeof(*th);

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	th = tcp_hdr(skb);
	memset(th, 0, sizeof(struct tcphdr));
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
	th->source = ireq->loc_port;
	th->dest = ireq->rmt_port;
	/* Setting of flags is superfluous here for callers (and ECE is
	 * not even correctly set).
	 */
	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
			     TCPHDR_SYN | TCPHDR_ACK);

	th->seq = htonl(TCP_SKB_CB(skb)->seq);
	/* XXX data is queued and acked as is.  No buffer/window check. */
	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);

	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
	th->window = htons(min(req->rcv_wnd, 65535U));
	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	th->doff = (tcp_header_size >> 2);
	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));

#ifdef CONFIG_TCP_MD5SIG
	/* Okay, we have all we need - do the md5 hash if needed. */
	if (md5) {
		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
							 md5, NULL, req, skb);
	}
#endif

	return skb;
}
EXPORT_SYMBOL(tcp_make_synack);

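/* Do all connect socket setups that can be done AF independent. */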
void tcp_connect_init(struct sock *sk)
{
	const struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u8 rcv_wscale;

	/* We'll fix this up when we get a response from the other end.
	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
	 */
	tp->tcp_header_len = sizeof(struct tcphdr) +
		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);

#ifdef CONFIG_TCP_MD5SIG
	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
		tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	/* If user gave his TCP_MAXSEG, record it to clamp. */
	if (tp->rx_opt.user_mss)
		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
	tp->max_window = 0;
	tcp_mtup_init(sk);
	tcp_sync_mss(sk, dst_mtu(dst));

	if (!tp->window_clamp)
		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
	tp->advmss = dst_metric_advmss(dst);
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
		tp->advmss = tp->rx_opt.user_mss;

	tcp_initialize_rcv_mss(sk);

	/* Limit the window selection if the user enforces a smaller rx buffer. */
	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
		tp->window_clamp = tcp_full_space(sk);

	tcp_select_initial_window(tcp_full_space(sk),
				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
				  &tp->rcv_wnd,
				  &tp->window_clamp,
				  sysctl_tcp_window_scaling,
				  &rcv_wscale,
				  dst_metric(dst, RTAX_INITRWND));

	tp->rx_opt.rcv_wscale = rcv_wscale;
	tp->rcv_ssthresh = tp->rcv_wnd;

	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->snd_wnd = 0;
	tcp_init_wl(tp, 0);
	tp->snd_una = tp->write_seq;
	tp->snd_sml = tp->write_seq;
	tp->snd_up = tp->write_seq;
	tp->snd_nxt = tp->write_seq;

	if (likely(!tp->repair))
		tp->rcv_nxt = 0;
	tp->rcv_wup = tp->rcv_nxt;
	tp->copied_seq = tp->rcv_nxt;

	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
	inet_csk(sk)->icsk_retransmits = 0;
	tcp_clear_retrans(tp);
}

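/* Queue an skb on the write queue and account for it: charge its memory
 * to the socket, advance write_seq past its payload and count its
 * segments in packets_out.
 */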
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	tcb->end_seq += skb->len;
	skb_header_release(skb);
	__tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
	tp->write_seq = tcb->end_seq;
	tp->packets_out += tcp_skb_pcount(skb);
}

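/* Build and send a SYN with data and a (cached) Fast Open cookie.  A
 * data-only duplicate is queued after the regular SYN so the data can be
 * retransmitted if only the SYN sequence gets acknowledged.  If no cookie
 * is cached, or any step fails, fall back to a regular SYN carrying a
 * Fast Open cookie request option.
 */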
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_fastopen_request *fo = tp->fastopen_req;
	int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
	struct sk_buff *syn_data = NULL, *data;
	unsigned long last_syn_loss = 0;

	tp->rx_opt.mss_clamp = tp->advmss;  /* If MSS is not cached */
	tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
			       &syn_loss, &last_syn_loss);
	/* Recurring FO SYN losses: revert to regular handshake temporarily. */
	if (syn_loss > 1 &&
	    time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
		fo->cookie.len = -1;
		goto fallback;
	}

	if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
		fo->cookie.len = -1;
	else if (fo->cookie.len <= 0)
		goto fallback;

	/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
	 * user-MSS.  Reserve maximum option space for middleboxes that add
	 * private TCP options or anything else during connection setup.
	 */
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
	space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
		MAX_TCP_OPTION_SPACE;

	syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
				   sk->sk_allocation);
	if (syn_data == NULL)
		goto fallback;

	for (i = 0; i < iovlen && syn_data->len < space; ++i) {
		struct iovec *iov = &fo->data->msg_iov[i];
		unsigned char __user *from = iov->iov_base;
		int len = iov->iov_len;

		if (syn_data->len + len > space)
			len = space - syn_data->len;
		else if (i + 1 == iovlen)
			/* No more data pending in inet_wait_for_connect(). */
			fo->data = NULL;

		if (skb_add_data(syn_data, from, len))
			goto fallback;
	}

	/* Queue a data-only packet after the regular SYN for retransmission. */
	data = pskb_copy(syn_data, sk->sk_allocation);
	if (data == NULL)
		goto fallback;
	TCP_SKB_CB(data)->seq++;
	TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
	TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
	tcp_connect_queue_skb(sk, data);
	fo->copied = data->len;

	if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
		tp->syn_data = (fo->copied > 0);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
		goto done;
	}
	syn_data = NULL;

fallback:
	/* Send a regular SYN with Fast Open cookie request option. */
	if (fo->cookie.len > 0)
		fo->cookie.len = 0;
	err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
	if (err)
		tp->syn_fastopen = 0;
	kfree_skb(syn_data);
done:
	fo->cookie.len = -1;  /* Exclude Fast Open option for SYN retries */
	return err;
}

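/* Build a SYN and send it off. */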
int tcp_connect(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;
	int err;

	tcp_connect_init(sk);

	if (unlikely(tp->repair)) {
		tcp_finish_connect(sk, NULL);
		return 0;
	}

	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(buff == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(buff, MAX_TCP_HEADER);

	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
	tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
	tcp_connect_queue_skb(sk, buff);
	TCP_ECN_send_syn(sk, buff);

	/* Send off SYN; include data in Fast Open. */
	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
	      tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
	if (err == -ECONNREFUSED)
		return err;

	/* We change tp->snd_nxt after the tcp_transmit_skb() call
	 * in order to make this packet get counted in tcpOutSegs.
	 */
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;
	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the SYN until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
	return 0;
}
EXPORT_SYMBOL(tcp_connect);

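/* Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check().
 */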
void tcp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int ato = icsk->icsk_ack.ato;
	unsigned long timeout;

	if (ato > TCP_DELACK_MIN) {
		const struct tcp_sock *tp = tcp_sk(sk);
		int max_ato = HZ / 2;

		if (icsk->icsk_ack.pingpong ||
		    (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
			max_ato = TCP_DELACK_MAX;

		/* Slow path, intersegment interval is "high". */

		/* If some rtt estimate is known, use it to bound delayed ack.
		 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt
		 * measurements directly.
		 */
		if (tp->srtt) {
			int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);

			if (rtt < max_ato)
				max_ato = rtt;
		}

		ato = min(ato, max_ato);
	}

	/* Stay within the limit we were given. */
	timeout = jiffies + ato;

	/* Use new timeout only if there wasn't a older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 */
		if (icsk->icsk_ack.blocked ||
		    time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
			tcp_send_ack(sk);
			return;
		}

		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}

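/* This routine sends an ack and also updates the window. */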
void tcp_send_ack(struct sock *sk)
{
	struct sk_buff *buff;

	/* If we have been reset, we may not send again. */
	if (sk->sk_state == TCP_CLOSE)
		return;

	/* We are not putting this on the write queue, so
	 * tcp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
	if (buff == NULL) {
		inet_csk_schedule_ack(sk);
		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
					  TCP_DELACK_MAX, TCP_RTO_MAX);
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(buff, MAX_TCP_HEADER);
	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);

	/* Send it off, this clears delayed acks for us. */
	TCP_SKB_CB(buff)->when = tcp_time_stamp;
	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
}

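/* This routine sends a packet with an out of date sequence
 * number.  It assumes the other end will try to ack it.
 *
 * Question: what should we make while urgent mode?
 * 4.4BSD forces sending single byte of data.  We cannot send
 * out of window data, because we have SND.NXT == SND.MAX...
 *
 * Current solution: to send TWO zero-length segments in urgent mode:
 * one is with SEG.SEQ = SND.UNA to deliver urgent pointer, another is
 * out-of-date with SND.UNA - 1 to probe the window.
 */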
static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* We don't queue it, tcp_transmit_skb() sets ownership. */
	skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
	if (skb == NULL)
		return -1;

	/* Reserve space for headers and set control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);

	/* Use a previous sequence.  This should cause the other
	 * end to send an ack.  Don't queue or clone SKB, just
	 * send it.
	 */
	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}

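/* Send an immediate probe ACK on an established socket, e.g. when
 * user space re-announces the window after TCP_REPAIR; snd_wl1 is
 * pulled back so the next incoming ACK is accepted as a window update.
 */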
void tcp_send_window_probe(struct sock *sk)
{
	if (sk->sk_state == TCP_ESTABLISHED) {
		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
		tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
		tcp_xmit_probe_skb(sk, 0);
	}
}

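/* Initiate keepalive or window probe from timer. */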
int tcp_write_wakeup(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (sk->sk_state == TCP_CLOSE)
		return -1;

	if ((skb = tcp_send_head(sk)) != NULL &&
	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
		int err;
		unsigned int mss = tcp_current_mss(sk);
		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;

		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
			tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;

		/* We are probing the opening of a window
		 * but the window size is != 0;
		 * this must have been a result of SWS avoidance (sender).
		 */
		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
		    skb->len > mss) {
			seg_size = min(seg_size, mss);
			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
			if (tcp_fragment(sk, skb, seg_size, mss))
				return -1;
		} else if (!tcp_skb_pcount(skb))
			tcp_set_skb_tso_segs(sk, skb, mss);

		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
		TCP_SKB_CB(skb)->when = tcp_time_stamp;
		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
		if (!err)
			tcp_event_new_data_sent(sk, skb);
		return err;
	} else {
		if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
			tcp_xmit_probe_skb(sk, 1);
		return tcp_xmit_probe_skb(sk, 0);
	}
}

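/* A window probe timeout has occurred.  If window is not closed send
 * a partial packet else a zero probe.
 */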
void tcp_send_probe0(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int err;

	err = tcp_write_wakeup(sk);

	if (tp->packets_out || !tcp_send_head(sk)) {
		/* Cancel probe timer, if it is not required. */
		icsk->icsk_probes_out = 0;
		icsk->icsk_backoff = 0;
		return;
	}

	if (err <= 0) {
		if (icsk->icsk_backoff < sysctl_tcp_retries2)
			icsk->icsk_backoff++;
		icsk->icsk_probes_out++;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
					  TCP_RTO_MAX);
	} else {
		/* If packet was not sent due to local congestion,
		 * do not backoff and do not remember icsk_probes_out.
		 * Let local senders fight for local resources.
		 *
		 * Use accumulated backoff yet.
		 */
		if (!icsk->icsk_probes_out)
			icsk->icsk_probes_out = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  min(icsk->icsk_rto << icsk->icsk_backoff,
					      TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);
	}
}