/*
 * TCP output engine: builds TCP headers and options, selects the
 * advertised receive window, and transmits data segments, SYN/SYN-ACKs,
 * pure ACKs, RSTs, zero-window probes and retransmissions.
 */

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>

/* Collapse adjacent queued segments when retransmitting (see
 * tcp_retrans_try_collapse()); may be disabled for broken peers.
 */
int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* When set, never advertise more than 32767 bytes unless window scaling
 * was negotiated, to cope with stacks that treat the window as signed.
 */
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* A deferred TSO burst may not exceed 1/N of the congestion window
 * (see tcp_tso_should_defer()).
 */
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

/* MTU probing: values above 1 enable probing unconditionally. */
int sysctl_tcp_mtu_probing __read_mostly = 0;
int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;

/* Restart the congestion window after an idle period (RFC 2861). */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

/* Size of the TCP cookie-pair option; 0 disables cookie extensions. */
int sysctl_tcp_cookie_size __read_mostly = 0;
EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);

/* Account for a segment that was just sent for the first time: advance
 * the send head and SND.NXT, and arm the retransmit timer if it was idle.
 */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int prior_packets = tp->packets_out;

	tcp_advance_send_head(sk, skb);
	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

	/* Don't override Nagle indefinitely with F-RTO */
	if (tp->frto_counter == 2)
		tp->frto_counter = 3;

	tp->packets_out += tcp_skb_pcount(skb);
	if (!prior_packets)
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}

/* SND.NXT, if the window was not shrunk.  If the window has been shrunk,
 * SND.NXT may lie beyond the receiver's window, so fall back to the right
 * edge of the current send window instead.
 */
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
		return tp->snd_nxt;
	else
		return tcp_wnd_end(tp);
}

/* Calculate the MSS to advertise in our SYN, clamped by the cached
 * destination metric if one exists (tp->advmss is updated to match).
 */
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst) {
		unsigned int metric = dst_metric_advmss(dst);

		if (metric < mss) {
			mss = metric;
			tp->advmss = mss;
		}
	}

	return (__u16)mss;
}

/* RFC 2861: reset the congestion window after an idle period longer than
 * RTO, halving snd_cwnd once for every RTO that passed while idle.
 */
static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
{
	struct tcp_sock *tp = tcp_sk(sk);
	s32 delta = tcp_time_stamp - tp->lsndtime;
	u32 restart_cwnd = tcp_init_cwnd(tp, dst);
	u32 cwnd = tp->snd_cwnd;

	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

	tp->snd_ssthresh = tcp_current_ssthresh(sk);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->snd_cwnd_used = 0;
}

/* Congestion state accounting after a packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp,
				struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 now = tcp_time_stamp;

	if (sysctl_tcp_slow_start_after_idle &&
	    (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
		tcp_cwnd_restart(sk, __sk_dst_get(sk));

	tp->lsndtime = now;

	/* If this is a reply sent within the delayed-ACK timeout of the
	 * last received packet, enter pingpong (interactive) mode.
	 */
	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
		icsk->icsk_ack.pingpong = 1;
}

/* Account for an ACK we have just sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

/* Determine a window scaling factor and the initial window to offer,
 * given the amount of buffer space available.  Results are returned
 * through rcv_wnd, window_clamp and rcv_wscale.
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	/* If no clamp is set, use the maximum representable window. */
	if (*window_clamp == 0)
		(*window_clamp) = (65535 << 14);
	space = min(*window_clamp, space);

	/* Quantize the space offering to a multiple of mss if possible. */
	if (space > mss)
		space = (space / mss) * mss;

	/* NOTE: offering an initial window larger than 32767 will break
	 * buggy TCP stacks that interpret the window field as a signed
	 * quantity; cap it when the workaround sysctl is set and window
	 * scaling is not in use.
	 */
	if (sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = space;

	(*rcv_wscale) = 0;
	if (wscale_ok) {
		/* Pick a scale factor large enough to cover the maximum
		 * receive buffer we may ever want to advertise (limit of
		 * 14 per RFC 1323).
		 */
		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		while (space > 65535 && (*rcv_wscale) < 14) {
			space >>= 1;
			(*rcv_wscale)++;
		}
	}

	/* Set the initial window large enough for a sender starting with
	 * an initial congestion window of TCP_DEFAULT_INIT_RCVWND; place
	 * a limit when mss is larger than 1460.
	 */
	if (mss > (1 << *rcv_wscale)) {
		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
		if (mss > 1460)
			init_cwnd =
			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);

		/* An explicit initial receive window (e.g. from a route
		 * metric) overrides the default.
		 */
		if (init_rcv_wnd)
			*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
		else
			*rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
	}

	/* Set the clamp no higher than the maximum representable value. */
	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);

/* Choose a new window to advertise, update state in tcp_sock for the
 * socket, and return the window to place in the TCP header of an
 * outgoing segment.  The offered window must never shrink (RFC 1122).
 */
static u16 tcp_select_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	/* Never shrink the offered window: re-advertise the current window,
	 * rounded up to the window-scale granularity.
	 */
	if (new_win < cur_win) {
		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	/* Make sure we do not exceed the maximum possible scaled window. */
	if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
		new_win = min(new_win, MAX_TCP_WINDOW);
	else
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

	/* RFC 1323 scaling applied */
	new_win >>= tp->rx_opt.rcv_wscale;

	/* If we advertise a zero window, disable the fast path. */
	if (new_win == 0)
		tp->pred_flags = 0;

	return new_win;
}
298static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
299{
300 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
301 if (!(tp->ecn_flags & TCP_ECN_OK))
302 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
303}
306static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
307{
308 struct tcp_sock *tp = tcp_sk(sk);
309
310 tp->ecn_flags = 0;
311 if (sysctl_tcp_ecn == 1) {
312 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
313 tp->ecn_flags = TCP_ECN_OK;
314 }
315}
317static __inline__ void
318TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
319{
320 if (inet_rsk(req)->ecn_ok)
321 th->ece = 1;
322}
327static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
328 int tcp_header_len)
329{
330 struct tcp_sock *tp = tcp_sk(sk);
331
332 if (tp->ecn_flags & TCP_ECN_OK) {
333
334 if (skb->len != tcp_header_len &&
335 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
336 INET_ECN_xmit(sk);
337 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
338 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
339 tcp_hdr(skb)->cwr = 1;
340 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
341 }
342 } else {
343
344 INET_ECN_dontxmit(sk);
345 }
346 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
347 tcp_hdr(skb)->ece = 1;
348 }
349}

/* Initialize TSO/GSO state, flags and sequence numbers for a freshly
 * allocated skb that carries no user data (SYN, ACK, RST, FIN, probes).
 */
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum = 0;

	TCP_SKB_CB(skb)->tcp_flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;

	skb_shinfo(skb)->gso_segs = 1;
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_type = 0;

	TCP_SKB_CB(skb)->seq = seq;
	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
		seq++;
	TCP_SKB_CB(skb)->end_seq = seq;
}

/* We are in urgent mode while snd_up is ahead of snd_una. */
static inline int tcp_urg_mode(const struct tcp_sock *tp)
{
	return tp->snd_una != tp->snd_up;
}

#define OPTION_SACK_ADVERTISE	(1 << 0)
#define OPTION_TS		(1 << 1)
#define OPTION_MD5		(1 << 2)
#define OPTION_WSCALE		(1 << 3)
#define OPTION_COOKIE_EXTENSION	(1 << 4)

/* Options the header writer has decided to emit for this segment. */
struct tcp_out_options {
	u8 options;		/* bit field of OPTION_* */
	u8 ws;			/* window scale, 0 to disable */
	u8 num_sack_blocks;	/* number of SACK blocks to include */
	u8 hash_size;		/* bytes in hash_location */
	u16 mss;		/* 0 to disable */
	__u32 tsval, tsecr;	/* need to include OPTION_TS */
	__u8 *hash_location;	/* temporary pointer, overloaded */
};
395static u8 tcp_cookie_size_check(u8 desired)
396{
397 int cookie_size;
398
399 if (desired > 0)
400
401 return desired;
402
403 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
404 if (cookie_size <= 0)
405
406 return 0;
407
408 if (cookie_size <= TCP_COOKIE_MIN)
409
410 return TCP_COOKIE_MIN;
411
412 if (cookie_size >= TCP_COOKIE_MAX)
413
414 return TCP_COOKIE_MAX;
415
416 if (cookie_size & 1)
417
418 cookie_size++;
419
420 return (u8)cookie_size;
421}
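
/* Write previously computed TCP options to the packet.
 *
 * The ordering below is deliberate: MD5 (optionally fused with the cookie
 * kind/length), MSS, timestamp (with SACK-permitted folded in when both
 * are present), the cookie pair, standalone SACK-permitted, window scale,
 * and finally any SACK blocks.
 */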
436static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
437 struct tcp_out_options *opts)
438{
439 u8 options = opts->options;
440
441
442
443
444
445
446
447
448
449 if (unlikely(OPTION_MD5 & options)) {
450 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
451 *ptr++ = htonl((TCPOPT_COOKIE << 24) |
452 (TCPOLEN_COOKIE_BASE << 16) |
453 (TCPOPT_MD5SIG << 8) |
454 TCPOLEN_MD5SIG);
455 } else {
456 *ptr++ = htonl((TCPOPT_NOP << 24) |
457 (TCPOPT_NOP << 16) |
458 (TCPOPT_MD5SIG << 8) |
459 TCPOLEN_MD5SIG);
460 }
461 options &= ~OPTION_COOKIE_EXTENSION;
462
463 opts->hash_location = (__u8 *)ptr;
464 ptr += 4;
465 }
466
467 if (unlikely(opts->mss)) {
468 *ptr++ = htonl((TCPOPT_MSS << 24) |
469 (TCPOLEN_MSS << 16) |
470 opts->mss);
471 }
472
473 if (likely(OPTION_TS & options)) {
474 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
475 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
476 (TCPOLEN_SACK_PERM << 16) |
477 (TCPOPT_TIMESTAMP << 8) |
478 TCPOLEN_TIMESTAMP);
479 options &= ~OPTION_SACK_ADVERTISE;
480 } else {
481 *ptr++ = htonl((TCPOPT_NOP << 24) |
482 (TCPOPT_NOP << 16) |
483 (TCPOPT_TIMESTAMP << 8) |
484 TCPOLEN_TIMESTAMP);
485 }
486 *ptr++ = htonl(opts->tsval);
487 *ptr++ = htonl(opts->tsecr);
488 }
489
490
491
492
493
494
495
496 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
497 __u8 *cookie_copy = opts->hash_location;
498 u8 cookie_size = opts->hash_size;
499
500
501
502
503 if (0x2 & cookie_size) {
504 __u8 *p = (__u8 *)ptr;
505
506
507 *p++ = TCPOPT_COOKIE;
508 *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
509 *p++ = *cookie_copy++;
510 *p++ = *cookie_copy++;
511 ptr++;
512 cookie_size -= 2;
513 } else {
514
515 *ptr++ = htonl(((TCPOPT_NOP << 24) |
516 (TCPOPT_NOP << 16) |
517 (TCPOPT_COOKIE << 8) |
518 TCPOLEN_COOKIE_BASE) +
519 cookie_size);
520 }
521
522 if (cookie_size > 0) {
523 memcpy(ptr, cookie_copy, cookie_size);
524 ptr += (cookie_size / 4);
525 }
526 }
527
528 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
529 *ptr++ = htonl((TCPOPT_NOP << 24) |
530 (TCPOPT_NOP << 16) |
531 (TCPOPT_SACK_PERM << 8) |
532 TCPOLEN_SACK_PERM);
533 }
534
535 if (unlikely(OPTION_WSCALE & options)) {
536 *ptr++ = htonl((TCPOPT_NOP << 24) |
537 (TCPOPT_WINDOW << 16) |
538 (TCPOLEN_WINDOW << 8) |
539 opts->ws);
540 }
541
542 if (unlikely(opts->num_sack_blocks)) {
543 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
544 tp->duplicate_sack : tp->selective_acks;
545 int this_sack;
546
547 *ptr++ = htonl((TCPOPT_NOP << 24) |
548 (TCPOPT_NOP << 16) |
549 (TCPOPT_SACK << 8) |
550 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
551 TCPOLEN_SACK_PERBLOCK)));
552
553 for (this_sack = 0; this_sack < opts->num_sack_blocks;
554 ++this_sack) {
555 *ptr++ = htonl(sp[this_sack].start_seq);
556 *ptr++ = htonl(sp[this_sack].end_seq);
557 }
558
559 tp->rx_opt.dsack = 0;
560 }
561}
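
/* Compute TCP options for SYN packets.  This is not the final
 * network wire format yet.
 */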
566static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
567 struct tcp_out_options *opts,
568 struct tcp_md5sig_key **md5)
569{
570 struct tcp_sock *tp = tcp_sk(sk);
571 struct tcp_cookie_values *cvp = tp->cookie_values;
572 unsigned remaining = MAX_TCP_OPTION_SPACE;
573 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
574 tcp_cookie_size_check(cvp->cookie_desired) :
575 0;
576
577#ifdef CONFIG_TCP_MD5SIG
578 *md5 = tp->af_specific->md5_lookup(sk, sk);
579 if (*md5) {
580 opts->options |= OPTION_MD5;
581 remaining -= TCPOLEN_MD5SIG_ALIGNED;
582 }
583#else
584 *md5 = NULL;
585#endif
586
587
588
589
590
591
592
593
594
595
596 opts->mss = tcp_advertise_mss(sk);
597 remaining -= TCPOLEN_MSS_ALIGNED;
598
599 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
600 opts->options |= OPTION_TS;
601 opts->tsval = TCP_SKB_CB(skb)->when;
602 opts->tsecr = tp->rx_opt.ts_recent;
603 remaining -= TCPOLEN_TSTAMP_ALIGNED;
604 }
605 if (likely(sysctl_tcp_window_scaling)) {
606 opts->ws = tp->rx_opt.rcv_wscale;
607 opts->options |= OPTION_WSCALE;
608 remaining -= TCPOLEN_WSCALE_ALIGNED;
609 }
610 if (likely(sysctl_tcp_sack)) {
611 opts->options |= OPTION_SACK_ADVERTISE;
612 if (unlikely(!(OPTION_TS & opts->options)))
613 remaining -= TCPOLEN_SACKPERM_ALIGNED;
614 }
615
616
617
618
619
620
621
622 if (*md5 == NULL &&
623 (OPTION_TS & opts->options) &&
624 cookie_size > 0) {
625 int need = TCPOLEN_COOKIE_BASE + cookie_size;
626
627 if (0x2 & need) {
628
629 need += 2;
630
631 if (need > remaining) {
632
633 cookie_size -= 2;
634 need -= 4;
635 }
636 }
637 while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
638 cookie_size -= 4;
639 need -= 4;
640 }
641 if (TCP_COOKIE_MIN <= cookie_size) {
642 opts->options |= OPTION_COOKIE_EXTENSION;
643 opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
644 opts->hash_size = cookie_size;
645
646
647 cvp->cookie_desired = cookie_size;
648
649 if (cvp->cookie_desired != cvp->cookie_pair_size) {
650
651
652
653
654 get_random_bytes(&cvp->cookie_pair[0],
655 cookie_size);
656 cvp->cookie_pair_size = cookie_size;
657 }
658
659 remaining -= need;
660 }
661 }
662 return MAX_TCP_OPTION_SPACE - remaining;
663}
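
/* Set up TCP options for SYN-ACKs. */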
666static unsigned tcp_synack_options(struct sock *sk,
667 struct request_sock *req,
668 unsigned mss, struct sk_buff *skb,
669 struct tcp_out_options *opts,
670 struct tcp_md5sig_key **md5,
671 struct tcp_extend_values *xvp)
672{
673 struct inet_request_sock *ireq = inet_rsk(req);
674 unsigned remaining = MAX_TCP_OPTION_SPACE;
675 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
676 xvp->cookie_plus :
677 0;
678
679#ifdef CONFIG_TCP_MD5SIG
680 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
681 if (*md5) {
682 opts->options |= OPTION_MD5;
683 remaining -= TCPOLEN_MD5SIG_ALIGNED;
684
685
686
687
688
689
690 ireq->tstamp_ok &= !ireq->sack_ok;
691 }
692#else
693 *md5 = NULL;
694#endif
695
696
697 opts->mss = mss;
698 remaining -= TCPOLEN_MSS_ALIGNED;
699
700 if (likely(ireq->wscale_ok)) {
701 opts->ws = ireq->rcv_wscale;
702 opts->options |= OPTION_WSCALE;
703 remaining -= TCPOLEN_WSCALE_ALIGNED;
704 }
705 if (likely(ireq->tstamp_ok)) {
706 opts->options |= OPTION_TS;
707 opts->tsval = TCP_SKB_CB(skb)->when;
708 opts->tsecr = req->ts_recent;
709 remaining -= TCPOLEN_TSTAMP_ALIGNED;
710 }
711 if (likely(ireq->sack_ok)) {
712 opts->options |= OPTION_SACK_ADVERTISE;
713 if (unlikely(!ireq->tstamp_ok))
714 remaining -= TCPOLEN_SACKPERM_ALIGNED;
715 }
716
717
718
719
720 if (*md5 == NULL &&
721 ireq->tstamp_ok &&
722 cookie_plus > TCPOLEN_COOKIE_BASE) {
723 int need = cookie_plus;
724
725 if (0x2 & need) {
726
727 need += 2;
728 }
729 if (need <= remaining) {
730 opts->options |= OPTION_COOKIE_EXTENSION;
731 opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
732 remaining -= need;
733 } else {
734
735 xvp->cookie_out_never = 1;
736 opts->hash_size = 0;
737 }
738 }
739 return MAX_TCP_OPTION_SPACE - remaining;
740}
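
/* Compute TCP options for established sockets.  This is not the
 * final network wire format yet.
 */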
745static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
746 struct tcp_out_options *opts,
747 struct tcp_md5sig_key **md5)
748{
749 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
750 struct tcp_sock *tp = tcp_sk(sk);
751 unsigned size = 0;
752 unsigned int eff_sacks;
753
754#ifdef CONFIG_TCP_MD5SIG
755 *md5 = tp->af_specific->md5_lookup(sk, sk);
756 if (unlikely(*md5)) {
757 opts->options |= OPTION_MD5;
758 size += TCPOLEN_MD5SIG_ALIGNED;
759 }
760#else
761 *md5 = NULL;
762#endif
763
764 if (likely(tp->rx_opt.tstamp_ok)) {
765 opts->options |= OPTION_TS;
766 opts->tsval = tcb ? tcb->when : 0;
767 opts->tsecr = tp->rx_opt.ts_recent;
768 size += TCPOLEN_TSTAMP_ALIGNED;
769 }
770
771 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
772 if (unlikely(eff_sacks)) {
773 const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
774 opts->num_sack_blocks =
775 min_t(unsigned, eff_sacks,
776 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
777 TCPOLEN_SACK_PERBLOCK);
778 size += TCPOLEN_SACK_BASE_ALIGNED +
779 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
780 }
781
782 return size;
783}
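
/* This routine actually transmits TCP packets queued on the write queue,
 * for both the initial transmission and any later retransmissions.
 * All SKBs seen here are completely headerless: it is our job to build
 * the TCP header and hand the packet to the af-specific queue_xmit
 * routine.
 *
 * We work on either a clone of the original SKB, or a fresh copy made
 * by the retransmit engine.
 */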
796static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
797 gfp_t gfp_mask)
798{
799 const struct inet_connection_sock *icsk = inet_csk(sk);
800 struct inet_sock *inet;
801 struct tcp_sock *tp;
802 struct tcp_skb_cb *tcb;
803 struct tcp_out_options opts;
804 unsigned tcp_options_size, tcp_header_size;
805 struct tcp_md5sig_key *md5;
806 struct tcphdr *th;
807 int err;
808
809 BUG_ON(!skb || !tcp_skb_pcount(skb));
810
811
812
813
814 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
815 __net_timestamp(skb);
816
817 if (likely(clone_it)) {
818 if (unlikely(skb_cloned(skb)))
819 skb = pskb_copy(skb, gfp_mask);
820 else
821 skb = skb_clone(skb, gfp_mask);
822 if (unlikely(!skb))
823 return -ENOBUFS;
824 }
825
826 inet = inet_sk(sk);
827 tp = tcp_sk(sk);
828 tcb = TCP_SKB_CB(skb);
829 memset(&opts, 0, sizeof(opts));
830
831 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
832 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
833 else
834 tcp_options_size = tcp_established_options(sk, skb, &opts,
835 &md5);
836 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
837
838 if (tcp_packets_in_flight(tp) == 0) {
839 tcp_ca_event(sk, CA_EVENT_TX_START);
840 skb->ooo_okay = 1;
841 } else
842 skb->ooo_okay = 0;
843
844 skb_push(skb, tcp_header_size);
845 skb_reset_transport_header(skb);
846 skb_set_owner_w(skb, sk);
847
848
849 th = tcp_hdr(skb);
850 th->source = inet->inet_sport;
851 th->dest = inet->inet_dport;
852 th->seq = htonl(tcb->seq);
853 th->ack_seq = htonl(tp->rcv_nxt);
854 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
855 tcb->tcp_flags);
856
857 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
858
859
860
861 th->window = htons(min(tp->rcv_wnd, 65535U));
862 } else {
863 th->window = htons(tcp_select_window(sk));
864 }
865 th->check = 0;
866 th->urg_ptr = 0;
867
868
869 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
870 if (before(tp->snd_up, tcb->seq + 0x10000)) {
871 th->urg_ptr = htons(tp->snd_up - tcb->seq);
872 th->urg = 1;
873 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
874 th->urg_ptr = htons(0xFFFF);
875 th->urg = 1;
876 }
877 }
878
879 tcp_options_write((__be32 *)(th + 1), tp, &opts);
880 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
881 TCP_ECN_send(sk, skb, tcp_header_size);
882
883#ifdef CONFIG_TCP_MD5SIG
884
885 if (md5) {
886 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
887 tp->af_specific->calc_md5_hash(opts.hash_location,
888 md5, sk, NULL, skb);
889 }
890#endif
891
892 icsk->icsk_af_ops->send_check(sk, skb);
893
894 if (likely(tcb->tcp_flags & TCPHDR_ACK))
895 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
896
897 if (skb->len != tcp_header_size)
898 tcp_event_data_sent(tp, sk);
899
900 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
901 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
902 tcp_skb_pcount(skb));
903
904 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
905 if (likely(err <= 0))
906 return err;
907
908 tcp_enter_cwr(sk, 1);
909
910 return net_xmit_eval(err);
911}
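
/* This routine just queues the buffer for sending.
 *
 * NOTE: the probe0 timer is not checked; callers must follow up with
 * tcp_push_pending_frames(), otherwise the socket can stall.
 */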
918static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
919{
920 struct tcp_sock *tp = tcp_sk(sk);
921
922
923 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
924 skb_header_release(skb);
925 tcp_add_write_queue_tail(sk, skb);
926 sk->sk_wmem_queued += skb->truesize;
927 sk_mem_charge(sk, skb->truesize);
928}
931static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
932 unsigned int mss_now)
933{
934 if (skb->len <= mss_now || !sk_can_gso(sk) ||
935 skb->ip_summed == CHECKSUM_NONE) {
936
937
938
939 skb_shinfo(skb)->gso_segs = 1;
940 skb_shinfo(skb)->gso_size = 0;
941 skb_shinfo(skb)->gso_type = 0;
942 } else {
943 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
944 skb_shinfo(skb)->gso_size = mss_now;
945 skb_shinfo(skb)->gso_type = sk->sk_gso_type;
946 }
947}
952static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
953 int decr)
954{
955 struct tcp_sock *tp = tcp_sk(sk);
956
957 if (!tp->sacked_out || tcp_is_reno(tp))
958 return;
959
960 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
961 tp->fackets_out -= decr;
962}
967static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
968{
969 struct tcp_sock *tp = tcp_sk(sk);
970
971 tp->packets_out -= decr;
972
973 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
974 tp->sacked_out -= decr;
975 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
976 tp->retrans_out -= decr;
977 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
978 tp->lost_out -= decr;
979
980
981 if (tcp_is_reno(tp) && decr > 0)
982 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
983
984 tcp_adjust_fackets_out(sk, skb, decr);
985
986 if (tp->lost_skb_hint &&
987 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
988 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
989 tp->lost_cnt_hint -= decr;
990
991 tcp_verify_left_out(tp);
992}
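
/* Split an skb so that the first fragment carries at most len bytes.
 * The remainder is put into a new segment linked into the write queue
 * right after the original, and the packet/SACK counters are adjusted.
 * These are still headerless SKBs at this point.
 */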
999int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1000 unsigned int mss_now)
1001{
1002 struct tcp_sock *tp = tcp_sk(sk);
1003 struct sk_buff *buff;
1004 int nsize, old_factor;
1005 int nlen;
1006 u8 flags;
1007
1008 if (WARN_ON(len > skb->len))
1009 return -EINVAL;
1010
1011 nsize = skb_headlen(skb) - len;
1012 if (nsize < 0)
1013 nsize = 0;
1014
1015 if (skb_cloned(skb) &&
1016 skb_is_nonlinear(skb) &&
1017 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1018 return -ENOMEM;
1019
1020
1021 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
1022 if (buff == NULL)
1023 return -ENOMEM;
1024
1025 sk->sk_wmem_queued += buff->truesize;
1026 sk_mem_charge(sk, buff->truesize);
1027 nlen = skb->len - len - nsize;
1028 buff->truesize += nlen;
1029 skb->truesize -= nlen;
1030
1031
1032 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1033 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1034 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1035
1036
1037 flags = TCP_SKB_CB(skb)->tcp_flags;
1038 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1039 TCP_SKB_CB(buff)->tcp_flags = flags;
1040 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1041
1042 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1043
1044 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1045 skb_put(buff, nsize),
1046 nsize, 0);
1047
1048 skb_trim(skb, len);
1049
1050 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1051 } else {
1052 skb->ip_summed = CHECKSUM_PARTIAL;
1053 skb_split(skb, buff, len);
1054 }
1055
1056 buff->ip_summed = skb->ip_summed;
1057
1058
1059
1060
1061 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
1062 buff->tstamp = skb->tstamp;
1063
1064 old_factor = tcp_skb_pcount(skb);
1065
1066
1067 tcp_set_skb_tso_segs(sk, skb, mss_now);
1068 tcp_set_skb_tso_segs(sk, buff, mss_now);
1069
1070
1071
1072
1073 if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
1074 int diff = old_factor - tcp_skb_pcount(skb) -
1075 tcp_skb_pcount(buff);
1076
1077 if (diff)
1078 tcp_adjust_pcount(sk, skb, diff);
1079 }
1080
1081
1082 skb_header_release(buff);
1083 tcp_insert_write_queue_after(skb, buff, sk);
1084
1085 return 0;
1086}
1092static void __pskb_trim_head(struct sk_buff *skb, int len)
1093{
1094 int i, k, eat;
1095
1096 eat = len;
1097 k = 0;
1098 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1099 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1100
1101 if (size <= eat) {
1102 skb_frag_unref(skb, i);
1103 eat -= size;
1104 } else {
1105 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1106 if (eat) {
1107 skb_shinfo(skb)->frags[k].page_offset += eat;
1108 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
1109 eat = 0;
1110 }
1111 k++;
1112 }
1113 }
1114 skb_shinfo(skb)->nr_frags = k;
1115
1116 skb_reset_tail_pointer(skb);
1117 skb->data_len -= len;
1118 skb->len = skb->data_len;
1119}
1122int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1123{
1124 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1125 return -ENOMEM;
1126
1127
1128 if (unlikely(len < skb_headlen(skb)))
1129 __skb_pull(skb, len);
1130 else
1131 __pskb_trim_head(skb, len - skb_headlen(skb));
1132
1133 TCP_SKB_CB(skb)->seq += len;
1134 skb->ip_summed = CHECKSUM_PARTIAL;
1135
1136 skb->truesize -= len;
1137 sk->sk_wmem_queued -= len;
1138 sk_mem_uncharge(sk, len);
1139 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1140
1141
1142
1143
1144 if (tcp_skb_pcount(skb) > 1)
1145 tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
1146
1147 return 0;
1148}

/* Calculate MSS. Not accounting for SACKs here. */
int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	int mss_now;

	/* Calculate base mss without TCP options:
	 * it is MMS_S - sizeof(tcphdr) of RFC 1122.
	 */
	mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);

	/* Clamp it (mss_clamp does not include tcp options) */
	if (mss_now > tp->rx_opt.mss_clamp)
		mss_now = tp->rx_opt.mss_clamp;

	/* Now subtract optional transport overhead */
	mss_now -= icsk->icsk_ext_hdr_len;

	/* Then reserve room for a full set of TCP options and 8 bytes of data */
	if (mss_now < 48)
		mss_now = 48;

	/* Now subtract TCP options size, not including SACKs */
	mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);

	return mss_now;
}

/* Inverse of the above */
int tcp_mss_to_mtu(const struct sock *sk, int mss)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	int mtu;

	mtu = mss +
	      tp->tcp_header_len +
	      icsk->icsk_ext_hdr_len +
	      icsk->icsk_af_ops->net_header_len;

	return mtu;
}

/* MTU probing init per socket */
void tcp_mtup_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
				      icsk->icsk_af_ops->net_header_len;
	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
	icsk->icsk_mtup.probe_size = 0;
}
EXPORT_SYMBOL(tcp_mtup_init);

/* This function synchronizes the send MSS to the current pmtu/exthdr set.
 *
 * tp->rx_opt.mss_clamp is the mss negotiated at connection setup;
 * it does not account for TCP options.
 *
 * tp->mss_cache is the currently effective mss, not including options
 * or SACK blocks; it changes when the path MTU or extension headers
 * change.
 *
 * Returns the effective mss, caches it in tp->mss_cache, stores the
 * pmtu in icsk_pmtu_cookie and keeps the MTU-probe search range sane.
 */
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int mss_now;

	if (icsk->icsk_mtup.search_high > pmtu)
		icsk->icsk_mtup.search_high = pmtu;

	mss_now = tcp_mtu_to_mss(sk, pmtu);
	mss_now = tcp_bound_to_half_wnd(tp, mss_now);

	/* And store cached results */
	icsk->icsk_pmtu_cookie = pmtu;
	if (icsk->icsk_mtup.enabled)
		mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
	tp->mss_cache = mss_now;

	return mss_now;
}
EXPORT_SYMBOL(tcp_sync_mss);
1255unsigned int tcp_current_mss(struct sock *sk)
1256{
1257 const struct tcp_sock *tp = tcp_sk(sk);
1258 const struct dst_entry *dst = __sk_dst_get(sk);
1259 u32 mss_now;
1260 unsigned header_len;
1261 struct tcp_out_options opts;
1262 struct tcp_md5sig_key *md5;
1263
1264 mss_now = tp->mss_cache;
1265
1266 if (dst) {
1267 u32 mtu = dst_mtu(dst);
1268 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
1269 mss_now = tcp_sync_mss(sk, mtu);
1270 }
1271
1272 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
1273 sizeof(struct tcphdr);
1274
1275
1276
1277
1278 if (header_len != tp->tcp_header_len) {
1279 int delta = (int) header_len - tp->tcp_header_len;
1280 mss_now -= delta;
1281 }
1282
1283 return mss_now;
1284}
1287static void tcp_cwnd_validate(struct sock *sk)
1288{
1289 struct tcp_sock *tp = tcp_sk(sk);
1290
1291 if (tp->packets_out >= tp->snd_cwnd) {
1292
1293 tp->snd_cwnd_used = 0;
1294 tp->snd_cwnd_stamp = tcp_time_stamp;
1295 } else {
1296
1297 if (tp->packets_out > tp->snd_cwnd_used)
1298 tp->snd_cwnd_used = tp->packets_out;
1299
1300 if (sysctl_tcp_slow_start_after_idle &&
1301 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1302 tcp_cwnd_application_limited(sk);
1303 }
1304}
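
/* Returns the portion of skb which can be sent right away, limited by
 * both the congestion-window quota and the remaining send window, and
 * trimmed to a whole number of MSS-sized segments where possible.
 */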
1318static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
1319 unsigned int mss_now, unsigned int cwnd)
1320{
1321 const struct tcp_sock *tp = tcp_sk(sk);
1322 u32 needed, window, cwnd_len;
1323
1324 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1325 cwnd_len = mss_now * cwnd;
1326
1327 if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
1328 return cwnd_len;
1329
1330 needed = min(skb->len, window);
1331
1332 if (cwnd_len <= needed)
1333 return cwnd_len;
1334
1335 return needed - needed % mss_now;
1336}
1341static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1342 const struct sk_buff *skb)
1343{
1344 u32 in_flight, cwnd;
1345
1346
1347 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1348 tcp_skb_pcount(skb) == 1)
1349 return 1;
1350
1351 in_flight = tcp_packets_in_flight(tp);
1352 cwnd = tp->snd_cwnd;
1353 if (in_flight < cwnd)
1354 return (cwnd - in_flight);
1355
1356 return 0;
1357}
1363static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1364 unsigned int mss_now)
1365{
1366 int tso_segs = tcp_skb_pcount(skb);
1367
1368 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1369 tcp_set_skb_tso_segs(sk, skb, mss_now);
1370 tso_segs = tcp_skb_pcount(skb);
1371 }
1372 return tso_segs;
1373}

/* Minshall's variant of the Nagle send check. */
static inline int tcp_minshall_check(const struct tcp_sock *tp)
{
	return after(tp->snd_sml, tp->snd_una) &&
		!after(tp->snd_sml, tp->snd_nxt);
}

/* Return 0, if packet can be sent now without violating Nagle's rules:
 * 1. It is full sized.
 * 2. Or it contains FIN. (already checked by caller)
 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
 *    With Minshall's modification: all sent small packets are ACKed.
 */
static inline int tcp_nagle_check(const struct tcp_sock *tp,
				  const struct sk_buff *skb,
				  unsigned mss_now, int nonagle)
{
	return skb->len < mss_now &&
		((nonagle & TCP_NAGLE_CORK) ||
		 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
}

/* Return non-zero if the Nagle test allows this packet to be
 * sent now.
 */
static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
				 unsigned int cur_mss, int nonagle)
{
	/* The Nagle rule does not apply to frames which sit in the middle
	 * of the write queue (they have no chance to get new data).
	 * This is handled by the callers, which adjust 'nonagle' based on
	 * the skb's position in the send queue.
	 */
	if (nonagle & TCP_NAGLE_PUSH)
		return 1;

	/* Don't use the Nagle rule for urgent data (or for the final FIN).
	 * Nagle can also be ignored during F-RTO (see RFC 4138).
	 */
	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
		return 1;

	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
		return 1;

	return 0;
}

/* Does at least the first segment of SKB fit into the send window? */
static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
				   unsigned int cur_mss)
{
	u32 end_seq = TCP_SKB_CB(skb)->end_seq;

	if (skb->len > cur_mss)
		end_seq = TCP_SKB_CB(skb)->seq + cur_mss;

	return !after(end_seq, tcp_wnd_end(tp));
}
1442static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1443 unsigned int cur_mss, int nonagle)
1444{
1445 const struct tcp_sock *tp = tcp_sk(sk);
1446 unsigned int cwnd_quota;
1447
1448 tcp_init_tso_segs(sk, skb, cur_mss);
1449
1450 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
1451 return 0;
1452
1453 cwnd_quota = tcp_cwnd_test(tp, skb);
1454 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1455 cwnd_quota = 0;
1456
1457 return cwnd_quota;
1458}
1461int tcp_may_send_now(struct sock *sk)
1462{
1463 const struct tcp_sock *tp = tcp_sk(sk);
1464 struct sk_buff *skb = tcp_send_head(sk);
1465
1466 return skb &&
1467 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1468 (tcp_skb_is_last(sk, skb) ?
1469 tp->nonagle : TCP_NAGLE_PUSH));
1470}
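
/* Trim a TSO skb to LEN bytes and put the remaining data into a new
 * segment linked right after the original on the write queue.  Cheaper
 * than tcp_fragment() because the skb is entirely paged here (any skb
 * with linear data falls back to tcp_fragment()).
 */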
1479static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1480 unsigned int mss_now, gfp_t gfp)
1481{
1482 struct sk_buff *buff;
1483 int nlen = skb->len - len;
1484 u8 flags;
1485
1486
1487 if (skb->len != skb->data_len)
1488 return tcp_fragment(sk, skb, len, mss_now);
1489
1490 buff = sk_stream_alloc_skb(sk, 0, gfp);
1491 if (unlikely(buff == NULL))
1492 return -ENOMEM;
1493
1494 sk->sk_wmem_queued += buff->truesize;
1495 sk_mem_charge(sk, buff->truesize);
1496 buff->truesize += nlen;
1497 skb->truesize -= nlen;
1498
1499
1500 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
1501 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
1502 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1503
1504
1505 flags = TCP_SKB_CB(skb)->tcp_flags;
1506 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1507 TCP_SKB_CB(buff)->tcp_flags = flags;
1508
1509
1510 TCP_SKB_CB(buff)->sacked = 0;
1511
1512 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1513 skb_split(skb, buff, len);
1514
1515
1516 tcp_set_skb_tso_segs(sk, skb, mss_now);
1517 tcp_set_skb_tso_segs(sk, buff, mss_now);
1518
1519
1520 skb_header_release(buff);
1521 tcp_insert_write_queue_after(skb, buff, sk);
1522
1523 return 0;
1524}
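
/* Try to defer sending, if possible, in order to minimize the amount
 * of TSO splitting we do.  View it as a kind of TSO Nagle test.
 */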
1531static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1532{
1533 struct tcp_sock *tp = tcp_sk(sk);
1534 const struct inet_connection_sock *icsk = inet_csk(sk);
1535 u32 send_win, cong_win, limit, in_flight;
1536 int win_divisor;
1537
1538 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1539 goto send_now;
1540
1541 if (icsk->icsk_ca_state != TCP_CA_Open)
1542 goto send_now;
1543
1544
1545 if (tp->tso_deferred &&
1546 (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1547 goto send_now;
1548
1549 in_flight = tcp_packets_in_flight(tp);
1550
1551 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1552
1553 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1554
1555
1556 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
1557
1558 limit = min(send_win, cong_win);
1559
1560
1561 if (limit >= sk->sk_gso_max_size)
1562 goto send_now;
1563
1564
1565 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1566 goto send_now;
1567
1568 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1569 if (win_divisor) {
1570 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1571
1572
1573
1574
1575 chunk /= win_divisor;
1576 if (limit >= chunk)
1577 goto send_now;
1578 } else {
1579
1580
1581
1582
1583
1584 if (limit > tcp_max_burst(tp) * tp->mss_cache)
1585 goto send_now;
1586 }
1587
1588
1589 tp->tso_deferred = 1 | (jiffies << 1);
1590
1591 return 1;
1592
1593send_now:
1594 tp->tso_deferred = 0;
1595 return 0;
1596}
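
/* Create a new MTU probe if we are ready.
 * MTU probing deliberately sends larger-than-MSS packets from time to
 * time to discover routing changes that allow a larger path MTU.
 *
 * Returns 0 if we should wait to probe (no cwnd or too much in flight),
 *         1 if a probe was sent,
 *         -1 otherwise
 */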
1607static int tcp_mtu_probe(struct sock *sk)
1608{
1609 struct tcp_sock *tp = tcp_sk(sk);
1610 struct inet_connection_sock *icsk = inet_csk(sk);
1611 struct sk_buff *skb, *nskb, *next;
1612 int len;
1613 int probe_size;
1614 int size_needed;
1615 int copy;
1616 int mss_now;
1617
1618
1619
1620
1621
1622 if (!icsk->icsk_mtup.enabled ||
1623 icsk->icsk_mtup.probe_size ||
1624 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1625 tp->snd_cwnd < 11 ||
1626 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1627 return -1;
1628
1629
1630 mss_now = tcp_current_mss(sk);
1631 probe_size = 2 * tp->mss_cache;
1632 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1633 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1634
1635 return -1;
1636 }
1637
1638
1639 if (tp->write_seq - tp->snd_nxt < size_needed)
1640 return -1;
1641
1642 if (tp->snd_wnd < size_needed)
1643 return -1;
1644 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1645 return 0;
1646
1647
1648 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1649 if (!tcp_packets_in_flight(tp))
1650 return -1;
1651 else
1652 return 0;
1653 }
1654
1655
1656 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1657 return -1;
1658 sk->sk_wmem_queued += nskb->truesize;
1659 sk_mem_charge(sk, nskb->truesize);
1660
1661 skb = tcp_send_head(sk);
1662
1663 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1664 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1665 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1666 TCP_SKB_CB(nskb)->sacked = 0;
1667 nskb->csum = 0;
1668 nskb->ip_summed = skb->ip_summed;
1669
1670 tcp_insert_write_queue_before(nskb, skb, sk);
1671
1672 len = 0;
1673 tcp_for_write_queue_from_safe(skb, next, sk) {
1674 copy = min_t(int, skb->len, probe_size - len);
1675 if (nskb->ip_summed)
1676 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1677 else
1678 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1679 skb_put(nskb, copy),
1680 copy, nskb->csum);
1681
1682 if (skb->len <= copy) {
1683
1684
1685 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1686 tcp_unlink_write_queue(skb, sk);
1687 sk_wmem_free_skb(sk, skb);
1688 } else {
1689 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1690 ~(TCPHDR_FIN|TCPHDR_PSH);
1691 if (!skb_shinfo(skb)->nr_frags) {
1692 skb_pull(skb, copy);
1693 if (skb->ip_summed != CHECKSUM_PARTIAL)
1694 skb->csum = csum_partial(skb->data,
1695 skb->len, 0);
1696 } else {
1697 __pskb_trim_head(skb, copy);
1698 tcp_set_skb_tso_segs(sk, skb, mss_now);
1699 }
1700 TCP_SKB_CB(skb)->seq += copy;
1701 }
1702
1703 len += copy;
1704
1705 if (len >= probe_size)
1706 break;
1707 }
1708 tcp_init_tso_segs(sk, nskb, nskb->len);
1709
1710
1711
1712 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1713 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1714
1715
1716 tp->snd_cwnd--;
1717 tcp_event_new_data_sent(sk, nskb);
1718
1719 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1720 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
1721 tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;
1722
1723 return 1;
1724 }
1725
1726 return -1;
1727}
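
/* This routine writes packets to the network.  It advances the send
 * head as incoming ACKs open up the remote window for us.
 *
 * Returns 1 if no segments are in flight and we have queued segments,
 * but cannot send anything now because of SWS or another problem.
 */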
1740static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1741 int push_one, gfp_t gfp)
1742{
1743 struct tcp_sock *tp = tcp_sk(sk);
1744 struct sk_buff *skb;
1745 unsigned int tso_segs, sent_pkts;
1746 int cwnd_quota;
1747 int result;
1748
1749 sent_pkts = 0;
1750
1751 if (!push_one) {
1752
1753 result = tcp_mtu_probe(sk);
1754 if (!result) {
1755 return 0;
1756 } else if (result > 0) {
1757 sent_pkts = 1;
1758 }
1759 }
1760
1761 while ((skb = tcp_send_head(sk))) {
1762 unsigned int limit;
1763
1764 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1765 BUG_ON(!tso_segs);
1766
1767 cwnd_quota = tcp_cwnd_test(tp, skb);
1768 if (!cwnd_quota)
1769 break;
1770
1771 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
1772 break;
1773
1774 if (tso_segs == 1) {
1775 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
1776 (tcp_skb_is_last(sk, skb) ?
1777 nonagle : TCP_NAGLE_PUSH))))
1778 break;
1779 } else {
1780 if (!push_one && tcp_tso_should_defer(sk, skb))
1781 break;
1782 }
1783
1784 limit = mss_now;
1785 if (tso_segs > 1 && !tcp_urg_mode(tp))
1786 limit = tcp_mss_split_point(sk, skb, mss_now,
1787 cwnd_quota);
1788
1789 if (skb->len > limit &&
1790 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
1791 break;
1792
1793 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1794
1795 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
1796 break;
1797
1798
1799
1800
1801 tcp_event_new_data_sent(sk, skb);
1802
1803 tcp_minshall_update(tp, mss_now, skb);
1804 sent_pkts += tcp_skb_pcount(skb);
1805
1806 if (push_one)
1807 break;
1808 }
1809 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
1810 tp->prr_out += sent_pkts;
1811
1812 if (likely(sent_pkts)) {
1813 tcp_cwnd_validate(sk);
1814 return 0;
1815 }
1816 return !tp->packets_out && tcp_send_head(sk);
1817}
1823void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1824 int nonagle)
1825{
1826
1827
1828
1829
1830 if (unlikely(sk->sk_state == TCP_CLOSE))
1831 return;
1832
1833 if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
1834 tcp_check_probe_timer(sk);
1835}
1840void tcp_push_one(struct sock *sk, unsigned int mss_now)
1841{
1842 struct sk_buff *skb = tcp_send_head(sk);
1843
1844 BUG_ON(!skb || skb->len < mss_now);
1845
1846 tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
1847}
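
/* This function returns the amount that we can raise the usable window
 * based on the following constraints:
 *
 * 1. The window can never be shrunk once it is offered (RFC 793).
 * 2. We limit memory per socket.
 *
 * Silly-window-syndrome avoidance is also taken into account: the window
 * is only advertised in units the receiver can make use of (multiples of
 * the scale factor, or of whole segments when no scaling is in use).
 */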
1901u32 __tcp_select_window(struct sock *sk)
1902{
1903 struct inet_connection_sock *icsk = inet_csk(sk);
1904 struct tcp_sock *tp = tcp_sk(sk);
1905
1906
1907
1908
1909
1910
1911 int mss = icsk->icsk_ack.rcv_mss;
1912 int free_space = tcp_space(sk);
1913 int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
1914 int window;
1915
1916 if (mss > full_space)
1917 mss = full_space;
1918
1919 if (free_space < (full_space >> 1)) {
1920 icsk->icsk_ack.quick = 0;
1921
1922 if (tcp_memory_pressure)
1923 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
1924 4U * tp->advmss);
1925
1926 if (free_space < mss)
1927 return 0;
1928 }
1929
1930 if (free_space > tp->rcv_ssthresh)
1931 free_space = tp->rcv_ssthresh;
1932
1933
1934
1935
1936 window = tp->rcv_wnd;
1937 if (tp->rx_opt.rcv_wscale) {
1938 window = free_space;
1939
1940
1941
1942
1943
1944 if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
1945 window = (((window >> tp->rx_opt.rcv_wscale) + 1)
1946 << tp->rx_opt.rcv_wscale);
1947 } else {
1948
1949
1950
1951
1952
1953
1954
1955
1956 if (window <= free_space - mss || window > free_space)
1957 window = (free_space / mss) * mss;
1958 else if (mss == full_space &&
1959 free_space > window + (full_space >> 1))
1960 window = free_space;
1961 }
1962
1963 return window;
1964}
1967static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1968{
1969 struct tcp_sock *tp = tcp_sk(sk);
1970 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1971 int skb_size, next_skb_size;
1972
1973 skb_size = skb->len;
1974 next_skb_size = next_skb->len;
1975
1976 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
1977
1978 tcp_highest_sack_combine(sk, next_skb, skb);
1979
1980 tcp_unlink_write_queue(next_skb, sk);
1981
1982 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
1983 next_skb_size);
1984
1985 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
1986 skb->ip_summed = CHECKSUM_PARTIAL;
1987
1988 if (skb->ip_summed != CHECKSUM_PARTIAL)
1989 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
1990
1991
1992 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
1993
1994
1995 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
1996
1997
1998
1999
2000 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2001
2002
2003 tcp_clear_retrans_hints_partial(tp);
2004 if (next_skb == tp->retransmit_skb_hint)
2005 tp->retransmit_skb_hint = skb;
2006
2007 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
2008
2009 sk_wmem_free_skb(sk, next_skb);
2010}
2013static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2014{
2015 if (tcp_skb_pcount(skb) > 1)
2016 return 0;
2017
2018 if (skb_shinfo(skb)->nr_frags != 0)
2019 return 0;
2020 if (skb_cloned(skb))
2021 return 0;
2022 if (skb == tcp_send_head(sk))
2023 return 0;
2024
2025 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2026 return 0;
2027
2028 return 1;
2029}
2034static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2035 int space)
2036{
2037 struct tcp_sock *tp = tcp_sk(sk);
2038 struct sk_buff *skb = to, *tmp;
2039 int first = 1;
2040
2041 if (!sysctl_tcp_retrans_collapse)
2042 return;
2043 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2044 return;
2045
2046 tcp_for_write_queue_from_safe(skb, tmp, sk) {
2047 if (!tcp_can_collapse(sk, skb))
2048 break;
2049
2050 space -= skb->len;
2051
2052 if (first) {
2053 first = 0;
2054 continue;
2055 }
2056
2057 if (space < 0)
2058 break;
2059
2060
2061
2062 if (skb->len > skb_tailroom(to))
2063 break;
2064
2065 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2066 break;
2067
2068 tcp_collapse_retrans(sk, to);
2069 }
2070}
2076int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2077{
2078 struct tcp_sock *tp = tcp_sk(sk);
2079 struct inet_connection_sock *icsk = inet_csk(sk);
2080 unsigned int cur_mss;
2081 int err;
2082
2083
2084 if (icsk->icsk_mtup.probe_size) {
2085 icsk->icsk_mtup.probe_size = 0;
2086 }
2087
2088
2089
2090
2091 if (atomic_read(&sk->sk_wmem_alloc) >
2092 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2093 return -EAGAIN;
2094
2095 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2096 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2097 BUG();
2098 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
2099 return -ENOMEM;
2100 }
2101
2102 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
2103 return -EHOSTUNREACH;
2104
2105 cur_mss = tcp_current_mss(sk);
2106
2107
2108
2109
2110
2111
2112 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
2113 TCP_SKB_CB(skb)->seq != tp->snd_una)
2114 return -EAGAIN;
2115
2116 if (skb->len > cur_mss) {
2117 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
2118 return -ENOMEM;
2119 } else {
2120 int oldpcount = tcp_skb_pcount(skb);
2121
2122 if (unlikely(oldpcount > 1)) {
2123 tcp_init_tso_segs(sk, skb, cur_mss);
2124 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
2125 }
2126 }
2127
2128 tcp_retrans_try_collapse(sk, skb, cur_mss);
2129
2130
2131
2132
2133
2134 if (skb->len > 0 &&
2135 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
2136 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2137 if (!pskb_trim(skb, 0)) {
2138
2139 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
2140 TCP_SKB_CB(skb)->tcp_flags);
2141 skb->ip_summed = CHECKSUM_NONE;
2142 }
2143 }
2144
2145
2146
2147
2148 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2149
2150 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2151
2152 if (err == 0) {
2153
2154 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2155
2156 tp->total_retrans++;
2157
2158#if FASTRETRANS_DEBUG > 0
2159 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2160 if (net_ratelimit())
2161 printk(KERN_DEBUG "retrans_out leaked.\n");
2162 }
2163#endif
2164 if (!tp->retrans_out)
2165 tp->lost_retrans_low = tp->snd_nxt;
2166 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2167 tp->retrans_out += tcp_skb_pcount(skb);
2168
2169
2170 if (!tp->retrans_stamp)
2171 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2172
2173 tp->undo_retrans += tcp_skb_pcount(skb);
2174
2175
2176
2177
2178 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2179 }
2180 return err;
2181}
2186static int tcp_can_forward_retransmit(struct sock *sk)
2187{
2188 const struct inet_connection_sock *icsk = inet_csk(sk);
2189 const struct tcp_sock *tp = tcp_sk(sk);
2190
2191
2192 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2193 return 0;
2194
2195
2196 if (tcp_is_reno(tp))
2197 return 0;
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207 if (tcp_may_send_now(sk))
2208 return 0;
2209
2210 return 1;
2211}
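
/* This gets called after a retransmit timeout, when the initially
 * retransmitted data is acknowledged.  It tries to continue resending
 * the rest of the retransmit queue, until either we've sent it all or
 * the congestion window limit is reached.
 */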
2221void tcp_xmit_retransmit_queue(struct sock *sk)
2222{
2223 const struct inet_connection_sock *icsk = inet_csk(sk);
2224 struct tcp_sock *tp = tcp_sk(sk);
2225 struct sk_buff *skb;
2226 struct sk_buff *hole = NULL;
2227 u32 last_lost;
2228 int mib_idx;
2229 int fwd_rexmitting = 0;
2230
2231 if (!tp->packets_out)
2232 return;
2233
2234 if (!tp->lost_out)
2235 tp->retransmit_high = tp->snd_una;
2236
2237 if (tp->retransmit_skb_hint) {
2238 skb = tp->retransmit_skb_hint;
2239 last_lost = TCP_SKB_CB(skb)->end_seq;
2240 if (after(last_lost, tp->retransmit_high))
2241 last_lost = tp->retransmit_high;
2242 } else {
2243 skb = tcp_write_queue_head(sk);
2244 last_lost = tp->snd_una;
2245 }
2246
2247 tcp_for_write_queue_from(skb, sk) {
2248 __u8 sacked = TCP_SKB_CB(skb)->sacked;
2249
2250 if (skb == tcp_send_head(sk))
2251 break;
2252
2253 if (hole == NULL)
2254 tp->retransmit_skb_hint = skb;
2255
2256
2257
2258
2259
2260
2261
2262
2263 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
2264 return;
2265
2266 if (fwd_rexmitting) {
2267begin_fwd:
2268 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2269 break;
2270 mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
2271
2272 } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
2273 tp->retransmit_high = last_lost;
2274 if (!tcp_can_forward_retransmit(sk))
2275 break;
2276
2277 if (hole != NULL) {
2278 skb = hole;
2279 hole = NULL;
2280 }
2281 fwd_rexmitting = 1;
2282 goto begin_fwd;
2283
2284 } else if (!(sacked & TCPCB_LOST)) {
2285 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2286 hole = skb;
2287 continue;
2288
2289 } else {
2290 last_lost = TCP_SKB_CB(skb)->end_seq;
2291 if (icsk->icsk_ca_state != TCP_CA_Loss)
2292 mib_idx = LINUX_MIB_TCPFASTRETRANS;
2293 else
2294 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2295 }
2296
2297 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2298 continue;
2299
2300 if (tcp_retransmit_skb(sk, skb))
2301 return;
2302 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2303
2304 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
2305 tp->prr_out += tcp_skb_pcount(skb);
2306
2307 if (skb == tcp_write_queue_head(sk))
2308 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2309 inet_csk(sk)->icsk_rto,
2310 TCP_RTO_MAX);
2311 }
2312}
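
/* Send a FIN.  The caller locks the socket for us.
 * We should try to send a FIN packet really hard, but eventually give up.
 */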
2317void tcp_send_fin(struct sock *sk)
2318{
2319 struct tcp_sock *tp = tcp_sk(sk);
2320 struct sk_buff *skb = tcp_write_queue_tail(sk);
2321 int mss_now;
2322
2323
2324
2325
2326
2327 mss_now = tcp_current_mss(sk);
2328
2329 if (tcp_send_head(sk) != NULL) {
2330 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2331 TCP_SKB_CB(skb)->end_seq++;
2332 tp->write_seq++;
2333 } else {
2334
2335 for (;;) {
2336 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2337 sk->sk_allocation);
2338 if (skb)
2339 break;
2340 yield();
2341 }
2342
2343
2344 skb_reserve(skb, MAX_TCP_HEADER);
2345
2346 tcp_init_nondata_skb(skb, tp->write_seq,
2347 TCPHDR_ACK | TCPHDR_FIN);
2348 tcp_queue_skb(sk, skb);
2349 }
2350 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2351}
2358void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2359{
2360 struct sk_buff *skb;
2361
2362
2363 skb = alloc_skb(MAX_TCP_HEADER, priority);
2364 if (!skb) {
2365 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2366 return;
2367 }
2368
2369
2370 skb_reserve(skb, MAX_TCP_HEADER);
2371 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2372 TCPHDR_ACK | TCPHDR_RST);
2373
2374 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2375 if (tcp_transmit_skb(sk, skb, 0, priority))
2376 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2377
2378 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2379}
2387int tcp_send_synack(struct sock *sk)
2388{
2389 struct sk_buff *skb;
2390
2391 skb = tcp_write_queue_head(sk);
2392 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2393 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2394 return -EFAULT;
2395 }
2396 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2397 if (skb_cloned(skb)) {
2398 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2399 if (nskb == NULL)
2400 return -ENOMEM;
2401 tcp_unlink_write_queue(skb, sk);
2402 skb_header_release(nskb);
2403 __tcp_add_write_queue_head(sk, nskb);
2404 sk_wmem_free_skb(sk, skb);
2405 sk->sk_wmem_queued += nskb->truesize;
2406 sk_mem_charge(sk, nskb->truesize);
2407 skb = nskb;
2408 }
2409
2410 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2411 TCP_ECN_send_synack(tcp_sk(sk), skb);
2412 }
2413 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2414 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2415}
2418struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2419 struct request_sock *req,
2420 struct request_values *rvp)
2421{
2422 struct tcp_out_options opts;
2423 struct tcp_extend_values *xvp = tcp_xv(rvp);
2424 struct inet_request_sock *ireq = inet_rsk(req);
2425 struct tcp_sock *tp = tcp_sk(sk);
2426 const struct tcp_cookie_values *cvp = tp->cookie_values;
2427 struct tcphdr *th;
2428 struct sk_buff *skb;
2429 struct tcp_md5sig_key *md5;
2430 int tcp_header_size;
2431 int mss;
2432 int s_data_desired = 0;
2433
2434 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
2435 s_data_desired = cvp->s_data_desired;
2436 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
2437 if (skb == NULL)
2438 return NULL;
2439
2440
2441 skb_reserve(skb, MAX_TCP_HEADER);
2442
2443 skb_dst_set(skb, dst_clone(dst));
2444
2445 mss = dst_metric_advmss(dst);
2446 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2447 mss = tp->rx_opt.user_mss;
2448
2449 if (req->rcv_wnd == 0) {
2450 __u8 rcv_wscale;
2451
2452 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2453
2454
2455 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2456 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2457 req->window_clamp = tcp_full_space(sk);
2458
2459
2460 tcp_select_initial_window(tcp_full_space(sk),
2461 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
2462 &req->rcv_wnd,
2463 &req->window_clamp,
2464 ireq->wscale_ok,
2465 &rcv_wscale,
2466 dst_metric(dst, RTAX_INITRWND));
2467 ireq->rcv_wscale = rcv_wscale;
2468 }
2469
2470 memset(&opts, 0, sizeof(opts));
2471#ifdef CONFIG_SYN_COOKIES
2472 if (unlikely(req->cookie_ts))
2473 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
2474 else
2475#endif
2476 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2477 tcp_header_size = tcp_synack_options(sk, req, mss,
2478 skb, &opts, &md5, xvp)
2479 + sizeof(*th);
2480
2481 skb_push(skb, tcp_header_size);
2482 skb_reset_transport_header(skb);
2483
2484 th = tcp_hdr(skb);
2485 memset(th, 0, sizeof(struct tcphdr));
2486 th->syn = 1;
2487 th->ack = 1;
2488 TCP_ECN_make_synack(req, th);
2489 th->source = ireq->loc_port;
2490 th->dest = ireq->rmt_port;
2491
2492
2493
2494 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2495 TCPHDR_SYN | TCPHDR_ACK);
2496
2497 if (OPTION_COOKIE_EXTENSION & opts.options) {
2498 if (s_data_desired) {
2499 u8 *buf = skb_put(skb, s_data_desired);
2500
2501
2502 memcpy(buf, cvp->s_data_payload, s_data_desired);
2503 TCP_SKB_CB(skb)->end_seq += s_data_desired;
2504 }
2505
2506 if (opts.hash_size > 0) {
2507 __u32 workspace[SHA_WORKSPACE_WORDS];
2508 u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
2509 u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
2510
2511
2512
2513
2514
2515 *tail-- ^= opts.tsval;
2516 *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
2517 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2518
2519
2520 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2521 *tail-- ^= (u32)(unsigned long)cvp;
2522
2523 sha_transform((__u32 *)&xvp->cookie_bakery[0],
2524 (char *)mess,
2525 &workspace[0]);
2526 opts.hash_location =
2527 (__u8 *)&xvp->cookie_bakery[0];
2528 }
2529 }
2530
2531 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2532 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2533
2534
2535 th->window = htons(min(req->rcv_wnd, 65535U));
2536 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2537 th->doff = (tcp_header_size >> 2);
2538 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
2539
2540#ifdef CONFIG_TCP_MD5SIG
2541
2542 if (md5) {
2543 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
2544 md5, NULL, req, skb);
2545 }
2546#endif
2547
2548 return skb;
2549}
2550EXPORT_SYMBOL(tcp_make_synack);
2553static void tcp_connect_init(struct sock *sk)
2554{
2555 const struct dst_entry *dst = __sk_dst_get(sk);
2556 struct tcp_sock *tp = tcp_sk(sk);
2557 __u8 rcv_wscale;
2558
2559
2560
2561
2562 tp->tcp_header_len = sizeof(struct tcphdr) +
2563 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
2564
2565#ifdef CONFIG_TCP_MD5SIG
2566 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
2567 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
2568#endif
2569
2570
2571 if (tp->rx_opt.user_mss)
2572 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2573 tp->max_window = 0;
2574 tcp_mtup_init(sk);
2575 tcp_sync_mss(sk, dst_mtu(dst));
2576
2577 if (!tp->window_clamp)
2578 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
2579 tp->advmss = dst_metric_advmss(dst);
2580 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
2581 tp->advmss = tp->rx_opt.user_mss;
2582
2583 tcp_initialize_rcv_mss(sk);
2584
2585
2586 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2587 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2588 tp->window_clamp = tcp_full_space(sk);
2589
2590 tcp_select_initial_window(tcp_full_space(sk),
2591 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2592 &tp->rcv_wnd,
2593 &tp->window_clamp,
2594 sysctl_tcp_window_scaling,
2595 &rcv_wscale,
2596 dst_metric(dst, RTAX_INITRWND));
2597
2598 tp->rx_opt.rcv_wscale = rcv_wscale;
2599 tp->rcv_ssthresh = tp->rcv_wnd;
2600
2601 sk->sk_err = 0;
2602 sock_reset_flag(sk, SOCK_DONE);
2603 tp->snd_wnd = 0;
2604 tcp_init_wl(tp, 0);
2605 tp->snd_una = tp->write_seq;
2606 tp->snd_sml = tp->write_seq;
2607 tp->snd_up = tp->write_seq;
2608 tp->rcv_nxt = 0;
2609 tp->rcv_wup = 0;
2610 tp->copied_seq = 0;
2611
2612 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2613 inet_csk(sk)->icsk_retransmits = 0;
2614 tcp_clear_retrans(tp);
2615}
2618int tcp_connect(struct sock *sk)
2619{
2620 struct tcp_sock *tp = tcp_sk(sk);
2621 struct sk_buff *buff;
2622 int err;
2623
2624 tcp_connect_init(sk);
2625
2626 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
2627 if (unlikely(buff == NULL))
2628 return -ENOBUFS;
2629
2630
2631 skb_reserve(buff, MAX_TCP_HEADER);
2632
2633 tp->snd_nxt = tp->write_seq;
2634 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2635 TCP_ECN_send_syn(sk, buff);
2636
2637
2638 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2639 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
2640 skb_header_release(buff);
2641 __tcp_add_write_queue_tail(sk, buff);
2642 sk->sk_wmem_queued += buff->truesize;
2643 sk_mem_charge(sk, buff->truesize);
2644 tp->packets_out += tcp_skb_pcount(buff);
2645 err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
2646 if (err == -ECONNREFUSED)
2647 return err;
2648
2649
2650
2651
2652 tp->snd_nxt = tp->write_seq;
2653 tp->pushed_seq = tp->write_seq;
2654 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
2655
2656
2657 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2658 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2659 return 0;
2660}
2661EXPORT_SYMBOL(tcp_connect);
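
/* Send out a delayed ack; the caller does the policy checking to see
 * if we should even be here.
 */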
2667void tcp_send_delayed_ack(struct sock *sk)
2668{
2669 struct inet_connection_sock *icsk = inet_csk(sk);
2670 int ato = icsk->icsk_ack.ato;
2671 unsigned long timeout;
2672
2673 if (ato > TCP_DELACK_MIN) {
2674 const struct tcp_sock *tp = tcp_sk(sk);
2675 int max_ato = HZ / 2;
2676
2677 if (icsk->icsk_ack.pingpong ||
2678 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
2679 max_ato = TCP_DELACK_MAX;
2680
2681
2682
2683
2684
2685
2686
2687 if (tp->srtt) {
2688 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
2689
2690 if (rtt < max_ato)
2691 max_ato = rtt;
2692 }
2693
2694 ato = min(ato, max_ato);
2695 }
2696
2697
2698 timeout = jiffies + ato;
2699
2700
2701 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
2702
2703
2704
2705 if (icsk->icsk_ack.blocked ||
2706 time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
2707 tcp_send_ack(sk);
2708 return;
2709 }
2710
2711 if (!time_before(timeout, icsk->icsk_ack.timeout))
2712 timeout = icsk->icsk_ack.timeout;
2713 }
2714 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
2715 icsk->icsk_ack.timeout = timeout;
2716 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
2717}
2720void tcp_send_ack(struct sock *sk)
2721{
2722 struct sk_buff *buff;
2723
2724
2725 if (sk->sk_state == TCP_CLOSE)
2726 return;
2727
2728
2729
2730
2731
2732 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2733 if (buff == NULL) {
2734 inet_csk_schedule_ack(sk);
2735 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
2736 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
2737 TCP_DELACK_MAX, TCP_RTO_MAX);
2738 return;
2739 }
2740
2741
2742 skb_reserve(buff, MAX_TCP_HEADER);
2743 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
2744
2745
2746 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2747 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2748}
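
/* This routine sends a packet with an out-of-date sequence number.
 * It assumes the other end will try to ack it.
 *
 * In urgent mode we send two zero-sized segments: one with
 * SEG.SEQ = SND.UNA to deliver the urgent pointer, and one out-of-date
 * with SND.UNA-1 to probe the window.
 */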
2761static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2762{
2763 struct tcp_sock *tp = tcp_sk(sk);
2764 struct sk_buff *skb;
2765
2766
2767 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2768 if (skb == NULL)
2769 return -1;
2770
2771
2772 skb_reserve(skb, MAX_TCP_HEADER);
2773
2774
2775
2776
2777 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
2778 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2779 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2780}
2783int tcp_write_wakeup(struct sock *sk)
2784{
2785 struct tcp_sock *tp = tcp_sk(sk);
2786 struct sk_buff *skb;
2787
2788 if (sk->sk_state == TCP_CLOSE)
2789 return -1;
2790
2791 if ((skb = tcp_send_head(sk)) != NULL &&
2792 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
2793 int err;
2794 unsigned int mss = tcp_current_mss(sk);
2795 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
2796
2797 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
2798 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
2799
2800
2801
2802
2803
2804 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2805 skb->len > mss) {
2806 seg_size = min(seg_size, mss);
2807 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
2808 if (tcp_fragment(sk, skb, seg_size, mss))
2809 return -1;
2810 } else if (!tcp_skb_pcount(skb))
2811 tcp_set_skb_tso_segs(sk, skb, mss);
2812
2813 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
2814 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2815 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2816 if (!err)
2817 tcp_event_new_data_sent(sk, skb);
2818 return err;
2819 } else {
2820 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
2821 tcp_xmit_probe_skb(sk, 1);
2822 return tcp_xmit_probe_skb(sk, 0);
2823 }
2824}
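
/* A window probe timeout has occurred.  If the window is not closed,
 * send a partial packet, else send a zero-window probe.
 */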
2829void tcp_send_probe0(struct sock *sk)
2830{
2831 struct inet_connection_sock *icsk = inet_csk(sk);
2832 struct tcp_sock *tp = tcp_sk(sk);
2833 int err;
2834
2835 err = tcp_write_wakeup(sk);
2836
2837 if (tp->packets_out || !tcp_send_head(sk)) {
2838
2839 icsk->icsk_probes_out = 0;
2840 icsk->icsk_backoff = 0;
2841 return;
2842 }
2843
2844 if (err <= 0) {
2845 if (icsk->icsk_backoff < sysctl_tcp_retries2)
2846 icsk->icsk_backoff++;
2847 icsk->icsk_probes_out++;
2848 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2849 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
2850 TCP_RTO_MAX);
2851 } else {
2852
2853
2854
2855
2856
2857
2858 if (!icsk->icsk_probes_out)
2859 icsk->icsk_probes_out = 1;
2860 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2861 min(icsk->icsk_rto << icsk->icsk_backoff,
2862 TCP_RESOURCE_PROBE_INTERVAL),
2863 TCP_RTO_MAX);
2864 }
2865}