/*
 * TCP output engine (net/ipv4/tcp_output.c): builds and transmits TCP
 * segments.  Implements segment construction (tcp_transmit_skb), window
 * selection, TSO splitting, MTU probing, Nagle and congestion window
 * tests, and the retransmission engine.
 */

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/module.h>

/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* People can turn this on to work with those rare, broken TCPs that
 * interpret the window field as a signed quantity.
 */
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* This limits the percentage of the congestion window which we
 * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

int sysctl_tcp_mtu_probing __read_mostly = 0;
int sysctl_tcp_base_mss __read_mostly = 512;

/* By default, RFC2861 behavior.  */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

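/* Account for new data that has been sent to the network. */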
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int prior_packets = tp->packets_out;

	tcp_advance_send_head(sk, skb);
	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

	/* Don't override Nagle indefinitely with F-RTO */
	if (tp->frto_counter == 2)
		tp->frto_counter = 3;

	tp->packets_out += tcp_skb_pcount(skb);
	if (!prior_packets)
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}

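/* SND.NXT, if the window was not shrunk.
 * If the window has been shrunk, anything between SND.UNA and
 * SND.UNA+SND.WND may already be invalid, so fall back to the
 * right edge of the window.
 */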
static inline __u32 tcp_acceptable_seq(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
		return tp->snd_nxt;
	else
		return tcp_wnd_end(tp);
}

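/* Calculate the MSS to advertise in a SYN segment: start from
 * tp->advmss and clamp it by the route's cached RTAX_ADVMSS metric,
 * remembering the result for later segments.
 */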
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
		mss = dst_metric(dst, RTAX_ADVMSS);
		tp->advmss = mss;
	}

	return (__u16)mss;
}

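/* RFC2861.  Reset CWND after an idle period longer than RTO to the
 * "restart window".  This is the first part of the cwnd validation
 * mechanism.
 */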
static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
{
	struct tcp_sock *tp = tcp_sk(sk);
	s32 delta = tcp_time_stamp - tp->lsndtime;
	u32 restart_cwnd = tcp_init_cwnd(tp, dst);
	u32 cwnd = tp->snd_cwnd;

	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

	tp->snd_ssthresh = tcp_current_ssthresh(sk);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->snd_cwnd_used = 0;
}

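/* Congestion state accounting after a packet has been sent. */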
static void tcp_event_data_sent(struct tcp_sock *tp,
				struct sk_buff *skb, struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 now = tcp_time_stamp;

	if (sysctl_tcp_slow_start_after_idle &&
	    (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
		tcp_cwnd_restart(sk, __sk_dst_get(sk));

	tp->lsndtime = now;

	/* If it is a reply for ato after last received
	 * packet, enter pingpong mode.
	 */
	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
		icsk->icsk_ack.pingpong = 1;
}

/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

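/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered. Store the results in the tp structure.
 * NOTE: for smooth operation initial space offering should
 * be a multiple of mss if possible. We assume here that mss >= 1.
 * This MUST be enforced by all callers.
 */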
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	/* If no clamp set the clamp to the max possible scaled window */
	if (*window_clamp == 0)
		(*window_clamp) = (65535 << 14);
	space = min(*window_clamp, space);

	/* Quantize space offering to a multiple of mss if possible. */
	if (space > mss)
		space = (space / mss) * mss;

	/* NOTE: offering an initial window larger than 32767
	 * will break some buggy TCP stacks. If the admin tells us
	 * it is likely we could be speaking with such a buggy stack
	 * we will truncate our initial window offering to 32K-1
	 * unless the remote has sent us a window scaling option,
	 * which we interpret as a sign the remote TCP is not
	 * misinterpreting the window field as a signed quantity.
	 */
	if (sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = space;

	(*rcv_wscale) = 0;
	if (wscale_ok) {
		/* Set window scaling on max possible window.
		 * See RFC1323 for an explanation of the limit to 14.
		 */
		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		while (space > 65535 && (*rcv_wscale) < 14) {
			space >>= 1;
			(*rcv_wscale)++;
		}
	}

	/* Set the initial window to a value enough for senders starting
	 * with an initial congestion window of 2-4 segments.
	 */
	if (mss > (1 << *rcv_wscale)) {
		int init_cwnd = 4;
		if (mss > 1460 * 3)
			init_cwnd = 2;
		else if (mss > 1460)
			init_cwnd = 3;
		if (*rcv_wnd > init_cwnd * mss)
			*rcv_wnd = init_cwnd * mss;
	}

	/* Set the clamp no higher than max representable value */
	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}

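/* Chose a new window to advertise, update state in tcp_sock for the
 * socket, and return the result with RFC1323 scaling applied.  The return
 * value can be stuffed directly into th->window for an outgoing frame.
 */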
static u16 tcp_select_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	/* Never shrink the offered window */
	if (new_win < cur_win) {
		/* Danger Will Robinson!
		 * Don't update rcv_wup/rcv_wnd here or else
		 * we will not be able to advertise a zero
		 * window in time.  --DaveM
		 *
		 * Relax Will Robinson.
		 */
		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	/* Make sure we do not exceed the maximum possible
	 * scaled window.
	 */
	if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
		new_win = min(new_win, MAX_TCP_WINDOW);
	else
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

	/* RFC1323 scaling applied */
	new_win >>= tp->rx_opt.rcv_wscale;

	/* If we advertise zero window, disable fast path. */
	if (new_win == 0)
		tp->pred_flags = 0;

	return new_win;
}

/* Packet ECN state for a SYN-ACK */
static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
	if (!(tp->ecn_flags & TCP_ECN_OK))
		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
}

/* Packet ECN state for a SYN.  */
static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->ecn_flags = 0;
	if (sysctl_tcp_ecn == 1) {
		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
		tp->ecn_flags = TCP_ECN_OK;
	}
}

static __inline__ void
TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
{
	if (inet_rsk(req)->ecn_ok)
		th->ece = 1;
}

/* Set up ECN state for a packet on an ESTABLISHED socket that is about to
 * be sent.
 */
static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
				int tcp_header_len)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->ecn_flags & TCP_ECN_OK) {
		/* Not-retransmitted data segment: set ECT and inject CWR. */
		if (skb->len != tcp_header_len &&
		    !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
			INET_ECN_xmit(sk);
			if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
				tcp_hdr(skb)->cwr = 1;
				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
			}
		} else {
			/* ACK or retransmitted segment: clear ECT|CE */
			INET_ECN_dontxmit(sk);
		}
		if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
			tcp_hdr(skb)->ece = 1;
	}
}

/* Constructs common control bits of non-data skb. If SYN/FIN is present,
 * auto increment end seqno.
 */
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	skb->csum = 0;

	TCP_SKB_CB(skb)->flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;

	skb_shinfo(skb)->gso_segs = 1;
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_type = 0;

	TCP_SKB_CB(skb)->seq = seq;
	if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
		seq++;
	TCP_SKB_CB(skb)->end_seq = seq;
}

static inline int tcp_urg_mode(const struct tcp_sock *tp)
{
	return tp->snd_una != tp->snd_up;
}

#define OPTION_SACK_ADVERTISE	(1 << 0)
#define OPTION_TS		(1 << 1)
#define OPTION_MD5		(1 << 2)
#define OPTION_WSCALE		(1 << 3)

struct tcp_out_options {
	u8 options;		/* bit field of OPTION_* */
	u8 ws;			/* window scale, 0 to disable */
	u8 num_sack_blocks;	/* number of SACK blocks to include */
	u16 mss;		/* 0 to disable */
	__u32 tsval, tsecr;	/* need to include OPTION_TS */
};

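/* Write previously computed TCP options to the packet.
 *
 * Beware: the ordering of TCP options matters to some broken stacks in
 * the wild, so options are emitted in a fixed order: MD5, MSS,
 * SACK-permitted/timestamps, window scale, then SACK blocks.
 */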
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
			      const struct tcp_out_options *opts,
			      __u8 **md5_hash) {
	if (unlikely(OPTION_MD5 & opts->options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_MD5SIG << 8) |
			       TCPOLEN_MD5SIG);
		*md5_hash = (__u8 *)ptr;
		ptr += 4;
	} else {
		*md5_hash = NULL;
	}

	if (unlikely(opts->mss)) {
		*ptr++ = htonl((TCPOPT_MSS << 24) |
			       (TCPOLEN_MSS << 16) |
			       opts->mss);
	}

	if (likely(OPTION_TS & opts->options)) {
		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
				       (TCPOLEN_SACK_PERM << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
		} else {
			*ptr++ = htonl((TCPOPT_NOP << 24) |
				       (TCPOPT_NOP << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
		}
		*ptr++ = htonl(opts->tsval);
		*ptr++ = htonl(opts->tsecr);
	}

	if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
		     !(OPTION_TS & opts->options))) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_SACK_PERM << 8) |
			       TCPOLEN_SACK_PERM);
	}

	if (unlikely(OPTION_WSCALE & opts->options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_WINDOW << 16) |
			       (TCPOLEN_WINDOW << 8) |
			       opts->ws);
	}

	if (unlikely(opts->num_sack_blocks)) {
		struct tcp_sack_block *sp = tp->rx_opt.dsack ?
			tp->duplicate_sack : tp->selective_acks;
		int this_sack;

		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_SACK << 8) |
			       (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
						     TCPOLEN_SACK_PERBLOCK)));

		for (this_sack = 0; this_sack < opts->num_sack_blocks;
		     ++this_sack) {
			*ptr++ = htonl(sp[this_sack].start_seq);
			*ptr++ = htonl(sp[this_sack].end_seq);
		}

		tp->rx_opt.dsack = 0;
	}
}

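/* Compute TCP options for SYN packets.  This is not the final
 * network wire format yet.
 */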
static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
				struct tcp_out_options *opts,
				struct tcp_md5sig_key **md5) {
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned size = 0;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (*md5) {
		opts->options |= OPTION_MD5;
		size += TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	/* We always get an MSS option.  The option bytes which will be seen in
	 * normal data packets should timestamps be used must be in the MSS
	 * advertised.  But we subtract them from tp->mss_cache so that
	 * calculations in tcp_sendmsg are simpler etc.  So account for this
	 * fact here if necessary.  If we don't do this correctly, as a
	 * receiver we won't recognize data packets as being full sized when
	 * we should, and thus we won't abide by the delayed ACK rules
	 * correctly.  SACKs don't matter, we never delay an ACK when we have
	 * any of those going out.
	 */
	opts->mss = tcp_advertise_mss(sk);
	size += TCPOLEN_MSS_ALIGNED;

	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
		opts->options |= OPTION_TS;
		opts->tsval = TCP_SKB_CB(skb)->when;
		opts->tsecr = tp->rx_opt.ts_recent;
		size += TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(sysctl_tcp_window_scaling)) {
		opts->ws = tp->rx_opt.rcv_wscale;
		opts->options |= OPTION_WSCALE;
		size += TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(sysctl_tcp_sack)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		if (unlikely(!(OPTION_TS & opts->options)))
			size += TCPOLEN_SACKPERM_ALIGNED;
	}

	return size;
}

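/* Set up TCP options for SYN-ACKs. */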
static unsigned tcp_synack_options(struct sock *sk,
				   struct request_sock *req,
				   unsigned mss, struct sk_buff *skb,
				   struct tcp_out_options *opts,
				   struct tcp_md5sig_key **md5) {
	unsigned size = 0;
	struct inet_request_sock *ireq = inet_rsk(req);
	char doing_ts;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
	if (*md5) {
		opts->options |= OPTION_MD5;
		size += TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	/* We can't fit any SACK blocks in a packet with MD5 + TS
	 * options.  There was discussion about disabling SACK rather
	 * than TS in order to fit in better with old, buggy kernels,
	 * but that was deemed to be unnecessary.
	 */
	doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok);

	opts->mss = mss;
	size += TCPOLEN_MSS_ALIGNED;

	if (likely(ireq->wscale_ok)) {
		opts->ws = ireq->rcv_wscale;
		opts->options |= OPTION_WSCALE;
		size += TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(doing_ts)) {
		opts->options |= OPTION_TS;
		opts->tsval = TCP_SKB_CB(skb)->when;
		opts->tsecr = req->ts_recent;
		size += TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(ireq->sack_ok)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		if (unlikely(!doing_ts))
			size += TCPOLEN_SACKPERM_ALIGNED;
	}

	return size;
}

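/* Compute TCP options for ESTABLISHED sockets.  This is not the
 * final wire format yet.
 */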
static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
					struct tcp_out_options *opts,
					struct tcp_md5sig_key **md5) {
	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned size = 0;
	unsigned int eff_sacks;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (unlikely(*md5)) {
		opts->options |= OPTION_MD5;
		size += TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	if (likely(tp->rx_opt.tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = tcb ? tcb->when : 0;
		opts->tsecr = tp->rx_opt.ts_recent;
		size += TCPOLEN_TSTAMP_ALIGNED;
	}

	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
	if (unlikely(eff_sacks)) {
		const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
		opts->num_sack_blocks =
			min_t(unsigned, eff_sacks,
			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
			      TCPOLEN_SACK_PERBLOCK);
		size += TCPOLEN_SACK_BASE_ALIGNED +
			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
	}

	return size;
}

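/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg().  This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless.  It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */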
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned tcp_options_size, tcp_header_size;
	struct tcp_md5sig_key *md5;
	__u8 *md5_hash_location;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));

	/* If congestion control is doing timestamping, we must take
	 * the stamp before we possibly clone the skb below.
	 */
	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
		__net_timestamp(skb);

	if (likely(clone_it)) {
		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tp = tcp_sk(sk);
	tcb = TCP_SKB_CB(skb);
	memset(&opts, 0, sizeof(opts));

	if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	else
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);
	skb_set_owner_w(skb, sk);

	/* Build TCP header and checksum it. */
	th = tcp_hdr(skb);
	th->source		= inet->sport;
	th->dest		= inet->dport;
	th->seq			= htonl(tcb->seq);
	th->ack_seq		= htonl(tp->rcv_nxt);
	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
					tcb->flags);

	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window	= htons(min(tp->rcv_wnd, 65535U));
	} else {
		th->window	= htons(tcp_select_window(sk));
	}
	th->check		= 0;
	th->urg_ptr		= 0;

	/* The urg_mode check is necessary during a below snd_una win probe */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
		TCP_ECN_send(sk, skb, tcp_header_size);

#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		tp->af_specific->calc_md5_hash(md5_hash_location,
					       md5, sk, NULL, skb);
	}
#endif

	icsk->icsk_af_ops->send_check(sk, skb->len, skb);

	if (likely(tcb->flags & TCPCB_FLAG_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	if (skb->len != tcp_header_size)
		tcp_event_data_sent(tp, skb, sk);

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);

	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
	if (likely(err <= 0))
		return err;

	tcp_enter_cwr(sk, 1);

	return net_xmit_eval(err);
}

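/* This routine just queues the buffer for sending.
 *
 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
 * otherwise this data can be stuck in the queue.
 */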
static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Advance write_seq and place onto the write_queue. */
	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
}

/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
				 unsigned int mss_now)
{
	if (skb->len <= mss_now || !sk_can_gso(sk) ||
	    skb->ip_summed == CHECKSUM_NONE) {
		/* Avoid the costly divide in the normal
		 * non-TSO case.
		 */
		skb_shinfo(skb)->gso_segs = 1;
		skb_shinfo(skb)->gso_size = 0;
		skb_shinfo(skb)->gso_type = 0;
	} else {
		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
		skb_shinfo(skb)->gso_size = mss_now;
		skb_shinfo(skb)->gso_type = sk->sk_gso_type;
	}
}

/* When a modification to fackets out becomes necessary, we need to check
 * skb is counted to fackets_out or not.
 */
static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
				   int decr)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->sacked_out || tcp_is_reno(tp))
		return;

	if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
		tp->fackets_out -= decr;
}

/* Pcount in the middle of the write queue got changed, we need to do various
 * tweaks to fix counters.
 */
static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->packets_out -= decr;

	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
		tp->sacked_out -= decr;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
		tp->retrans_out -= decr;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
		tp->lost_out -= decr;

	/* Reno case is special. Sigh... */
	if (tcp_is_reno(tp) && decr > 0)
		tp->sacked_out -= min_t(u32, tp->sacked_out, decr);

	tcp_adjust_fackets_out(sk, skb, decr);

	if (tp->lost_skb_hint &&
	    before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
	    (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
		tp->lost_cnt_hint -= decr;

	tcp_verify_left_out(tp);
}

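/* Function to create two new TCP segments.  Shrinks the given segment
 * to the specified size and puts the rest into the linked list.  This
 * won't be called frequently, I hope.  Remember, these are still
 * headerless SKBs at this point.
 */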
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
		 unsigned int mss_now)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;
	int nsize, old_factor;
	int nlen;
	u8 flags;

	BUG_ON(len > skb->len);

	nsize = skb_headlen(skb) - len;
	if (nsize < 0)
		nsize = 0;

	if (skb_cloned(skb) &&
	    skb_is_nonlinear(skb) &&
	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;

	/* Get a new skb... force linear code path. */
	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
	if (buff == NULL)
		return -ENOMEM; /* We'll just try again later. */

	sk->sk_wmem_queued += buff->truesize;
	sk_mem_charge(sk, buff->truesize);
	nlen = skb->len - len - nsize;
	buff->truesize += nlen;
	skb->truesize -= nlen;

	/* Correct the sequence numbers. */
	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

	/* PSH and FIN should only be set in the second packet. */
	flags = TCP_SKB_CB(skb)->flags;
	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
	TCP_SKB_CB(buff)->flags = flags;
	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;

	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Copy and checksum data tail into the new buffer. */
		buff->csum = csum_partial_copy_nocheck(skb->data + len,
						       skb_put(buff, nsize),
						       nsize, 0);

		skb_trim(skb, len);

		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
	} else {
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb_split(skb, buff, len);
	}

	buff->ip_summed = skb->ip_summed;

	/* Looks stupid, but our code really uses when of
	 * skbs, so we need to copy it over to the new segment.
	 */
	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
	buff->tstamp = skb->tstamp;

	old_factor = tcp_skb_pcount(skb);

	/* Fix up tso_factor for both original and new SKB.  */
	tcp_set_skb_tso_segs(sk, skb, mss_now);
	tcp_set_skb_tso_segs(sk, buff, mss_now);

	/* If this packet has been sent out already, we must
	 * adjust the various packet counters.
	 */
	if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
		int diff = old_factor - tcp_skb_pcount(skb) -
			tcp_skb_pcount(buff);

		if (diff)
			tcp_adjust_pcount(sk, skb, diff);
	}

	/* Link BUFF into the send queue. */
	skb_header_release(buff);
	tcp_insert_write_queue_after(skb, buff, sk);

	return 0;
}

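/* This is similar to __pskb_pull_tail(). The difference is that pulled
 * data is not copied, but immediately discarded.
 */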
static void __pskb_trim_head(struct sk_buff *skb, int len)
{
	int i, k, eat;

	eat = len;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb_reset_tail_pointer(skb);
	skb->data_len -= len;
	skb->len = skb->data_len;
}

/* Remove acked data from a packet in the transmit queue. */
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;

	/* If len == headlen, we avoid __skb_pull to preserve alignment. */
	if (unlikely(len < skb_headlen(skb)))
		__skb_pull(skb, len);
	else
		__pskb_trim_head(skb, len - skb_headlen(skb));

	TCP_SKB_CB(skb)->seq += len;
	skb->ip_summed = CHECKSUM_PARTIAL;

	skb->truesize	   -= len;
	sk->sk_wmem_queued -= len;
	sk_mem_uncharge(sk, len);
	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);

	/* Any change of skb->len requires recalculation of tso
	 * factor and mss.
	 */
	if (tcp_skb_pcount(skb) > 1)
		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));

	return 0;
}

/* Calculate MSS. Not accounting for SACKs here.  */
int tcp_mtu_to_mss(struct sock *sk, int pmtu)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int mss_now;

	/* Calculate base mss without TCP options:
	 * It is MMS_S - sizeof(tcphdr) of rfc1122
	 */
	mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);

	/* Clamp it (mss_clamp does not include tcp options) */
	if (mss_now > tp->rx_opt.mss_clamp)
		mss_now = tp->rx_opt.mss_clamp;

	/* Now subtract optional transport overhead */
	mss_now -= icsk->icsk_ext_hdr_len;

	/* Then reserve room for full set of TCP options and 8 bytes of data */
	if (mss_now < 48)
		mss_now = 48;

	/* Now subtract TCP options size, not including SACKs */
	mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);

	return mss_now;
}

/* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int mtu;

	mtu = mss +
	      tp->tcp_header_len +
	      icsk->icsk_ext_hdr_len +
	      icsk->icsk_af_ops->net_header_len;

	return mtu;
}

/* MTU probing init per socket */
void tcp_mtup_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
				      icsk->icsk_af_ops->net_header_len;
	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
	icsk->icsk_mtup.probe_size = 0;
}

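/* This function synchronizes snd mss to current pmtu/exthdr set.

   tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT count
   for TCP options, but includes only bare TCP header.

   tp->rx_opt.mss_clamp is mss negotiated at connection setup.
   It is minimum of user_mss and mss received with SYN.
   It also does not include TCP options.

   inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.

   tp->mss_cache is current effective sending mss, including
   all tcp options except for SACKs. It is evaluated,
   taking into account current pmtu, but never exceeds
   tp->rx_opt.mss_clamp.

   NOTE1. rfc1122 clearly states that advertised MSS
   DOES NOT include either tcp or ip options.

   NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
   are READ ONLY outside this function.
 */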
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int mss_now;

	if (icsk->icsk_mtup.search_high > pmtu)
		icsk->icsk_mtup.search_high = pmtu;

	mss_now = tcp_mtu_to_mss(sk, pmtu);
	mss_now = tcp_bound_to_half_wnd(tp, mss_now);

	/* And store cached results */
	icsk->icsk_pmtu_cookie = pmtu;
	if (icsk->icsk_mtup.enabled)
		mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
	tp->mss_cache = mss_now;

	return mss_now;
}

/* Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 */
unsigned int tcp_current_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	u32 mss_now;
	unsigned header_len;
	struct tcp_out_options opts;
	struct tcp_md5sig_key *md5;

	mss_now = tp->mss_cache;

	if (dst) {
		u32 mtu = dst_mtu(dst);
		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
			mss_now = tcp_sync_mss(sk, mtu);
	}

	header_len = tcp_established_options(sk, NULL, &opts, &md5) +
		     sizeof(struct tcphdr);
	/* The mss_cache is sized based on tp->tcp_header_len, which assumes
	 * some common options. If this is an odd packet (because we have SACK
	 * blocks etc) then our calculated header_len will be different, and
	 * we have to adjust mss_now correspondingly */
	if (header_len != tp->tcp_header_len) {
		int delta = (int) header_len - tp->tcp_header_len;
		mss_now -= delta;
	}

	return mss_now;
}

/* Congestion window validation. (RFC2861) */
static void tcp_cwnd_validate(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->packets_out >= tp->snd_cwnd) {
		/* Network is fed fully. */
		tp->snd_cwnd_used = 0;
		tp->snd_cwnd_stamp = tcp_time_stamp;
	} else {
		/* Network starves. */
		if (tp->packets_out > tp->snd_cwnd_used)
			tp->snd_cwnd_used = tp->packets_out;

		if (sysctl_tcp_slow_start_after_idle &&
		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
			tcp_cwnd_application_limited(sk);
	}
}

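/* Returns the portion of skb which can be sent right away without
 * introducing MSS oddities to segment boundaries. In rare cases where
 * mss_now != mss_cache, we will request the caller to create a small
 * skb per input skb which could be mostly avoided here (if desired).
 */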
static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
					unsigned int mss_now, unsigned int cwnd)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 needed, window, cwnd_len;

	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
	cwnd_len = mss_now * cwnd;

	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
		return cwnd_len;

	needed = min(skb->len, window);

	if (cwnd_len <= needed)
		return cwnd_len;

	return needed - needed % mss_now;
}

/* Can at least one segment of SKB be sent right now, according to the
 * congestion window rules?  If so, return how many segments are allowed.
 */
static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
					 struct sk_buff *skb)
{
	u32 in_flight, cwnd;

	/* Don't be strict about the congestion window for the final FIN.  */
	if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
	    tcp_skb_pcount(skb) == 1)
		return 1;

	in_flight = tcp_packets_in_flight(tp);
	cwnd = tp->snd_cwnd;
	if (in_flight < cwnd)
		return (cwnd - in_flight);

	return 0;
}

/* Initialize TSO state of a skb.
 * This must be invoked the first time we consider transmitting
 * SKB onto the wire.
 */
static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
			     unsigned int mss_now)
{
	int tso_segs = tcp_skb_pcount(skb);

	if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
		tcp_set_skb_tso_segs(sk, skb, mss_now);
		tso_segs = tcp_skb_pcount(skb);
	}
	return tso_segs;
}

/* Minshall's variant of the Nagle send check. */
static inline int tcp_minshall_check(const struct tcp_sock *tp)
{
	return after(tp->snd_sml, tp->snd_una) &&
		!after(tp->snd_sml, tp->snd_nxt);
}

/* Return 0, if packet can be sent now without violating Nagle's rules:
 * 1. It is full sized.
 * 2. Or it contains FIN. (already checked by caller)
 * 3. Or TCP_NODELAY was set.
 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
 *    With Minshall's modification: all sent small packets are ACKed.
 */
static inline int tcp_nagle_check(const struct tcp_sock *tp,
				  const struct sk_buff *skb,
				  unsigned mss_now, int nonagle)
{
	return (skb->len < mss_now &&
		((nonagle & TCP_NAGLE_CORK) ||
		 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
}

/* Return non-zero if the Nagle test allows this packet to be
 * sent now.
 */
static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
				 unsigned int cur_mss, int nonagle)
{
	/* Nagle rule does not apply to frames, which sit in the middle of the
	 * write_queue (they have no chances to get new data).
	 *
	 * This is implemented in the callers, where they modify the 'nonagle'
	 * argument based upon the location of SKB in the send queue.
	 */
	if (nonagle & TCP_NAGLE_PUSH)
		return 1;

	/* Don't use the nagle rule for urgent data (or for the final FIN).
	 * Nagle can be ignored during F-RTO too (see RFC4138).
	 */
	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
		return 1;

	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
		return 1;

	return 0;
}

/* Does at least the first segment of SKB fit into the send window? */
static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
				   unsigned int cur_mss)
{
	u32 end_seq = TCP_SKB_CB(skb)->end_seq;

	if (skb->len > cur_mss)
		end_seq = TCP_SKB_CB(skb)->seq + cur_mss;

	return !after(end_seq, tcp_wnd_end(tp));
}

/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
 * should be put on the wire right now.  If so, it returns the number of
 * packets allowed by the congestion window.
 */
static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
				 unsigned int cur_mss, int nonagle)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int cwnd_quota;

	tcp_init_tso_segs(sk, skb, cur_mss);

	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
		return 0;

	cwnd_quota = tcp_cwnd_test(tp, skb);
	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
		cwnd_quota = 0;

	return cwnd_quota;
}

/* Test if sending is allowed right now. */
int tcp_may_send_now(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb = tcp_send_head(sk);

	return (skb &&
		tcp_snd_test(sk, skb, tcp_current_mss(sk),
			     (tcp_skb_is_last(sk, skb) ?
			      tp->nonagle : TCP_NAGLE_PUSH)));
}

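/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
 * which is put after SKB on the list.  It is very much like
 * tcp_fragment() except that it may make several kinds of assumptions
 * in order to speed up the splitting operation.  In particular, we
 * know that all the data is in scatter-gather pages, and that the
 * packet has never been sent out before (and thus is not cloned).
 */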
static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
			unsigned int mss_now)
{
	struct sk_buff *buff;
	int nlen = skb->len - len;
	u8 flags;

	/* All of a TSO frame must be composed of paged data.  */
	if (skb->len != skb->data_len)
		return tcp_fragment(sk, skb, len, mss_now);

	buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
	if (unlikely(buff == NULL))
		return -ENOMEM;

	sk->sk_wmem_queued += buff->truesize;
	sk_mem_charge(sk, buff->truesize);
	buff->truesize += nlen;
	skb->truesize -= nlen;

	/* Correct the sequence numbers. */
	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

	/* PSH and FIN should only be set in the second packet. */
	flags = TCP_SKB_CB(skb)->flags;
	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
	TCP_SKB_CB(buff)->flags = flags;

	/* This packet was never sent out yet, so no SACK bits. */
	TCP_SKB_CB(buff)->sacked = 0;

	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
	skb_split(skb, buff, len);

	/* Fix up tso_factor for both original and new SKB.  */
	tcp_set_skb_tso_segs(sk, skb, mss_now);
	tcp_set_skb_tso_segs(sk, buff, mss_now);

	/* Link BUFF into the send queue. */
	skb_header_release(buff);
	tcp_insert_write_queue_after(skb, buff, sk);

	return 0;
}

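/* Try to defer sending, if possible, in order to minimize the amount
 * of TSO splitting we do.  View it as a kind of TSO Nagle test.
 *
 * This algorithm is from John Heffner.
 */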
static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	u32 send_win, cong_win, limit, in_flight;

	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
		goto send_now;

	if (icsk->icsk_ca_state != TCP_CA_Open)
		goto send_now;

	/* Defer for less than two clock ticks. */
	if (tp->tso_deferred &&
	    (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
		goto send_now;

	in_flight = tcp_packets_in_flight(tp);

	BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));

	send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;

	/* From in_flight test above, we know that cwnd > in_flight.  */
	cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;

	limit = min(send_win, cong_win);

	/* If a full-sized TSO skb can be sent, do it. */
	if (limit >= sk->sk_gso_max_size)
		goto send_now;

	/* Middle in queue won't get any more data, full sendable already? */
	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
		goto send_now;

	if (sysctl_tcp_tso_win_divisor) {
		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);

		/* If at least some fraction of a window is available,
		 * just use it.
		 */
		chunk /= sysctl_tcp_tso_win_divisor;
		if (limit >= chunk)
			goto send_now;
	} else {
		/* Different approach, try not to defer past a single
		 * ACK.  Receiver should ACK every other full sized
		 * frame, so if we have space for more than 3 frames
		 * there is no reason for deferring.
		 */
		if (limit > tcp_max_burst(tp) * tp->mss_cache)
			goto send_now;
	}

	/* Ok, it looks like it is advisable to defer.  */
	tp->tso_deferred = 1 | (jiffies << 1);

	return 1;

send_now:
	tp->tso_deferred = 0;
	return 0;
}

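/* Create a new MTU probe if we are ready.
 * Returns 0 if we should wait to probe (no cwnd available),
 *         1 if a probe was sent,
 *         -1 otherwise
 */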
static int tcp_mtu_probe(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb, *nskb, *next;
	int len;
	int probe_size;
	int size_needed;
	int copy;
	int mss_now;

	/* Not currently probing/verifying,
	 * not in recovery,
	 * have enough cwnd, and
	 * not SACKing (the variable headers throw things off)
	 */
	if (!icsk->icsk_mtup.enabled ||
	    icsk->icsk_mtup.probe_size ||
	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
	    tp->snd_cwnd < 11 ||
	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
		return -1;

	/* Very simple search strategy: just double the MSS. */
	mss_now = tcp_current_mss(sk);
	probe_size = 2 * tp->mss_cache;
	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
		/* TODO: set timer for probe_converge_event */
		return -1;
	}

	/* Have enough data in the send queue to probe? */
	if (tp->write_seq - tp->snd_nxt < size_needed)
		return -1;

	if (tp->snd_wnd < size_needed)
		return -1;
	if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
		return 0;

	/* Do we need to wait to drain cwnd? With none in flight, don't stall */
	if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
		if (!tcp_packets_in_flight(tp))
			return -1;
		else
			return 0;
	}

	/* We're allowed to probe.  Build it now. */
	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
		return -1;
	sk->sk_wmem_queued += nskb->truesize;
	sk_mem_charge(sk, nskb->truesize);

	skb = tcp_send_head(sk);

	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
	TCP_SKB_CB(nskb)->sacked = 0;
	nskb->csum = 0;
	nskb->ip_summed = skb->ip_summed;

	tcp_insert_write_queue_before(nskb, skb, sk);

	len = 0;
	tcp_for_write_queue_from_safe(skb, next, sk) {
		copy = min_t(int, skb->len, probe_size - len);
		if (nskb->ip_summed)
			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
		else
			nskb->csum = skb_copy_and_csum_bits(skb, 0,
							    skb_put(nskb, copy),
							    copy, nskb->csum);

		if (skb->len <= copy) {
			/* We've eaten all the data from this skb.
			 * Throw it away. */
			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
			tcp_unlink_write_queue(skb, sk);
			sk_wmem_free_skb(sk, skb);
		} else {
			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
						   ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
			if (!skb_shinfo(skb)->nr_frags) {
				skb_pull(skb, copy);
				if (skb->ip_summed != CHECKSUM_PARTIAL)
					skb->csum = csum_partial(skb->data,
								 skb->len, 0);
			} else {
				__pskb_trim_head(skb, copy);
				tcp_set_skb_tso_segs(sk, skb, mss_now);
			}
			TCP_SKB_CB(skb)->seq += copy;
		}

		len += copy;

		if (len >= probe_size)
			break;
	}
	tcp_init_tso_segs(sk, nskb, nskb->len);

	/* We're ready to send.  If transmission fails, the probe is
	 * simply not recorded and normal transmission resumes. */
	TCP_SKB_CB(nskb)->when = tcp_time_stamp;
	if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
		/* Decrement cwnd here because we are sending
		 * effectively two packets. */
		tp->snd_cwnd--;
		tcp_event_new_data_sent(sk, nskb);

		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
		tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq;

		return 1;
	}

	return -1;
}

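/* This routine writes packets to the network.  It advances the
 * send_head.  This happens as incoming acks open up the remote
 * window for us.
 *
 * LARGESEND note: !tcp_urg_mode is overkill, only frames between
 * snd_up-64k-mss .. snd_up cannot be large. However, taking into
 * account rare use of URG, this is not a big flaw.
 *
 * Returns 1, if no segments are in flight and we have queued segments,
 * but cannot send anything now because of SWS or another problem.
 */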
static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			  int push_one, gfp_t gfp)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int tso_segs, sent_pkts;
	int cwnd_quota;
	int result;

	sent_pkts = 0;

	if (!push_one) {
		/* Do MTU probing. */
		result = tcp_mtu_probe(sk);
		if (!result) {
			return 0;
		} else if (result > 0) {
			sent_pkts = 1;
		}
	}

	while ((skb = tcp_send_head(sk))) {
		unsigned int limit;

		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
		BUG_ON(!tso_segs);

		cwnd_quota = tcp_cwnd_test(tp, skb);
		if (!cwnd_quota)
			break;

		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
			break;

		if (tso_segs == 1) {
			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
						     (tcp_skb_is_last(sk, skb) ?
						      nonagle : TCP_NAGLE_PUSH))))
				break;
		} else {
			if (!push_one && tcp_tso_should_defer(sk, skb))
				break;
		}

		limit = mss_now;
		if (tso_segs > 1 && !tcp_urg_mode(tp))
			limit = tcp_mss_split_point(sk, skb, mss_now,
						    cwnd_quota);

		if (skb->len > limit &&
		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
			break;

		TCP_SKB_CB(skb)->when = tcp_time_stamp;

		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
			break;

		/* Advance the send_head.  This one is sent out.
		 * This call will increment packets_out.
		 */
		tcp_event_new_data_sent(sk, skb);

		tcp_minshall_update(tp, mss_now, skb);
		sent_pkts++;

		if (push_one)
			break;
	}

	if (likely(sent_pkts)) {
		tcp_cwnd_validate(sk);
		return 0;
	}
	return !tp->packets_out && tcp_send_head(sk);
}

/* Push out any pending frames which were held back due to
 * TCP_CORK or attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 */
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
			       int nonagle)
{
	struct sk_buff *skb = tcp_send_head(sk);

	if (!skb)
		return;

	/* If we are closed, the bytes will have to remain here.
	 * In time closedown will finish, we empty the write queue and
	 * all will be happy.
	 */
	if (unlikely(sk->sk_state == TCP_CLOSE))
		return;

	if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
		tcp_check_probe_timer(sk);
}

/* Send _single_ skb sitting at the send head. This function requires
 * true push pending frames to setup probe timer etc.
 */
void tcp_push_one(struct sock *sk, unsigned int mss_now)
{
	struct sk_buff *skb = tcp_send_head(sk);

	BUG_ON(!skb || skb->len < mss_now);

	tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
}

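/* This function returns the amount of memory we can advertise as our
 * receive window.  It applies receiver-side SWS (silly window syndrome)
 * avoidance: the offered window is never shrunk (RFC 793), is bounded by
 * rcv_ssthresh and the socket's memory limits, and without window
 * scaling is rounded down to a multiple of the peer's MSS.
 */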
u32 __tcp_select_window(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	/* MSS for the peer's data.  Previous versions used mss_clamp
	 * here.  I don't know if the value based on our guesses
	 * of peer's MSS is better for the performance.  It's more correct
	 * but may be worse for the performance because of rcv_mss
	 * fluctuations.  --SAW  1998/11/1
	 */
	int mss = icsk->icsk_ack.rcv_mss;
	int free_space = tcp_space(sk);
	int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
	int window;

	if (mss > full_space)
		mss = full_space;

	if (free_space < (full_space >> 1)) {
		icsk->icsk_ack.quick = 0;

		if (tcp_memory_pressure)
			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
					       4U * tp->advmss);

		if (free_space < mss)
			return 0;
	}

	if (free_space > tp->rcv_ssthresh)
		free_space = tp->rcv_ssthresh;

	/* Don't do rounding if we are using window scaling, since the
	 * scaled window will not line up with the MSS boundary anyway.
	 */
	window = tp->rcv_wnd;
	if (tp->rx_opt.rcv_wscale) {
		window = free_space;

		/* Advertise enough space so that it won't get scaled away.
		 * Important case: prevent zero window announcement if
		 * 1<<rcv_wscale > mss.
		 */
		if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
			window = (((window >> tp->rx_opt.rcv_wscale) + 1)
				  << tp->rx_opt.rcv_wscale);
	} else {
		/* Get the largest window that is a nice multiple of mss.
		 * Window clamp already applied above.
		 * If our current window offering is within 1 mss of the
		 * free space we just keep it. This prevents the divide
		 * and multiply from happening most of the time.
		 * We also don't do any window rounding when the free space
		 * is too small.
		 */
		if (window <= free_space - mss || window > free_space)
			window = (free_space / mss) * mss;
		else if (mss == full_space &&
			 free_space > window + (full_space >> 1))
			window = free_space;
	}

	return window;
}

/* Collapses two adjacent SKB's during retransmission. */
static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
	int skb_size, next_skb_size;

	skb_size = skb->len;
	next_skb_size = next_skb->len;

	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);

	tcp_highest_sack_combine(sk, next_skb, skb);

	tcp_unlink_write_queue(next_skb, sk);

	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
				  next_skb_size);

	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
		skb->ip_summed = CHECKSUM_PARTIAL;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);

	/* Update sequence range on original skb. */
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

	/* Merge over control information. This moves PSH/FIN etc. over */
	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;

	/* All done, get rid of second SKB and account for it so
	 * packet counting does not break.
	 */
	TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;

	/* changed transmit queue under us so clear hints */
	tcp_clear_retrans_hints_partial(tp);
	if (next_skb == tp->retransmit_skb_hint)
		tp->retransmit_skb_hint = skb;

	tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));

	sk_wmem_free_skb(sk, next_skb);
}

/* Check if coalescing SKBs is legal. */
static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
{
	if (tcp_skb_pcount(skb) > 1)
		return 0;
	/* TODO: SACK collapsing could be used to remove this condition */
	if (skb_shinfo(skb)->nr_frags != 0)
		return 0;
	if (skb_cloned(skb))
		return 0;
	if (skb == tcp_send_head(sk))
		return 0;
	/* Some heuristics for collapsing over SACK'd could be invented */
	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
		return 0;

	return 1;
}

/* Collapse packets in the retransmit queue to create fewer packets
 * on the wire.  This is only done on retransmission.
 */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
				     int space)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb = to, *tmp;
	int first = 1;

	if (!sysctl_tcp_retrans_collapse)
		return;
	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
		return;

	tcp_for_write_queue_from_safe(skb, tmp, sk) {
		if (!tcp_can_collapse(sk, skb))
			break;

		space -= skb->len;

		if (first) {
			first = 0;
			continue;
		}

		if (space < 0)
			break;
		/* Punt if not enough space exists in the first SKB for
		 * the data in the second
		 */
		if (skb->len > skb_tailroom(to))
			break;

		if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
			break;

		tcp_collapse_retrans(sk, to);
	}
}

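/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */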
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int cur_mss;
	int err;

	/* Inconclusive MTU probe */
	if (icsk->icsk_mtup.probe_size) {
		icsk->icsk_mtup.probe_size = 0;
	}

	/* Do not sent more than we queued. 1/4 is reserved for possible
	 * copying overhead: fragmentation, tunneling, mangling etc.
	 */
	if (atomic_read(&sk->sk_wmem_alloc) >
	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
		return -EAGAIN;

	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
			BUG();
		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
			return -ENOMEM;
	}

	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
		return -EHOSTUNREACH; /* Routing failure or similar. */

	cur_mss = tcp_current_mss(sk);

	/* If receiver has shrunk his window, and skb is out of
	 * new window, do not retransmit it. The exception is the
	 * case, when window is shrunk to zero. In this case
	 * our retransmit serves as a zero window probe.
	 */
	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))
	    && TCP_SKB_CB(skb)->seq != tp->snd_una)
		return -EAGAIN;

	if (skb->len > cur_mss) {
		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
			return -ENOMEM; /* We'll try again later. */
	} else {
		int oldpcount = tcp_skb_pcount(skb);

		if (unlikely(oldpcount > 1)) {
			tcp_init_tso_segs(sk, skb, cur_mss);
			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
		}
	}

	tcp_retrans_try_collapse(sk, skb, cur_mss);

	/* Some Solaris stacks overoptimize and ignore the FIN on a
	 * retransmit when old data is attached.  So strip it off
	 * since it is cheap to do so and saves bytes on the network.
	 */
	if (skb->len > 0 &&
	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
		if (!pskb_trim(skb, 0)) {
			/* Reuse, even though it does some unnecessary work */
			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
					     TCP_SKB_CB(skb)->flags);
			skb->ip_summed = CHECKSUM_NONE;
		}
	}

	/* Make a copy, if the first transmission SKB clone we made
	 * is still in somebody's hands, else make a clone.
	 */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;

	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);

	if (err == 0) {
		/* Update global TCP statistics. */
		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);

		tp->total_retrans++;

#if FASTRETRANS_DEBUG > 0
		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
			if (net_ratelimit())
				printk(KERN_DEBUG "retrans_out leaked.\n");
		}
#endif
		if (!tp->retrans_out)
			tp->lost_retrans_low = tp->snd_nxt;
		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
		tp->retrans_out += tcp_skb_pcount(skb);

		/* Save stamp of the first retransmit. */
		if (!tp->retrans_stamp)
			tp->retrans_stamp = TCP_SKB_CB(skb)->when;

		tp->undo_retrans++;

		/* snd_nxt is stored to detect loss of retransmitted segment,
		 * see tcp_input.c tcp_sacktag_write_queue().
		 */
		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
	}
	return err;
}

/* Check if we forward retransmits are possible in the current
 * window/congestion state.
 */
static int tcp_can_forward_retransmit(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	/* Forward retransmissions are possible only during Recovery. */
	if (icsk->icsk_ca_state != TCP_CA_Recovery)
		return 0;

	/* No forward retransmissions in Reno are possible. */
	if (tcp_is_reno(tp))
		return 0;

	/* Yeah, we have to make difficult choice between forward transmission
	 * and retransmission... Both ways have their merits...
	 *
	 * For now we do not retransmit anything, while we have some new
	 * segments to send. In the other cases, follow rule 3 for
	 * NextSeg() specified in RFC3517.
	 */
	if (tcp_may_send_now(sk))
		return 0;

	return 1;
}

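/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 */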
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	struct sk_buff *hole = NULL;
	u32 last_lost;
	int mib_idx;
	int fwd_rexmitting = 0;

	if (!tp->lost_out)
		tp->retransmit_high = tp->snd_una;

	if (tp->retransmit_skb_hint) {
		skb = tp->retransmit_skb_hint;
		last_lost = TCP_SKB_CB(skb)->end_seq;
		if (after(last_lost, tp->retransmit_high))
			last_lost = tp->retransmit_high;
	} else {
		skb = tcp_write_queue_head(sk);
		last_lost = tp->snd_una;
	}

	tcp_for_write_queue_from(skb, sk) {
		__u8 sacked = TCP_SKB_CB(skb)->sacked;

		if (skb == tcp_send_head(sk))
			break;
		/* we could do better than to assign each time */
		if (hole == NULL)
			tp->retransmit_skb_hint = skb;

		/* Assume this retransmit will generate
		 * only one packet for congestion window
		 * calculation purposes.  This works because
		 * tcp_retransmit_skb() will chop up the
		 * packet to be MSS sized and all the
		 * packet counting works out.
		 */
		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			return;

		if (fwd_rexmitting) {
begin_fwd:
			if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
				break;
			mib_idx = LINUX_MIB_TCPFORWARDRETRANS;

		} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
			tp->retransmit_high = last_lost;
			if (!tcp_can_forward_retransmit(sk))
				break;
			/* Backtrack if necessary to non-L'ed skb */
			if (hole != NULL) {
				skb = hole;
				hole = NULL;
			}
			fwd_rexmitting = 1;
			goto begin_fwd;

		} else if (!(sacked & TCPCB_LOST)) {
			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
				hole = skb;
			continue;

		} else {
			last_lost = TCP_SKB_CB(skb)->end_seq;
			if (icsk->icsk_ca_state != TCP_CA_Loss)
				mib_idx = LINUX_MIB_TCPFASTRETRANS;
			else
				mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
		}

		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
			continue;

		if (tcp_retransmit_skb(sk, skb))
			return;
		NET_INC_STATS_BH(sock_net(sk), mib_idx);

		if (skb == tcp_write_queue_head(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  inet_csk(sk)->icsk_rto,
						  TCP_RTO_MAX);
	}
}

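/* Send a fin.  The caller locks the socket for us.  This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */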
void tcp_send_fin(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb = tcp_write_queue_tail(sk);
	int mss_now;

	/* Optimization, tack on the FIN if we have a queue of
	 * unsent frames.  But be careful about outgoing SACKS
	 * and IP options.
	 */
	mss_now = tcp_current_mss(sk);

	if (tcp_send_head(sk) != NULL) {
		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
		TCP_SKB_CB(skb)->end_seq++;
		tp->write_seq++;
	} else {
		/* Socket is locked, keep trying until memory is available. */
		for (;;) {
			skb = alloc_skb_fclone(MAX_TCP_HEADER,
					       sk->sk_allocation);
			if (skb)
				break;
			yield();
		}

		/* Reserve space for headers and prepare control bits. */
		skb_reserve(skb, MAX_TCP_HEADER);
		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
		tcp_init_nondata_skb(skb, tp->write_seq,
				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
		tcp_queue_skb(sk, skb);
	}
	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}

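/* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
 * was unread data in the receive queue.  This behavior is recommended
 * by RFC 2525, section 2.17.  -DaveM
 */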
void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
	struct sk_buff *skb;

	/* NOTE: No TCP options attached and we never retransmit this. */
	skb = alloc_skb(MAX_TCP_HEADER, priority);
	if (!skb) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);
	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
	/* Send it off. */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	if (tcp_transmit_skb(sk, skb, 0, priority))
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);

	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
}

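/* WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called.  If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */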
int tcp_send_synack(struct sock *sk)
{
	struct sk_buff *skb;

	skb = tcp_write_queue_head(sk);
	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
		return -EFAULT;
	}
	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
		if (skb_cloned(skb)) {
			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
			if (nskb == NULL)
				return -ENOMEM;
			tcp_unlink_write_queue(skb, sk);
			skb_header_release(nskb);
			__tcp_add_write_queue_head(sk, nskb);
			sk_wmem_free_skb(sk, skb);
			sk->sk_wmem_queued += nskb->truesize;
			sk_mem_charge(sk, nskb->truesize);
			skb = nskb;
		}

		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
		TCP_ECN_send_synack(tcp_sk(sk), skb);
	}
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}

/* Prepare a SYN-ACK. */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				struct request_sock *req)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcphdr *th;
	int tcp_header_size;
	struct tcp_out_options opts;
	struct sk_buff *skb;
	struct tcp_md5sig_key *md5;
	__u8 *md5_hash_location;
	int mss;

	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_TCP_HEADER);

	skb_dst_set(skb, dst_clone(dst));

	mss = dst_metric(dst, RTAX_ADVMSS);
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
		mss = tp->rx_opt.user_mss;

	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
		__u8 rcv_wscale;
		/* Set this up on the first call only */
		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
		/* tcp_full_space because it is guaranteed to be the first packet */
		tcp_select_initial_window(tcp_full_space(sk),
			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			ireq->wscale_ok,
			&rcv_wscale);
		ireq->rcv_wscale = rcv_wscale;
	}

	memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
	if (unlikely(req->cookie_ts))
		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
	else
#endif
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	tcp_header_size = tcp_synack_options(sk, req, mss,
					     skb, &opts, &md5) +
			  sizeof(struct tcphdr);

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	th = tcp_hdr(skb);
	memset(th, 0, sizeof(struct tcphdr));
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
	th->source = ireq->loc_port;
	th->dest = ireq->rmt_port;
	/* Set up control bits and the ISN; a SYN consumes one sequence
	 * number, which tcp_init_nondata_skb() accounts for in end_seq.
	 */
	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
	th->seq = htonl(TCP_SKB_CB(skb)->seq);
	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);

	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
	th->window = htons(min(req->rcv_wnd, 65535U));
	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
	th->doff = (tcp_header_size >> 2);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);

#ifdef CONFIG_TCP_MD5SIG
	/* Okay, we have all we need - do the md5 hash if needed */
	if (md5) {
		tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location,
							 md5, NULL, req, skb);
	}
#endif

	return skb;
}

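/* Do all connect socket setups that can be done AF independent. */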
static void tcp_connect_init(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u8 rcv_wscale;

	/* We'll fix this up when we get a response from the other end.
	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
	 */
	tp->tcp_header_len = sizeof(struct tcphdr) +
		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);

#ifdef CONFIG_TCP_MD5SIG
	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
		tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	/* If user gave his TCP_MAXSEG, record it to clamp */
	if (tp->rx_opt.user_mss)
		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
	tp->max_window = 0;
	tcp_mtup_init(sk);
	tcp_sync_mss(sk, dst_mtu(dst));

	if (!tp->window_clamp)
		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
		tp->advmss = tp->rx_opt.user_mss;

	tcp_initialize_rcv_mss(sk);

	tcp_select_initial_window(tcp_full_space(sk),
				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
				  &tp->rcv_wnd,
				  &tp->window_clamp,
				  sysctl_tcp_window_scaling,
				  &rcv_wscale);

	tp->rx_opt.rcv_wscale = rcv_wscale;
	tp->rcv_ssthresh = tp->rcv_wnd;

	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->snd_wnd = 0;
	tcp_init_wl(tp, 0);
	tp->snd_una = tp->write_seq;
	tp->snd_sml = tp->write_seq;
	tp->snd_up = tp->write_seq;
	tp->rcv_nxt = 0;
	tp->rcv_wup = 0;
	tp->copied_seq = 0;

	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
	inet_csk(sk)->icsk_retransmits = 0;
	tcp_clear_retrans(tp);
}

/* Build a SYN and send it off. */
int tcp_connect(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;

	tcp_connect_init(sk);

	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(buff == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(buff, MAX_TCP_HEADER);

	tp->snd_nxt = tp->write_seq;
	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
	TCP_ECN_send_syn(sk, buff);

	/* Send it off. */
	TCP_SKB_CB(buff)->when = tcp_time_stamp;
	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
	skb_header_release(buff);
	__tcp_add_write_queue_tail(sk, buff);
	sk->sk_wmem_queued += buff->truesize;
	sk_mem_charge(sk, buff->truesize);
	tp->packets_out += tcp_skb_pcount(buff);
	tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);

	/* We change tp->snd_nxt after the tcp_transmit_skb() call
	 * in order to make this packet get counted in tcpOutSegs.
	 */
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;
	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the SYN until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
	return 0;
}

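/* Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
 * for details.
 */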
void tcp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int ato = icsk->icsk_ack.ato;
	unsigned long timeout;

	if (ato > TCP_DELACK_MIN) {
		const struct tcp_sock *tp = tcp_sk(sk);
		int max_ato = HZ / 2;

		if (icsk->icsk_ack.pingpong ||
		    (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
			max_ato = TCP_DELACK_MAX;

		/* Slow path, intersegment interval is "high". */

		/* If some rtt estimate is known, use it to bound delayed ack.
		 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt
		 * measurements directly.
		 */
		if (tp->srtt) {
			int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);

			if (rtt < max_ato)
				max_ato = rtt;
		}

		ato = min(ato, max_ato);
	}

	/* Stay within the limit we were given */
	timeout = jiffies + ato;

	/* Use new timeout only if there wasn't a older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 */
		if (icsk->icsk_ack.blocked ||
		    time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
			tcp_send_ack(sk);
			return;
		}

		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}

/* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk)
{
	struct sk_buff *buff;

	/* If we have been reset, we may not send again. */
	if (sk->sk_state == TCP_CLOSE)
		return;

	/* We are not putting this on the write queue, so
	 * tcp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (buff == NULL) {
		inet_csk_schedule_ack(sk);
		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
					  TCP_DELACK_MAX, TCP_RTO_MAX);
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(buff, MAX_TCP_HEADER);
	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);

	/* Send it off, this clears delayed acks for us. */
	TCP_SKB_CB(buff)->when = tcp_time_stamp;
	tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
}

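/* This routine sends a packet with an out of date sequence
 * number.  It assumes the other end will try to ack it.
 *
 * Question: what should we make while urgent mode?
 * 4.4BSD forces sending single byte of data.  We cannot send
 * out of window data, because we have SND.NXT==SND.MAX...
 *
 * Current solution: to send TWO zero-length segments in urgent mode:
 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
 * out-of-date with SND.UNA-1 to probe the window.
 */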
static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* We don't queue it, tcp_transmit_skb() sets ownership. */
	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (skb == NULL)
		return -1;

	/* Reserve space for headers and set control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);

	/* Use a previous sequence.  This should cause the other
	 * end to send an ack.  Don't queue or clone SKB, just
	 * send it.
	 */
	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}

/* Initiate keepalive or window probe from timer. */
int tcp_write_wakeup(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (sk->sk_state == TCP_CLOSE)
		return -1;

	if ((skb = tcp_send_head(sk)) != NULL &&
	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
		int err;
		unsigned int mss = tcp_current_mss(sk);
		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;

		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
			tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;

		/* We are probing the opening of a window
		 * but the window size is != 0
		 * must have been a result SWS avoidance ( sender )
		 */
		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
		    skb->len > mss) {
			seg_size = min(seg_size, mss);
			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
			if (tcp_fragment(sk, skb, seg_size, mss))
				return -1;
		} else if (!tcp_skb_pcount(skb))
			tcp_set_skb_tso_segs(sk, skb, mss);

		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
		TCP_SKB_CB(skb)->when = tcp_time_stamp;
		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
		if (!err)
			tcp_event_new_data_sent(sk, skb);
		return err;
	} else {
		if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
			tcp_xmit_probe_skb(sk, 1);
		return tcp_xmit_probe_skb(sk, 0);
	}
}

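/* A window probe timeout has occurred.  If window is not closed send
 * a partial packet else a zero probe.
 */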
void tcp_send_probe0(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int err;

	err = tcp_write_wakeup(sk);

	if (tp->packets_out || !tcp_send_head(sk)) {
		/* Cancel probe timer, if it is not required. */
		icsk->icsk_probes_out = 0;
		icsk->icsk_backoff = 0;
		return;
	}

	if (err <= 0) {
		if (icsk->icsk_backoff < sysctl_tcp_retries2)
			icsk->icsk_backoff++;
		icsk->icsk_probes_out++;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
					  TCP_RTO_MAX);
	} else {
		/* If packet was not sent due to local congestion,
		 * do not backoff and do not remember icsk_probes_out.
		 * Let local senders to fight for local resources.
		 *
		 * Use accumulated backoff yet.
		 */
		if (!icsk->icsk_probes_out)
			icsk->icsk_probes_out = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  min(icsk->icsk_rto << icsk->icsk_backoff,
					      TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);
	}
}

EXPORT_SYMBOL(tcp_select_initial_window);
EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
EXPORT_SYMBOL(tcp_sync_mss);
EXPORT_SYMBOL(tcp_mtup_init);