/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
#include <linux/scatterlist.h>
#include <linux/splice.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/random.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/crypto.h>
#include <linux/time.h>
#include <linux/slab.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/netdma.h>
#include <net/sock.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;

struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);

long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;

EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);

atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);

/*
 * Current number of TCP sockets.
 */
struct percpu_counter tcp_sockets_allocated;
EXPORT_SYMBOL(tcp_sockets_allocated);

/*
 * TCP splice context
 */
struct tcp_splice_state {
	struct pipe_inode_info *pipe;
	size_t len;
	unsigned int flags;
};

/*
 * Pressure flag: try to collapse.
 * Technical note: it is used by multiple contexts non atomically.
 * All the __sk_mem_schedule() is of this nature: accounting
 * is strict, actions are advisory and have some latency.
 */
int tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL(tcp_memory_pressure);

void tcp_enter_memory_pressure(struct sock *sk)
{
	if (!tcp_memory_pressure) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
		tcp_memory_pressure = 1;
	}
}
EXPORT_SYMBOL(tcp_enter_memory_pressure);

/* Convert seconds to retransmits based on initial and max timeout */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
{
	u8 res = 0;

	if (seconds > 0) {
		int period = timeout;

		res = 1;
		while (seconds > period && res < 255) {
			res++;
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return res;
}

/* Convert retransmits to seconds based on initial and max timeout */
static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
{
	int period = 0;

	if (retrans > 0) {
		period = timeout;
		while (--retrans) {
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return period;
}

/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;
	struct tcp_sock *tp = tcp_sk(sk);

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == TCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;

	/*
	 * POLLHUP is not done quite right here: poll() has no notion of
	 * HUP in only one direction, so POLLHUP is reported only once
	 * both directions have been shut down, while a receive-side
	 * shutdown alone reports POLLIN/POLLRDHUP.  This keeps poll()
	 * on write() usable in CLOSE_WAIT.
	 */
	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		int target = sock_rcvlowat(sk, 0, INT_MAX);

		if (tp->urg_seq == tp->copied_seq &&
		    !sock_flag(sk, SOCK_URGINLINE) &&
		    tp->urg_data)
			target++;

		/* Potential race condition. If read of tp below will
		 * escape above sk->sk_state, we can be illegally awaken
		 * in SYN_* states. */
		if (tp->rcv_nxt - tp->copied_seq >= target)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		} else
			mask |= POLLOUT | POLLWRNORM;

		if (tp->urg_data & TCP_URG_VALID)
			mask |= POLLPRI;
	}

	/* This barrier is coupled with smp_wmb() in tcp_reset() */
	smp_rmb();
	if (sk->sk_err)
		mask |= POLLERR;

	return mask;
}
EXPORT_SYMBOL(tcp_poll);

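/*
 * Answer ioctl queries about the socket: SIOCINQ (bytes ready to read),
 * SIOCATMARK (whether copied_seq sits at the urgent mark) and SIOCOUTQ
 * (bytes queued but not yet acknowledged).
 */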
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int answ;

	switch (cmd) {
	case SIOCINQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		lock_sock(sk);
		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else if (sock_flag(sk, SOCK_URGINLINE) ||
			 !tp->urg_data ||
			 before(tp->urg_seq, tp->copied_seq) ||
			 !before(tp->urg_seq, tp->rcv_nxt)) {
			struct sk_buff *skb;

			answ = tp->rcv_nxt - tp->copied_seq;

			/* Subtract 1, if FIN is in queue. */
			skb = skb_peek_tail(&sk->sk_receive_queue);
			if (answ && skb)
				answ -= tcp_hdr(skb)->fin;
		} else
			answ = tp->urg_seq - tp->copied_seq;
		release_sock(sk);
		break;
	case SIOCATMARK:
		answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
		break;
	case SIOCOUTQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = tp->write_seq - tp->snd_una;
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return put_user(answ, (int __user *)arg);
}
EXPORT_SYMBOL(tcp_ioctl);

static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
	tp->pushed_seq = tp->write_seq;
}

static inline int forced_push(struct tcp_sock *tp)
{
	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}

static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	skb->csum = 0;
	tcb->seq = tcb->end_seq = tp->write_seq;
	tcb->flags = TCPHDR_ACK;
	tcb->sacked = 0;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
	if (tp->nonagle & TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;
}

static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
{
	if (flags & MSG_OOB)
		tp->snd_up = tp->write_seq;
}

static inline void tcp_push(struct sock *sk, int flags, int mss_now,
			    int nonagle)
{
	if (tcp_send_head(sk)) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (!(flags & MSG_MORE) || forced_push(tp))
			tcp_mark_push(tp, tcp_write_queue_tail(sk));

		tcp_mark_urg(tp, flags);
		__tcp_push_pending_frames(sk, mss_now,
					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
	}
}

static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
				unsigned int offset, size_t len)
{
	struct tcp_splice_state *tss = rd_desc->arg.data;
	int ret;

	ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
			      tss->flags);
	if (ret > 0)
		rd_desc->count -= ret;
	return ret;
}

static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
{
	/* Store TCP splice context information in read_descriptor_t. */
	read_descriptor_t rd_desc = {
		.arg.data = tss,
		.count	  = tss->len,
	};

	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
}

/**
 *  tcp_splice_read - splice data from TCP socket to a pipe
 * @sock:	socket to splice from
 * @ppos:	position (not valid)
 * @pipe:	pipe to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will read pages from given socket and fill them into a pipe.
 *
 **/
ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct tcp_splice_state tss = {
		.pipe = pipe,
		.len = len,
		.flags = flags,
	};
	long timeo;
	ssize_t spliced;
	int ret;

	sock_rps_record_flow(sk);
	/*
	 * We can't seek on a socket input
	 */
	if (unlikely(*ppos))
		return -ESPIPE;

	ret = spliced = 0;

	lock_sock(sk);

	timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
	while (tss.len) {
		ret = __tcp_splice_read(sk, &tss);
		if (ret < 0)
			break;
		else if (!ret) {
			if (spliced)
				break;
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				ret = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				/*
				 * This occurs when user tries to read
				 * from never connected socket.
				 */
				if (!sock_flag(sk, SOCK_DONE))
					ret = -ENOTCONN;
				break;
			}
			if (!timeo) {
				ret = -EAGAIN;
				break;
			}
			sk_wait_data(sk, &timeo);
			if (signal_pending(current)) {
				ret = sock_intr_errno(timeo);
				break;
			}
			continue;
		}
		tss.len -= ret;
		spliced += ret;

		if (!timeo)
			break;
		release_sock(sk);
		lock_sock(sk);

		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current))
			break;
	}

	release_sock(sk);

	if (spliced)
		return spliced;

	return ret;
}
EXPORT_SYMBOL(tcp_splice_read);

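/*
 * Allocate an fclone-able skb for the write queue, reserving room for
 * the maximum protocol header and charging the socket's write-memory
 * accounting.  On allocation failure the protocol is put under memory
 * pressure and the send buffer is moderated instead.
 */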
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
{
	struct sk_buff *skb;

	/* The TCP header must be at least 32-bit aligned.  */
	size = ALIGN(size, 4);

	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
	if (skb) {
		if (sk_wmem_schedule(sk, skb->truesize)) {
			/*
			 * Make sure that we have exactly size bytes
			 * available to the caller, no more, no less.
			 */
			skb_reserve(skb, skb_tailroom(skb) - size);
			return skb;
		}
		__kfree_skb(skb);
	} else {
		sk->sk_prot->enter_memory_pressure(sk);
		sk_stream_moderate_sndbuf(sk);
	}
	return NULL;
}

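/*
 * Compute the size goal for a write-queue skb: mss_now for non-GSO
 * sockets, otherwise up to just under gso_max_size, bounded to half the
 * window and cached in xmit_size_goal_segs to avoid a divide on every
 * write.
 */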
static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
				       int large_allowed)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 xmit_size_goal, old_size_goal;

	xmit_size_goal = mss_now;

	if (large_allowed && sk_can_gso(sk)) {
		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
				  inet_csk(sk)->icsk_af_ops->net_header_len -
				  inet_csk(sk)->icsk_ext_hdr_len -
				  tp->tcp_header_len);

		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);

		/* We try hard to avoid divides here */
		old_size_goal = tp->xmit_size_goal_segs * mss_now;

		if (likely(old_size_goal <= xmit_size_goal &&
			   old_size_goal + mss_now > xmit_size_goal)) {
			xmit_size_goal = old_size_goal;
		} else {
			tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
		}
	}

	return max(xmit_size_goal, mss_now);
}

static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
	int mss_now;

	mss_now = tcp_current_mss(sk);
	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));

	return mss_now;
}

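/*
 * Zero-copy send path used by tcp_sendpage(): attach page fragments to
 * the tail skb (coalescing with the last fragment when possible),
 * update sequence numbers and memory accounting, and push frames out
 * subject to Nagle and the size goal.
 */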
static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
				size_t psize, int flags)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int mss_now, size_goal;
	int err;
	ssize_t copied;
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_err;

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	mss_now = tcp_send_mss(sk, &size_goal, flags);
	copied = 0;

	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto out_err;

	while (psize > 0) {
		struct sk_buff *skb = tcp_write_queue_tail(sk);
		struct page *page = pages[poffset / PAGE_SIZE];
		int copy, i, can_coalesce;
		int offset = poffset % PAGE_SIZE;
		int size = min_t(size_t, psize, PAGE_SIZE - offset);

		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
new_segment:
			if (!sk_stream_memory_free(sk))
				goto wait_for_sndbuf;

			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
			if (!skb)
				goto wait_for_memory;

			skb_entail(sk, skb);
			copy = size_goal;
		}

		if (copy > size)
			copy = size;

		i = skb_shinfo(skb)->nr_frags;
		can_coalesce = skb_can_coalesce(skb, i, page, offset);
		if (!can_coalesce && i >= MAX_SKB_FRAGS) {
			tcp_mark_push(tp, skb);
			goto new_segment;
		}
		if (!sk_wmem_schedule(sk, copy))
			goto wait_for_memory;

		if (can_coalesce) {
			skb_shinfo(skb)->frags[i - 1].size += copy;
		} else {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, copy);
		}

		skb->len += copy;
		skb->data_len += copy;
		skb->truesize += copy;
		sk->sk_wmem_queued += copy;
		sk_mem_charge(sk, copy);
		skb->ip_summed = CHECKSUM_PARTIAL;
		tp->write_seq += copy;
		TCP_SKB_CB(skb)->end_seq += copy;
		skb_shinfo(skb)->gso_segs = 0;

		if (!copied)
			TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;

		copied += copy;
		poffset += copy;
		if (!(psize -= copy))
			goto out;

		if (skb->len < size_goal || (flags & MSG_OOB))
			continue;

		if (forced_push(tp)) {
			tcp_mark_push(tp, skb);
			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
		} else if (skb == tcp_send_head(sk))
			tcp_push_one(sk, mss_now);
		continue;

wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		if (copied)
			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
			goto do_error;

		mss_now = tcp_send_mss(sk, &size_goal, flags);
	}

out:
	if (copied)
		tcp_push(sk, flags, mss_now, tp->nonagle);
	return copied;

do_error:
	if (copied)
		goto out;
out_err:
	return sk_stream_error(sk, flags, err);
}

int tcp_sendpage(struct sock *sk, struct page *page, int offset,
		 size_t size, int flags)
{
	ssize_t res;

	if (!(sk->sk_route_caps & NETIF_F_SG) ||
	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
		return sock_no_sendpage(sk->sk_socket, page, offset, size,
					flags);

	lock_sock(sk);
	TCP_CHECK_TIMER(sk);
	res = do_tcp_sendpages(sk, &page, offset, size, flags);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return res;
}
EXPORT_SYMBOL(tcp_sendpage);

#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
#define TCP_OFF(sk)	(sk->sk_sndmsg_off)

static inline int select_size(struct sock *sk, int sg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int tmp = tp->mss_cache;

	if (sg) {
		if (sk_can_gso(sk))
			tmp = 0;
		else {
			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);

			if (tmp >= pgbreak &&
			    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
				tmp = pgbreak;
		}
	}

	return tmp;
}

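/*
 * Copy user data from an iovec into write-queue skbs: fill any tailroom
 * in the tail skb first, then page fragments (via the per-socket cached
 * sndmsg page), allocating new segments as the size goal is reached and
 * blocking on send-buffer space when necessary.
 */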
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t size)
{
	struct iovec *iov;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int iovlen, flags;
	int mss_now, size_goal;
	int sg, err, copied;
	long timeo;

	lock_sock(sk);
	TCP_CHECK_TIMER(sk);

	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_err;

	/* This should be in poll */
	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	mss_now = tcp_send_mss(sk, &size_goal, flags);

	/* Ok commence sending. */
	iovlen = msg->msg_iovlen;
	iov = msg->msg_iov;
	copied = 0;

	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto out_err;

	sg = sk->sk_route_caps & NETIF_F_SG;

	while (--iovlen >= 0) {
		size_t seglen = iov->iov_len;
		unsigned char __user *from = iov->iov_base;

		iov++;

		while (seglen > 0) {
			int copy = 0;
			int max = size_goal;

			skb = tcp_write_queue_tail(sk);
			if (tcp_send_head(sk)) {
				if (skb->ip_summed == CHECKSUM_NONE)
					max = mss_now;
				copy = max - skb->len;
			}

			if (copy <= 0) {
new_segment:
				/* Allocate new segment. If the interface is SG,
				 * allocate skb fitting to single page.
				 */
				if (!sk_stream_memory_free(sk))
					goto wait_for_sndbuf;

				skb = sk_stream_alloc_skb(sk,
							  select_size(sk, sg),
							  sk->sk_allocation);
				if (!skb)
					goto wait_for_memory;

				/*
				 * Check whether we can use HW checksum.
				 */
				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
					skb->ip_summed = CHECKSUM_PARTIAL;

				skb_entail(sk, skb);
				copy = size_goal;
				max = size_goal;
			}

			/* Try to append data to the end of skb. */
			if (copy > seglen)
				copy = seglen;

			/* Where to copy to? */
			if (skb_tailroom(skb) > 0) {
				/* We have some space in skb head. Superb! */
				if (copy > skb_tailroom(skb))
					copy = skb_tailroom(skb);
				if ((err = skb_add_data(skb, from, copy)) != 0)
					goto do_fault;
			} else {
				int merge = 0;
				int i = skb_shinfo(skb)->nr_frags;
				struct page *page = TCP_PAGE(sk);
				int off = TCP_OFF(sk);

				if (skb_can_coalesce(skb, i, page, off) &&
				    off != PAGE_SIZE) {
					/* We can extend the last page
					 * fragment. */
					merge = 1;
				} else if (i == MAX_SKB_FRAGS || !sg) {
					/* Need to add new fragment and cannot
					 * do this because interface is non-SG,
					 * or because all the page slots are
					 * busy. */
					tcp_mark_push(tp, skb);
					goto new_segment;
				} else if (page) {
					if (off == PAGE_SIZE) {
						put_page(page);
						TCP_PAGE(sk) = page = NULL;
						off = 0;
					}
				} else
					off = 0;

				if (copy > PAGE_SIZE - off)
					copy = PAGE_SIZE - off;

				if (!sk_wmem_schedule(sk, copy))
					goto wait_for_memory;

				if (!page) {
					/* Allocate new cache page. */
					if (!(page = sk_stream_alloc_page(sk)))
						goto wait_for_memory;
				}

				/* Time to copy data. We are close to
				 * the end! */
				err = skb_copy_to_page(sk, from, skb, page,
						       off, copy);
				if (err) {
					/* If this page was new, give it to the
					 * socket so it does not get leaked.
					 */
					if (!TCP_PAGE(sk)) {
						TCP_PAGE(sk) = page;
						TCP_OFF(sk) = 0;
					}
					goto do_error;
				}

				/* Update the skb. */
				if (merge) {
					skb_shinfo(skb)->frags[i - 1].size +=
									copy;
				} else {
					skb_fill_page_desc(skb, i, page, off, copy);
					if (TCP_PAGE(sk)) {
						get_page(page);
					} else if (off + copy < PAGE_SIZE) {
						get_page(page);
						TCP_PAGE(sk) = page;
					}
				}

				TCP_OFF(sk) = off + copy;
			}

			if (!copied)
				TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;

			tp->write_seq += copy;
			TCP_SKB_CB(skb)->end_seq += copy;
			skb_shinfo(skb)->gso_segs = 0;

			from += copy;
			copied += copy;
			if ((seglen -= copy) == 0 && iovlen == 0)
				goto out;

			if (skb->len < max || (flags & MSG_OOB))
				continue;

			if (forced_push(tp)) {
				tcp_mark_push(tp, skb);
				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
			} else if (skb == tcp_send_head(sk))
				tcp_push_one(sk, mss_now);
			continue;

wait_for_sndbuf:
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
			if (copied)
				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
				goto do_error;

			mss_now = tcp_send_mss(sk, &size_goal, flags);
		}
	}

out:
	if (copied)
		tcp_push(sk, flags, mss_now, tp->nonagle);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

do_fault:
	if (!skb->len) {
		tcp_unlink_write_queue(skb, sk);
		/* It is the one place in all of TCP, except connection
		 * reset, where we can be unlinking the send_head.
		 */
		tcp_check_send_head(sk, skb);
		sk_wmem_free_skb(sk, skb);
	}

do_error:
	if (copied)
		goto out;
out_err:
	err = sk_stream_error(sk, flags, err);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;
}
EXPORT_SYMBOL(tcp_sendmsg);

/*
 *	Handle reading urgent data. BSD has very simple semantics for
 *	this, no blocking and very strange errors 8)
 */

static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* No URG data to read. */
	if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
	    tp->urg_data == TCP_URG_READ)
		return -EINVAL;	/* Yes this is right ! */

	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
		return -ENOTCONN;

	if (tp->urg_data & TCP_URG_VALID) {
		int err = 0;
		char c = tp->urg_data;

		if (!(flags & MSG_PEEK))
			tp->urg_data = TCP_URG_READ;

		/* Read urgent data. */
		msg->msg_flags |= MSG_OOB;

		if (len > 0) {
			if (!(flags & MSG_TRUNC))
				err = memcpy_toiovec(msg->msg_iov, &c, 1);
			len = 1;
		} else
			msg->msg_flags |= MSG_TRUNC;

		return err ? -EFAULT : len;
	}

	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
		return 0;

	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
	 * the available implementations agree in this case:
	 * this call should never block, independent of the
	 * blocking state of the socket.
	 */
	return -EAGAIN;
}

/* Clean up the receive buffer for full frames taken by the user,
 * then send an ACK if necessary.  COPIED is the number of bytes
 * tcp_recvmsg has given to the user so far, it speeds up the
 * calculation of whether or not we must ACK for the sake of
 * a window update.
 */
void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int time_to_ack = 0;

#if TCP_DEBUG
	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

	WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
	     "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
	     tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
#endif

	if (inet_csk_ack_scheduled(sk)) {
		const struct inet_connection_sock *icsk = inet_csk(sk);
		/* Delayed ACKs frequently hit locked sockets during bulk
		 * receive. */
		if (icsk->icsk_ack.blocked ||
		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
		    /*
		     * If this read emptied read buffer, we send ACK, if
		     * connection is not bidirectional, user drained
		     * receive buffer and there was a small segment
		     * in queue.
		     */
		    (copied > 0 &&
		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
		       !icsk->icsk_ack.pingpong)) &&
		      !atomic_read(&sk->sk_rmem_alloc)))
			time_to_ack = 1;
	}

	/* We send an ACK if we can now advertise a non-zero window
	 * which has been raised "significantly".
	 *
	 * Even if window raised up to infinity, do not send window open ACK
	 * in states, where we will not receive more. It is useless.
	 */
	if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
		__u32 rcv_window_now = tcp_receive_window(tp);

		/* Optimize, __tcp_select_window() is not cheap. */
		if (2*rcv_window_now <= tp->window_clamp) {
			__u32 new_window = __tcp_select_window(sk);

			/* Send ACK now, if this read freed lots of space
			 * in our buffer. Certainly, new_window is new window.
			 * We can advertise it now, if it is not less than
			 * current one.  "Lots" means "at least twice" here.
			 */
			if (new_window && new_window >= 2 * rcv_window_now)
				time_to_ack = 1;
		}
	}
	if (time_to_ack)
		tcp_send_ack(sk);
}

static void tcp_prequeue_process(struct sock *sk)
{
	struct sk_buff *skb;
	struct tcp_sock *tp = tcp_sk(sk);

	NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);

	/* RX process wants to run with disabled BHs, though it is not
	 * necessary */
	local_bh_disable();
	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
		sk_backlog_rcv(sk, skb);
	local_bh_enable();

	/* Clear memory counter. */
	tp->ucopy.memory = 0;
}

#ifdef CONFIG_NET_DMA
static void tcp_service_net_dma(struct sock *sk, bool wait)
{
	dma_cookie_t done, used;
	dma_cookie_t last_issued;
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->ucopy.dma_chan)
		return;

	last_issued = tp->ucopy.dma_cookie;
	dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);

	do {
		if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
					      last_issued, &done,
					      &used) == DMA_SUCCESS) {
			/* Safe to free early-copied skbs now */
			__skb_queue_purge(&sk->sk_async_wait_queue);
			break;
		} else {
			struct sk_buff *skb;
			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
			       (dma_async_is_complete(skb->dma_cookie, done,
						      used) == DMA_SUCCESS)) {
				__skb_dequeue(&sk->sk_async_wait_queue);
				kfree_skb(skb);
			}
		}
	} while (wait);
}
#endif

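/*
 * Find the skb in the receive queue that covers sequence number seq and
 * return the offset of seq into it.  A SYN, if present, occupies one
 * sequence number before the data.
 */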
static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
	struct sk_buff *skb;
	u32 offset;

	skb_queue_walk(&sk->sk_receive_queue, skb) {
		offset = seq - TCP_SKB_CB(skb)->seq;
		if (tcp_hdr(skb)->syn)
			offset--;
		if (offset < skb->len || tcp_hdr(skb)->fin) {
			*off = offset;
			return skb;
		}
	}
	return NULL;
}

/*
 * This routine provides an alternative to tcp_recvmsg() for routines
 * that would like to handle copying from skbuffs directly in 'sendfile'
 * fashion.
 * Note:
 *	- It is assumed that the socket was locked by the caller.
 *	- The routine does not block.
 *	- At present, there is no support for reading OOB data
 *	  or for 'peeking' the socket using this routine
 *	  (although both would be easy to implement).
 */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
		  sk_read_actor_t recv_actor)
{
	struct sk_buff *skb;
	struct tcp_sock *tp = tcp_sk(sk);
	u32 seq = tp->copied_seq;
	u32 offset;
	int copied = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -ENOTCONN;
	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
		if (offset < skb->len) {
			int used;
			size_t len;

			len = skb->len - offset;
			/* Stop reading if we hit a patch of urgent data */
			if (tp->urg_data) {
				u32 urg_offset = tp->urg_seq - seq;
				if (urg_offset < len)
					len = urg_offset;
				if (!len)
					break;
			}
			used = recv_actor(desc, skb, offset, len);
			if (used < 0) {
				if (!copied)
					copied = used;
				break;
			} else if (used <= len) {
				seq += used;
				copied += used;
				offset += used;
			}
			/*
			 * If recv_actor drops the lock (e.g. TCP splice
			 * receive) the skb pointer might be invalid when
			 * getting here: tcp_collapse might have deleted it
			 * while aggregating skbs from the socket queue.
			 */
			skb = tcp_recv_skb(sk, seq-1, &offset);
			if (!skb || (offset+1 != skb->len))
				break;
		}
		if (tcp_hdr(skb)->fin) {
			sk_eat_skb(sk, skb, 0);
			++seq;
			break;
		}
		sk_eat_skb(sk, skb, 0);
		if (!desc->count)
			break;
		tp->copied_seq = seq;
	}
	tp->copied_seq = seq;

	tcp_rcv_space_adjust(sk);

	/* Clean up data we have read: This will do ACK frames. */
	if (copied > 0)
		tcp_cleanup_rbuf(sk, copied);
	return copied;
}
EXPORT_SYMBOL(tcp_read_sock);

/*
 *	This routine copies from a sock struct into the user buffer.
 *
 *	Technical note: in 2.3 we work on _locked_ socket, so that
 *	tricks with *seq access order and skb->users are not required.
 *	Probably, code can be easily improved even more.
 */

int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len, int nonblock, int flags, int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int copied = 0;
	u32 peek_seq;
	u32 *seq;
	unsigned long used;
	int err;
	int target;		/* Read at least this many bytes */
	long timeo;
	struct task_struct *user_recv = NULL;
	int copied_early = 0;
	struct sk_buff *skb;
	u32 urg_hole = 0;

	lock_sock(sk);

	TCP_CHECK_TIMER(sk);

	err = -ENOTCONN;
	if (sk->sk_state == TCP_LISTEN)
		goto out;

	timeo = sock_rcvtimeo(sk, nonblock);

	/* Urgent data needs to be handled specially. */
	if (flags & MSG_OOB)
		goto recv_urg;

	seq = &tp->copied_seq;
	if (flags & MSG_PEEK) {
		peek_seq = tp->copied_seq;
		seq = &peek_seq;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

#ifdef CONFIG_NET_DMA
	tp->ucopy.dma_chan = NULL;
	preempt_disable();
	skb = skb_peek_tail(&sk->sk_receive_queue);
	{
		int available = 0;

		if (skb)
			available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
		if ((available < target) &&
		    (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
		    !sysctl_tcp_low_latency &&
		    dma_find_channel(DMA_MEMCPY)) {
			preempt_enable_no_resched();
			tp->ucopy.pinned_list =
					dma_pin_iovec_pages(msg->msg_iov, len);
		} else {
			preempt_enable_no_resched();
		}
	}
#endif

	do {
		u32 offset;

		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
		if (tp->urg_data && tp->urg_seq == *seq) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
				break;
			}
		}

		/* Next get a buffer. */

		skb_queue_walk(&sk->sk_receive_queue, skb) {
			/* Now that we have two receive queues this
			 * shouldn't happen.
			 */
			if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
				 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
				 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
				 flags))
				break;

			offset = *seq - TCP_SKB_CB(skb)->seq;
			if (tcp_hdr(skb)->syn)
				offset--;
			if (offset < skb->len)
				goto found_ok_skb;
			if (tcp_hdr(skb)->fin)
				goto found_fin_ok;
			WARN(!(flags & MSG_PEEK),
			     "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
			     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
		}

		/* Well, if we have backlog, try to process it now yet. */

		if (copied >= target && !sk->sk_backlog.tail)
			break;

		if (copied) {
			if (sk->sk_err ||
			    sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    !timeo ||
			    signal_pending(current))
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;

			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}

			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;

			if (sk->sk_state == TCP_CLOSE) {
				if (!sock_flag(sk, SOCK_DONE)) {
					/* This occurs when user tries to read
					 * from never connected socket.
					 */
					copied = -ENOTCONN;
					break;
				}
				break;
			}

			if (!timeo) {
				copied = -EAGAIN;
				break;
			}

			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		tcp_cleanup_rbuf(sk, copied);

		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
			/* Install new reader */
			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
				user_recv = current;
				tp->ucopy.task = user_recv;
				tp->ucopy.iov = msg->msg_iov;
			}

			tp->ucopy.len = len;

			WARN_ON(tp->copied_seq != tp->rcv_nxt &&
				!(flags & (MSG_PEEK | MSG_TRUNC)));

			/* We have the following (pseudo)queues:
			 *
			 * 1. packets in flight
			 * 2. backlog
			 * 3. prequeue
			 * 4. receive_queue
			 *
			 * Each queue can be processed only if the next ones
			 * are empty.  If the prequeue is not empty here, it
			 * must be processed before releasing the socket,
			 * otherwise the queue order would be broken on the
			 * next iteration: the backlog may contain packets
			 * that arrived after the prequeued ones.
			 */
			if (!skb_queue_empty(&tp->ucopy.prequeue))
				goto do_prequeue;
		}

#ifdef CONFIG_NET_DMA
		if (tp->ucopy.dma_chan)
			dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
#endif
		if (copied >= target) {
			/* Do not sleep, just process backlog. */
			release_sock(sk);
			lock_sock(sk);
		} else
			sk_wait_data(sk, &timeo);

#ifdef CONFIG_NET_DMA
		tcp_service_net_dma(sk, false);  /* Don't block */
		tp->ucopy.wakeup = 0;
#endif

		if (user_recv) {
			int chunk;

			/* Account any data copied directly to the iovec
			 * while the backlog was being processed. */
			if ((chunk = len - tp->ucopy.len) != 0) {
				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
				len -= chunk;
				copied += chunk;
			}

			if (tp->rcv_nxt == tp->copied_seq &&
			    !skb_queue_empty(&tp->ucopy.prequeue)) {
do_prequeue:
				tcp_prequeue_process(sk);

				if ((chunk = len - tp->ucopy.len) != 0) {
					NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
					len -= chunk;
					copied += chunk;
				}
			}
		}
		if ((flags & MSG_PEEK) &&
		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
			if (net_ratelimit())
				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
				       current->comm, task_pid_nr(current));
			peek_seq = tp->copied_seq;
		}
		continue;

	found_ok_skb:
		/* Ok so how much can we use? */
		used = skb->len - offset;
		if (len < used)
			used = len;

		/* Do we have urgent data here? */
		if (tp->urg_data) {
			u32 urg_offset = tp->urg_seq - *seq;
			if (urg_offset < used) {
				if (!urg_offset) {
					if (!sock_flag(sk, SOCK_URGINLINE)) {
						++*seq;
						urg_hole++;
						offset++;
						used--;
						if (!used)
							goto skip_copy;
					}
				} else
					used = urg_offset;
			}
		}

		if (!(flags & MSG_TRUNC)) {
#ifdef CONFIG_NET_DMA
			if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
				tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);

			if (tp->ucopy.dma_chan) {
				tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
					tp->ucopy.dma_chan, skb, offset,
					msg->msg_iov, used,
					tp->ucopy.pinned_list);

				if (tp->ucopy.dma_cookie < 0) {

					printk(KERN_ALERT "dma_cookie < 0\n");

					/* Exception. Bailout! */
					if (!copied)
						copied = -EFAULT;
					break;
				}

				dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);

				if ((offset + used) == skb->len)
					copied_early = 1;

			} else
#endif
			{
				err = skb_copy_datagram_iovec(skb, offset,
							      msg->msg_iov, used);
				if (err) {
					/* Exception. Bailout! */
					if (!copied)
						copied = -EFAULT;
					break;
				}
			}
		}

		*seq += used;
		copied += used;
		len -= used;

		tcp_rcv_space_adjust(sk);

skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
			tp->urg_data = 0;
			tcp_fast_path_check(sk);
		}
		if (used + offset < skb->len)
			continue;

		if (tcp_hdr(skb)->fin)
			goto found_fin_ok;
		if (!(flags & MSG_PEEK)) {
			sk_eat_skb(sk, skb, copied_early);
			copied_early = 0;
		}
		continue;

	found_fin_ok:
		/* Process the FIN. */
		++*seq;
		if (!(flags & MSG_PEEK)) {
			sk_eat_skb(sk, skb, copied_early);
			copied_early = 0;
		}
		break;
	} while (len > 0);

	if (user_recv) {
		if (!skb_queue_empty(&tp->ucopy.prequeue)) {
			int chunk;

			tp->ucopy.len = copied > 0 ? len : 0;

			tcp_prequeue_process(sk);

			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
				len -= chunk;
				copied += chunk;
			}
		}

		tp->ucopy.task = NULL;
		tp->ucopy.len = 0;
	}

#ifdef CONFIG_NET_DMA
	tcp_service_net_dma(sk, true);  /* Wait for queue to drain */
	tp->ucopy.dma_chan = NULL;

	if (tp->ucopy.pinned_list) {
		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
		tp->ucopy.pinned_list = NULL;
	}
#endif

	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket. I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
	tcp_cleanup_rbuf(sk, copied);

	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

out:
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;

recv_urg:
	err = tcp_recv_urg(sk, msg, len, flags);
	goto out;
}
EXPORT_SYMBOL(tcp_recvmsg);

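/*
 * Move the socket to a new TCP state, keeping the ESTABLISHED and
 * reset MIB counters in sync and unhashing the socket when it goes
 * to TCP_CLOSE.
 */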
void tcp_set_state(struct sock *sk, int state)
{
	int oldstate = sk->sk_state;

	switch (state) {
	case TCP_ESTABLISHED:
		if (oldstate != TCP_ESTABLISHED)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
		break;

	case TCP_CLOSE:
		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == TCP_ESTABLISHED)
			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;

#ifdef STATE_TRACE
	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
#endif
}
EXPORT_SYMBOL_GPL(tcp_set_state);

/*
 * State transition table on close(): for each current state, the next
 * state and whether a FIN must be sent to get there (TCP_ACTION_FIN).
 */
static const unsigned char new_state[16] = {
  /* current state:        new state:      action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

static int tcp_close_state(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];
	int ns = next & TCP_STATE_MASK;

	tcp_set_state(sk, ns);

	return next & TCP_ACTION_FIN;
}

/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or sock_set_flag(sk, SOCK_DEAD).
 */
void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
EXPORT_SYMBOL(tcp_shutdown);

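/*
 * Close a socket: flush unread receive data (sending a RST if data was
 * thrown away, per RFC 2525 section 2.17), send a FIN when the state
 * machine requires one, then orphan the socket and let the FIN_WAIT2 /
 * TIME_WAIT timers or orphan-memory pressure decide its final fate.
 */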
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
			  tcp_hdr(skb)->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_mem_reclaim(sk);

	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
	if (sk->sk_state == TCP_CLOSE)
		goto adjudge_to_death;

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost.  Always doing a FIN instead would let the peer hold
	 * the connection open against a reader that will never drain it.
	 * Note: timeout is always zero in such a case.
	 */
	if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, sk->sk_allocation);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection -- even though, formally
		 * speaking, the transition to FIN_WAIT1/LAST_ACK is only
		 * legal once the FIN is actually within the send window
		 * rather than merely queued.  In practice this is
		 * harmless and keeps state sniffers happy.
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);

	/* Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could keep
	 *	a socket open forever with no application left this end.
	 *	Sockets lingering in FIN_WAIT2 with no reader are useless but
	 *	consume significant resources, so we kill our end after a
	 *	timeout; the linger2 socket option controls it.
	 */
	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_mem_reclaim(sk);
		if (tcp_too_many_orphans(sk, 0)) {
			if (net_ratelimit())
				printk(KERN_INFO "TCP: too many orphaned "
				       "sockets\n");
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	if (sk->sk_state == TCP_CLOSE)
		inet_csk_destroy_sock(sk);
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);

/* These states need RST on ABORT according to RFC793 */

static inline int tcp_need_reset(int state)
{
	return (1 << state) &
	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}

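/*
 * Hard-disconnect a socket: abort the connection (RST where RFC793
 * requires it), purge the transmit, receive and out-of-order queues,
 * and reset congestion and keepalive state so the socket can be reused.
 */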
int tcp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;
	int old_state = sk->sk_state;

	if (old_state != TCP_CLOSE)
		tcp_set_state(sk, TCP_CLOSE);

	/* ABORT function of RFC793 */
	if (old_state == TCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (tcp_need_reset(old_state) ||
		   (tp->snd_nxt != tp->write_seq &&
		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* The last check adjusts for discrepancy of Linux wrt. RFC
		 * states
		 */
		tcp_send_active_reset(sk, gfp_any());
		sk->sk_err = ECONNRESET;
	} else if (old_state == TCP_SYN_SENT)
		sk->sk_err = ECONNRESET;

	tcp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	tcp_write_queue_purge(sk);
	__skb_queue_purge(&tp->out_of_order_queue);
#ifdef CONFIG_NET_DMA
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->srtt = 0;
	if ((tp->write_seq += tp->max_window + 2) == 0)
		tp->write_seq = 1;
	icsk->icsk_backoff = 0;
	tp->snd_cwnd = 2;
	icsk->icsk_probes_out = 0;
	tp->packets_out = 0;
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_cnt = 0;
	tp->bytes_acked = 0;
	tp->window_clamp = 0;
	tcp_set_ca_state(sk, TCP_CA_Open);
	tcp_clear_retrans(tp);
	inet_csk_delack_init(sk);
	tcp_init_send_head(sk);
	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
EXPORT_SYMBOL(tcp_disconnect);

/*
 *	Socket option code for TCP.
 */
static int do_tcp_setsockopt(struct sock *sk, int level,
		int optname, char __user *optval, unsigned int optlen)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int val;
	int err = 0;

	/* These are data/string values, all the others are ints */
	switch (optname) {
	case TCP_CONGESTION: {
		char name[TCP_CA_NAME_MAX];

		if (optlen < 1)
			return -EINVAL;

		val = strncpy_from_user(name, optval,
					min_t(long, TCP_CA_NAME_MAX-1, optlen));
		if (val < 0)
			return -EFAULT;
		name[val] = 0;

		lock_sock(sk);
		err = tcp_set_congestion_control(sk, name);
		release_sock(sk);
		return err;
	}
	case TCP_COOKIE_TRANSACTIONS: {
		struct tcp_cookie_transactions ctd;
		struct tcp_cookie_values *cvp = NULL;

		if (sizeof(ctd) > optlen)
			return -EINVAL;
		if (copy_from_user(&ctd, optval, sizeof(ctd)))
			return -EFAULT;

		if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
		    ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
			return -EINVAL;

		if (ctd.tcpct_cookie_desired == 0) {
			/* default to global value */
		} else if ((0x1 & ctd.tcpct_cookie_desired) ||
			   ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
			   ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
			return -EINVAL;
		}

		if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
			/* Supercedes all other values */
			lock_sock(sk);
			if (tp->cookie_values != NULL) {
				kref_put(&tp->cookie_values->kref,
					 tcp_cookie_values_release);
				tp->cookie_values = NULL;
			}
			tp->rx_opt.cookie_in_always = 0;
			tp->rx_opt.cookie_out_never = 1;
			release_sock(sk);
			return err;
		}

		/* Allocate ancillary memory before locking.
		 */
		if (ctd.tcpct_used > 0 ||
		    (tp->cookie_values == NULL &&
		     (sysctl_tcp_cookie_size > 0 ||
		      ctd.tcpct_cookie_desired > 0 ||
		      ctd.tcpct_s_data_desired > 0))) {
			cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
				      GFP_KERNEL);
			if (cvp == NULL)
				return -ENOMEM;

			kref_init(&cvp->kref);
		}
		lock_sock(sk);
		tp->rx_opt.cookie_in_always =
			(TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
		tp->rx_opt.cookie_out_never = 0; /* SYN-sender can choose */

		if (tp->cookie_values != NULL) {
			if (cvp != NULL) {
				/* Changed values are recorded by a changed
				 * pointer, ensuring the cookie will differ,
				 * without separately hashing each value later.
				 */
				kref_put(&tp->cookie_values->kref,
					 tcp_cookie_values_release);
			} else {
				cvp = tp->cookie_values;
			}
		}

		if (cvp != NULL) {
			cvp->cookie_desired = ctd.tcpct_cookie_desired;

			if (ctd.tcpct_used > 0) {
				memcpy(cvp->s_data_payload, ctd.tcpct_value,
				       ctd.tcpct_used);
				cvp->s_data_desired = ctd.tcpct_used;
				cvp->s_data_constant = 1;
			} else {
				/* No constant payload data. */
				cvp->s_data_desired = ctd.tcpct_s_data_desired;
				cvp->s_data_constant = 0;
			}

			tp->cookie_values = cvp;
		}
		release_sock(sk);
		return err;
	}
	default:
		/* fallthru */
		break;
	}

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	lock_sock(sk);

	switch (optname) {
	case TCP_MAXSEG:
		/* Values greater than interface MTU won't take effect. However
		 * at the point when this call is done we typically don't yet
		 * know which interface is going to be used */
		if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
			err = -EINVAL;
			break;
		}
		tp->rx_opt.user_mss = val;
		break;

	case TCP_NODELAY:
		if (val) {
			/* TCP_NODELAY is weaker than TCP_CORK, so that
			 * this option on corked socket is remembered, but
			 * it is not activated until cork is cleared.
			 *
			 * However, when TCP_NODELAY is set we make
			 * an explicit push, which overrides even TCP_CORK
			 * for currently queued segments.
			 */
			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
			tcp_push_pending_frames(sk);
		} else {
			tp->nonagle &= ~TCP_NAGLE_OFF;
		}
		break;

	case TCP_THIN_LINEAR_TIMEOUTS:
		if (val < 0 || val > 1)
			err = -EINVAL;
		else
			tp->thin_lto = val;
		break;

	case TCP_THIN_DUPACK:
		if (val < 0 || val > 1)
			err = -EINVAL;
		else
			tp->thin_dupack = val;
		break;

	case TCP_CORK:
		/* When set indicates to always queue non-full frames.
		 * Later the user clears this option and we transmit
		 * any pending partial frames in the queue.  This is
		 * meant to be used alongside sendfile() to get properly
		 * filled frames when the user (for example) must write
		 * out headers with a write() call first and then use
		 * sendfile to send out the data parts.
		 *
		 * TCP_CORK can be set together with TCP_NODELAY and it is
		 * stronger than TCP_NODELAY.
		 */
		if (val) {
			tp->nonagle |= TCP_NAGLE_CORK;
		} else {
			tp->nonagle &= ~TCP_NAGLE_CORK;
			if (tp->nonagle&TCP_NAGLE_OFF)
				tp->nonagle |= TCP_NAGLE_PUSH;
			tcp_push_pending_frames(sk);
		}
		break;

	case TCP_KEEPIDLE:
		if (val < 1 || val > MAX_TCP_KEEPIDLE)
			err = -EINVAL;
		else {
			tp->keepalive_time = val * HZ;
			if (sock_flag(sk, SOCK_KEEPOPEN) &&
			    !((1 << sk->sk_state) &
			      (TCPF_CLOSE | TCPF_LISTEN))) {
				u32 elapsed = keepalive_time_elapsed(tp);
				if (tp->keepalive_time > elapsed)
					elapsed = tp->keepalive_time - elapsed;
				else
					elapsed = 0;
				inet_csk_reset_keepalive_timer(sk, elapsed);
			}
		}
		break;
	case TCP_KEEPINTVL:
		if (val < 1 || val > MAX_TCP_KEEPINTVL)
			err = -EINVAL;
		else
			tp->keepalive_intvl = val * HZ;
		break;
	case TCP_KEEPCNT:
		if (val < 1 || val > MAX_TCP_KEEPCNT)
			err = -EINVAL;
		else
			tp->keepalive_probes = val;
		break;
	case TCP_SYNCNT:
		if (val < 1 || val > MAX_TCP_SYNCNT)
			err = -EINVAL;
		else
			icsk->icsk_syn_retries = val;
		break;

	case TCP_LINGER2:
		if (val < 0)
			tp->linger2 = -1;
		else if (val > sysctl_tcp_fin_timeout / HZ)
			tp->linger2 = 0;
		else
			tp->linger2 = val * HZ;
		break;

	case TCP_DEFER_ACCEPT:
		/* Translate value in seconds to number of retransmits */
		icsk->icsk_accept_queue.rskq_defer_accept =
			secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
					TCP_RTO_MAX / HZ);
		break;

	case TCP_WINDOW_CLAMP:
		if (!val) {
			if (sk->sk_state != TCP_CLOSE) {
				err = -EINVAL;
				break;
			}
			tp->window_clamp = 0;
		} else
			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
						SOCK_MIN_RCVBUF / 2 : val;
		break;

	case TCP_QUICKACK:
		if (!val) {
			icsk->icsk_ack.pingpong = 1;
		} else {
			icsk->icsk_ack.pingpong = 0;
			if ((1 << sk->sk_state) &
			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
			    inet_csk_ack_scheduled(sk)) {
				icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
				tcp_cleanup_rbuf(sk, 1);
				if (!(val & 1))
					icsk->icsk_ack.pingpong = 1;
			}
		}
		break;

#ifdef CONFIG_TCP_MD5SIG
	case TCP_MD5SIG:
		/* Read the IP->Key mappings from userspace */
		err = tp->af_specific->md5_parse(sk, optval, optlen);
		break;
#endif
	case TCP_USER_TIMEOUT:
		/* Cap the max time in ms TCP will retry or probe the window
		 * before giving up and aborting (ETIMEDOUT) a connection.
		 */
		icsk->icsk_user_timeout = msecs_to_jiffies(val);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
		   unsigned int optlen)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (level != SOL_TCP)
		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
						     optval, optlen);
	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(tcp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	if (level != SOL_TCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif

/* Return information about state of tcp endpoint in API format. */
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	u32 now = tcp_time_stamp;

	memset(info, 0, sizeof(*info));

	info->tcpi_state = sk->sk_state;
	info->tcpi_ca_state = icsk->icsk_ca_state;
	info->tcpi_retransmits = icsk->icsk_retransmits;
	info->tcpi_probes = icsk->icsk_probes_out;
	info->tcpi_backoff = icsk->icsk_backoff;

	if (tp->rx_opt.tstamp_ok)
		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
	if (tcp_is_sack(tp))
		info->tcpi_options |= TCPI_OPT_SACK;
	if (tp->rx_opt.wscale_ok) {
		info->tcpi_options |= TCPI_OPT_WSCALE;
		info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
		info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
	}

	if (tp->ecn_flags&TCP_ECN_OK)
		info->tcpi_options |= TCPI_OPT_ECN;

	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
	info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
	info->tcpi_snd_mss = tp->mss_cache;
	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;

	if (sk->sk_state == TCP_LISTEN) {
		info->tcpi_unacked = sk->sk_ack_backlog;
		info->tcpi_sacked = sk->sk_max_ack_backlog;
	} else {
		info->tcpi_unacked = tp->packets_out;
		info->tcpi_sacked = tp->sacked_out;
	}
	info->tcpi_lost = tp->lost_out;
	info->tcpi_retrans = tp->retrans_out;
	info->tcpi_fackets = tp->fackets_out;

	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);

	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
	info->tcpi_snd_cwnd = tp->snd_cwnd;
	info->tcpi_advmss = tp->advmss;
	info->tcpi_reordering = tp->reordering;

	info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
	info->tcpi_rcv_space = tp->rcvq_space.space;

	info->tcpi_total_retrans = tp->total_retrans;
}
EXPORT_SYMBOL_GPL(tcp_get_info);

static int do_tcp_getsockopt(struct sock *sk, int level,
		int optname, char __user *optval, int __user *optlen)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	len = min_t(unsigned int, len, sizeof(int));

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case TCP_MAXSEG:
		val = tp->mss_cache;
		if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
			val = tp->rx_opt.user_mss;
		break;
	case TCP_NODELAY:
		val = !!(tp->nonagle&TCP_NAGLE_OFF);
		break;
	case TCP_CORK:
		val = !!(tp->nonagle&TCP_NAGLE_CORK);
		break;
	case TCP_KEEPIDLE:
		val = keepalive_time_when(tp) / HZ;
		break;
	case TCP_KEEPINTVL:
		val = keepalive_intvl_when(tp) / HZ;
		break;
	case TCP_KEEPCNT:
		val = keepalive_probes(tp);
		break;
	case TCP_SYNCNT:
		val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
		break;
	case TCP_LINGER2:
		val = tp->linger2;
		if (val >= 0)
			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
		break;
	case TCP_DEFER_ACCEPT:
		val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
				      TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
		break;
	case TCP_WINDOW_CLAMP:
		val = tp->window_clamp;
		break;
	case TCP_INFO: {
		struct tcp_info info;

		if (get_user(len, optlen))
			return -EFAULT;

		tcp_get_info(sk, &info);

		len = min_t(unsigned int, len, sizeof(info));
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &info, len))
			return -EFAULT;
		return 0;
	}
	case TCP_QUICKACK:
		val = !icsk->icsk_ack.pingpong;
		break;

	case TCP_CONGESTION:
		if (get_user(len, optlen))
			return -EFAULT;
		len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
			return -EFAULT;
		return 0;

	case TCP_COOKIE_TRANSACTIONS: {
		struct tcp_cookie_transactions ctd;
		struct tcp_cookie_values *cvp = tp->cookie_values;

		if (get_user(len, optlen))
			return -EFAULT;
		if (len < sizeof(ctd))
			return -EINVAL;

		memset(&ctd, 0, sizeof(ctd));
		ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
				   TCP_COOKIE_IN_ALWAYS : 0)
				| (tp->rx_opt.cookie_out_never ?
				   TCP_COOKIE_OUT_NEVER : 0);

		if (cvp != NULL) {
			ctd.tcpct_flags |= (cvp->s_data_in ?
					    TCP_S_DATA_IN : 0)
					 | (cvp->s_data_out ?
					    TCP_S_DATA_OUT : 0);

			ctd.tcpct_cookie_desired = cvp->cookie_desired;
			ctd.tcpct_s_data_desired = cvp->s_data_desired;

			memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
			       cvp->cookie_pair_size);
			ctd.tcpct_used = cvp->cookie_pair_size;
		}

		if (put_user(sizeof(ctd), optlen))
			return -EFAULT;
		if (copy_to_user(optval, &ctd, sizeof(ctd)))
			return -EFAULT;
		return 0;
	}
	case TCP_THIN_LINEAR_TIMEOUTS:
		val = tp->thin_lto;
		break;
	case TCP_THIN_DUPACK:
		val = tp->thin_dupack;
		break;

	case TCP_USER_TIMEOUT:
		val = jiffies_to_msecs(icsk->icsk_user_timeout);
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;
	return 0;
}

int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
		   int __user *optlen)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (level != SOL_TCP)
		return icsk->icsk_af_ops->getsockopt(sk, level, optname,
						     optval, optlen);
	return do_tcp_getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(tcp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_TCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_tcp_getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif

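/*
 * Software GSO: split a large TCP packet into mss-sized segments,
 * fixing up sequence numbers and checksums as we go.  Packets from
 * untrusted (SKB_GSO_DODGY) sources only get gso_segs recomputed.
 */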
struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct tcphdr *th;
	unsigned thlen;
	unsigned int seq;
	__be32 delta;
	unsigned int oldlen;
	unsigned int mss;

	if (!pskb_may_pull(skb, sizeof(*th)))
		goto out;

	th = tcp_hdr(skb);
	thlen = th->doff * 4;
	if (thlen < sizeof(*th))
		goto out;

	if (!pskb_may_pull(skb, thlen))
		goto out;

	oldlen = (u16)~skb->len;
	__skb_pull(skb, thlen);

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;

		if (unlikely(type &
			     ~(SKB_GSO_TCPV4 |
			       SKB_GSO_DODGY |
			       SKB_GSO_TCP_ECN |
			       SKB_GSO_TCPV6 |
			       0) ||
			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	segs = skb_segment(skb, features);
	if (IS_ERR(segs))
		goto out;

	delta = htonl(oldlen + (thlen + mss));

	skb = segs;
	th = tcp_hdr(skb);
	seq = ntohl(th->seq);

	do {
		th->fin = th->psh = 0;

		th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
				       (__force u32)delta));
		if (skb->ip_summed != CHECKSUM_PARTIAL)
			th->check =
			     csum_fold(csum_partial(skb_transport_header(skb),
						    thlen, skb->csum));

		seq += mss;
		skb = skb->next;
		th = tcp_hdr(skb);

		th->seq = htonl(seq);
		th->cwr = 0;
	} while (skb->next);

	delta = htonl(oldlen + (skb->tail - skb->transport_header) +
		      skb->data_len);
	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
				(__force u32)delta));
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		th->check = csum_fold(csum_partial(skb_transport_header(skb),
						   thlen, skb->csum));

out:
	return segs;
}
EXPORT_SYMBOL(tcp_tso_segment);

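/*
 * GRO receive: try to merge an incoming segment with an already-held
 * packet of the same flow.  Any difference in the headers (other than
 * the CWR/FIN/PSH flags and the expected sequence advance) forces a
 * flush instead of a merge.
 */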
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
        struct sk_buff **pp = NULL;
        struct sk_buff *p;
        struct tcphdr *th;
        struct tcphdr *th2;
        unsigned int len;
        unsigned int thlen;
        __be32 flags;
        unsigned int mss = 1;
        unsigned int hlen;
        unsigned int off;
        int flush = 1;
        int i;

        off = skb_gro_offset(skb);
        hlen = off + sizeof(*th);
        th = skb_gro_header_fast(skb, off);
        if (skb_gro_header_hard(skb, hlen)) {
                th = skb_gro_header_slow(skb, hlen, off);
                if (unlikely(!th))
                        goto out;
        }

        thlen = th->doff * 4;
        if (thlen < sizeof(*th))
                goto out;

        hlen = off + thlen;
        if (skb_gro_header_hard(skb, hlen)) {
                th = skb_gro_header_slow(skb, hlen, off);
                if (unlikely(!th))
                        goto out;
        }

        skb_gro_pull(skb, thlen);

        len = skb_gro_len(skb);
        flags = tcp_flag_word(th);

        for (; (p = *head); head = &p->next) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;

                th2 = tcp_hdr(p);

                /* Source and destination ports compared as one word. */
                if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
                }

                goto found;
        }

        goto out_check_final;

found:
        flush = NAPI_GRO_CB(p)->flush;
        flush |= (__force int)(flags & TCP_FLAG_CWR);
        flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
                  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
        flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
        /* Any difference in the TCP options also prevents merging. */
        for (i = sizeof(*th); i < thlen; i += 4)
                flush |= *(u32 *)((u8 *)th + i) ^
                         *(u32 *)((u8 *)th2 + i);

        mss = skb_shinfo(p)->gso_size;

        flush |= (len - 1) >= mss;
        flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);

        if (flush || skb_gro_receive(head, skb)) {
                mss = 1;
                goto out_check_final;
        }

        p = *head;
        th2 = tcp_hdr(p);
        tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);

out_check_final:
        flush = len < mss;
        flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
                                        TCP_FLAG_RST | TCP_FLAG_SYN |
                                        TCP_FLAG_FIN));

        if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
                pp = head;

out:
        NAPI_GRO_CB(skb)->flush |= flush;

        return pp;
}
EXPORT_SYMBOL(tcp_gro_receive);

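/* Finish a merged GRO packet before handing it to the stack: point
 * csum_start/csum_offset at the TCP checksum field so it can be
 * completed later (CHECKSUM_PARTIAL), record how many segments were
 * merged in gso_segs, and flag ECN in gso_type if CWR was seen.
 */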
int tcp_gro_complete(struct sk_buff *skb)
{
        struct tcphdr *th = tcp_hdr(skb);

        skb->csum_start = skb_transport_header(skb) - skb->head;
        skb->csum_offset = offsetof(struct tcphdr, check);
        skb->ip_summed = CHECKSUM_PARTIAL;

        skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;

        if (th->cwr)
                skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

        return 0;
}
EXPORT_SYMBOL(tcp_gro_complete);

#ifdef CONFIG_TCP_MD5SIG
static unsigned long tcp_md5sig_users;
static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);

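/* A single set of per-cpu MD5 transform/scratch areas is shared by all
 * users and reference-counted via tcp_md5sig_users under
 * tcp_md5sig_pool_lock.  The last tcp_free_md5sig_pool() call tears the
 * pool down, freeing each CPU's crypto_hash transform.
 */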
static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);

                if (p) {
                        if (p->md5_desc.tfm)
                                crypto_free_hash(p->md5_desc.tfm);
                        kfree(p);
                }
        }
        free_percpu(pool);
}

void tcp_free_md5sig_pool(void)
{
        struct tcp_md5sig_pool * __percpu *pool = NULL;

        spin_lock_bh(&tcp_md5sig_pool_lock);
        if (--tcp_md5sig_users == 0) {
                pool = tcp_md5sig_pool;
                tcp_md5sig_pool = NULL;
        }
        spin_unlock_bh(&tcp_md5sig_pool_lock);
        if (pool)
                __tcp_free_md5sig_pool(pool);
}
EXPORT_SYMBOL(tcp_free_md5sig_pool);

static struct tcp_md5sig_pool * __percpu *
__tcp_alloc_md5sig_pool(struct sock *sk)
{
        int cpu;
        struct tcp_md5sig_pool * __percpu *pool;

        pool = alloc_percpu(struct tcp_md5sig_pool *);
        if (!pool)
                return NULL;

        for_each_possible_cpu(cpu) {
                struct tcp_md5sig_pool *p;
                struct crypto_hash *hash;

                p = kzalloc(sizeof(*p), sk->sk_allocation);
                if (!p)
                        goto out_free;
                *per_cpu_ptr(pool, cpu) = p;

                hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
                if (!hash || IS_ERR(hash))
                        goto out_free;

                p->md5_desc.tfm = hash;
        }
        return pool;
out_free:
        __tcp_free_md5sig_pool(pool);
        return NULL;
}

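/* Take a reference on the shared pool, allocating it on first use.
 * Allocation must happen outside the spinlock (it may sleep), so a
 * caller that finds the user count already raised but the pool still
 * NULL backs off and retries until the allocating thread has installed
 * it; an allocating thread that loses the install race frees its copy.
 */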
struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
{
        struct tcp_md5sig_pool * __percpu *pool;
        int alloc = 0;

retry:
        spin_lock_bh(&tcp_md5sig_pool_lock);
        pool = tcp_md5sig_pool;
        if (tcp_md5sig_users++ == 0) {
                alloc = 1;
                spin_unlock_bh(&tcp_md5sig_pool_lock);
        } else if (!pool) {
                tcp_md5sig_users--;
                spin_unlock_bh(&tcp_md5sig_pool_lock);
                cpu_relax();
                goto retry;
        } else
                spin_unlock_bh(&tcp_md5sig_pool_lock);

        if (alloc) {
                /* we cannot hold spinlock here because this may sleep. */
                struct tcp_md5sig_pool * __percpu *p;

                p = __tcp_alloc_md5sig_pool(sk);
                spin_lock_bh(&tcp_md5sig_pool_lock);
                if (!p) {
                        tcp_md5sig_users--;
                        spin_unlock_bh(&tcp_md5sig_pool_lock);
                        return NULL;
                }
                pool = tcp_md5sig_pool;
                if (pool) {
                        /* another thread already installed a pool; use theirs */
                        spin_unlock_bh(&tcp_md5sig_pool_lock);
                        __tcp_free_md5sig_pool(p);
                } else {
                        tcp_md5sig_pool = pool = p;
                        spin_unlock_bh(&tcp_md5sig_pool_lock);
                }
        }
        return pool;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);

/* Get this CPU's MD5 scratch pool and take a user reference.  Returns
 * with bottom halves disabled so the per-cpu pool cannot be pulled out
 * from under the caller; every successful call must therefore be
 * paired with tcp_put_md5sig_pool().  Returns NULL if no pool has been
 * allocated.
 */
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
        struct tcp_md5sig_pool * __percpu *p;

        local_bh_disable();

        spin_lock(&tcp_md5sig_pool_lock);
        p = tcp_md5sig_pool;
        if (p)
                tcp_md5sig_users++;
        spin_unlock(&tcp_md5sig_pool_lock);

        if (p)
                return *this_cpu_ptr(p);

        local_bh_enable();
        return NULL;
}
EXPORT_SYMBOL(tcp_get_md5sig_pool);

void tcp_put_md5sig_pool(void)
{
        local_bh_enable();
        tcp_free_md5sig_pool();
}
EXPORT_SYMBOL(tcp_put_md5sig_pool);

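/* Feed the fixed TCP header into the MD5 hash, with the checksum field
 * temporarily zeroed; options are not included here.  A typical signing
 * sequence looks like the following sketch (real callers also mix in an
 * address pseudo-header before the TCP header):
 *
 *      struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();
 *      if (hp) {
 *              crypto_hash_init(&hp->md5_desc);
 *              tcp_md5_hash_header(hp, th);
 *              tcp_md5_hash_skb_data(hp, skb, th->doff << 2);
 *              tcp_md5_hash_key(hp, key);
 *              crypto_hash_final(&hp->md5_desc, md5_hash);
 *              tcp_put_md5sig_pool();
 *      }
 */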
int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
                        struct tcphdr *th)
{
        struct scatterlist sg;
        int err;

        __sum16 old_checksum = th->check;
        th->check = 0;

        sg_init_one(&sg, th, sizeof(struct tcphdr));
        err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
        th->check = old_checksum;
        return err;
}
EXPORT_SYMBOL(tcp_md5_hash_header);

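/* Hash the skb payload that follows the first header_len bytes: the
 * remaining linear data first, then each page fragment, and finally any
 * frag_list skbs, which are walked recursively with header_len == 0.
 */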
int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
                          struct sk_buff *skb, unsigned int header_len)
{
        struct scatterlist sg;
        const struct tcphdr *tp = tcp_hdr(skb);
        struct hash_desc *desc = &hp->md5_desc;
        unsigned int i;
        const unsigned int head_data_len = skb_headlen(skb) > header_len ?
                                           skb_headlen(skb) - header_len : 0;
        const struct skb_shared_info *shi = skb_shinfo(skb);
        struct sk_buff *frag_iter;

        sg_init_table(&sg, 1);

        sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
        if (crypto_hash_update(desc, &sg, head_data_len))
                return 1;

        for (i = 0; i < shi->nr_frags; ++i) {
                const struct skb_frag_struct *f = &shi->frags[i];

                sg_set_page(&sg, f->page, f->size, f->page_offset);
                if (crypto_hash_update(desc, &sg, f->size))
                        return 1;
        }

        skb_walk_frags(skb, frag_iter)
                if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
                        return 1;

        return 0;
}
EXPORT_SYMBOL(tcp_md5_hash_skb_data);

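/* Mix the configured MD5 signature key itself into the hash; this is
 * the last variable-length input before crypto_hash_final().
 */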
int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
{
        struct scatterlist sg;

        sg_init_one(&sg, key->key, key->keylen);
        return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
}
EXPORT_SYMBOL(tcp_md5_hash_key);

#endif

/* TCP Cookie Transactions (TCPCT) secret rotation.
 *
 * Each responder maintains up to two secret values concurrently for
 * efficient secret rollover.  Each secret value has 4 states:
 *
 * Generating.  (tcp_secret_generating != tcp_secret_primary)
 *    Generates new Responder-Cookies, but not yet used for primary
 *    verification.  A short-term state, typically lasting one RTT.
 *
 * Primary.  (tcp_secret_generating == tcp_secret_primary)
 *    Used both for generation and primary verification.
 *
 * Retiring.  (tcp_secret_retiring != tcp_secret_secondary)
 *    Used for verification until the first failure that can be
 *    verified by the newer Generating secret, at which point it
 *    becomes Secondary and the Generating secret becomes Primary.
 *    A short-term state, typically lasting one RTT.
 *
 * Secondary.  (tcp_secret_retiring == tcp_secret_secondary)
 *    Used for secondary verification after primary verification
 *    failures.  Lasts no more than twice the Maximum Segment Lifetime
 *    (2MSL); then the secret is discarded.
 */
struct tcp_cookie_secret {
        /* The secret is divided into two parts.  The digest part is the
         * equivalent of previously hashing a secret and saving the
         * state, and serves as an initialization vector (IV).  The
         * message part serves as the trailing secret.
         */
        u32 secrets[COOKIE_WORKSPACE_WORDS];
        unsigned long expires;
};

#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
#define TCP_SECRET_LIFE (HZ * 600)

static struct tcp_cookie_secret tcp_secret_one;
static struct tcp_cookie_secret tcp_secret_two;

/* Essentially a circular list, without dynamic allocation. */
static struct tcp_cookie_secret *tcp_secret_generating;
static struct tcp_cookie_secret *tcp_secret_primary;
static struct tcp_cookie_secret *tcp_secret_retiring;
static struct tcp_cookie_secret *tcp_secret_secondary;

static DEFINE_SPINLOCK(tcp_secret_locker);

/* Select a pseudo-random word in the cookie workspace. */
static inline u32 tcp_cookie_work(const u32 *ws, const int n)
{
        return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
}

/* Fill bakery[COOKIE_WORKSPACE_WORDS] with the current generating
 * secret, refreshing and rotating the secrets when the current one has
 * expired.  May be called in softirq context.  Returns 0.
 */
int tcp_cookie_generator(u32 *bakery)
{
        unsigned long jiffy = jiffies;

        if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
                spin_lock_bh(&tcp_secret_locker);
                if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
                        /* refreshed by another */
                        memcpy(bakery,
                               &tcp_secret_generating->secrets[0],
                               COOKIE_WORKSPACE_WORDS);
                } else {
                        /* still needs refreshing */
                        get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);

                        /* The first time, paranoia assumes that the
                         * randomization function isn't as strong.  But,
                         * this secret initialization is delayed until
                         * the last possible moment (packet arrival).
                         * Although that time is observable, it is
                         * unpredictably variable.  Mash in the most
                         * volatile clock bits available, and expire the
                         * secret extra quickly.
                         */
                        if (unlikely(tcp_secret_primary->expires ==
                                     tcp_secret_secondary->expires)) {
                                struct timespec tv;

                                getnstimeofday(&tv);
                                bakery[COOKIE_DIGEST_WORDS+0] ^=
                                        (u32)tv.tv_nsec;

                                tcp_secret_secondary->expires = jiffy
                                        + TCP_SECRET_1MSL
                                        + (0x0f & tcp_cookie_work(bakery, 0));
                        } else {
                                tcp_secret_secondary->expires = jiffy
                                        + TCP_SECRET_LIFE
                                        + (0xff & tcp_cookie_work(bakery, 1));
                                tcp_secret_primary->expires = jiffy
                                        + TCP_SECRET_2MSL
                                        + (0x1f & tcp_cookie_work(bakery, 2));
                        }
                        memcpy(&tcp_secret_secondary->secrets[0],
                               bakery, COOKIE_WORKSPACE_WORDS);

                        rcu_assign_pointer(tcp_secret_generating,
                                           tcp_secret_secondary);
                        rcu_assign_pointer(tcp_secret_retiring,
                                           tcp_secret_primary);
                        /* Neither call_rcu() nor synchronize_rcu() is
                         * needed here: retiring data is never freed,
                         * only replaced after further (locked) pointer
                         * updates and a quiet time of at least one MSL.
                         */
                }
                spin_unlock_bh(&tcp_secret_locker);
        } else {
                rcu_read_lock_bh();
                memcpy(bakery,
                       &rcu_dereference(tcp_secret_generating)->secrets[0],
                       COOKIE_WORKSPACE_WORDS);
                rcu_read_unlock_bh();
        }
        return 0;
}
EXPORT_SYMBOL(tcp_cookie_generator);

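/* Final bookkeeping when a connection dies: count failed connection
 * attempts, move the socket to TCP_CLOSE, stop all transmit timers,
 * and either wake up a still-attached user or destroy the orphaned
 * socket immediately.
 */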
void tcp_done(struct sock *sk)
{
        if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
                TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);

        tcp_set_state(sk, TCP_CLOSE);
        tcp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}
EXPORT_SYMBOL_GPL(tcp_done);

extern struct tcp_congestion_ops tcp_reno;

static __initdata unsigned long thash_entries;
static int __init set_thash_entries(char *str)
{
        if (!str)
                return 0;
        thash_entries = simple_strtoul(str, &str, 0);
        return 1;
}
__setup("thash_entries=", set_thash_entries);

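/* Boot-time initialization: allocate the established and bind hash
 * tables (size tunable via the "thash_entries=" boot parameter), derive
 * the orphan/time-wait/SYN-backlog limits from the table size, scale
 * the tcp_mem/tcp_rmem/tcp_wmem defaults to available memory, register
 * Reno as the fallback congestion control, and seed the TCPCT cookie
 * secrets as already expired so they regenerate on first use.
 */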
void __init tcp_init(void)
{
        struct sk_buff *skb = NULL;
        unsigned long nr_pages, limit;
        int i, max_share, cnt;
        unsigned long jiffy = jiffies;

        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));

        percpu_counter_init(&tcp_sockets_allocated, 0);
        percpu_counter_init(&tcp_orphan_count, 0);
        tcp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("tcp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

        /* Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        tcp_hashinfo.ehash =
                alloc_large_system_hash("TCP established",
                                        sizeof(struct inet_ehash_bucket),
                                        thash_entries,
                                        (totalram_pages >= 128 * 1024) ?
                                        13 : 15,
                                        0,
                                        NULL,
                                        &tcp_hashinfo.ehash_mask,
                                        thash_entries ? 0 : 512 * 1024);
        for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
                INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
                INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
        }
        if (inet_ehash_locks_alloc(&tcp_hashinfo))
                panic("TCP: failed to alloc ehash_locks");
        tcp_hashinfo.bhash =
                alloc_large_system_hash("TCP bind",
                                        sizeof(struct inet_bind_hashbucket),
                                        tcp_hashinfo.ehash_mask + 1,
                                        (totalram_pages >= 128 * 1024) ?
                                        13 : 15,
                                        0,
                                        &tcp_hashinfo.bhash_size,
                                        NULL,
                                        64 * 1024);
        tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
        for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
                spin_lock_init(&tcp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
        }

        cnt = tcp_hashinfo.ehash_mask + 1;

        tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
        sysctl_tcp_max_orphans = cnt / 2;
        sysctl_max_syn_backlog = max(128, cnt / 256);

        /* Set the pressure threshold to be a fraction of global memory
         * that is up to 1/2 at 256 MB, decreasing toward zero with the
         * amount of memory, with a floor of 128 pages.
         */
        nr_pages = totalram_pages - totalhigh_pages;
        limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
        limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
        limit = max(limit, 128UL);
        sysctl_tcp_mem[0] = limit / 4 * 3;
        sysctl_tcp_mem[1] = limit;
        sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;

        /* Set per-socket limits to no more than 1/128 the pressure threshold */
        limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
        max_share = min(4UL*1024*1024, limit);

        sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_wmem[1] = 16*1024;
        sysctl_tcp_wmem[2] = max(64*1024, max_share);

        sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_rmem[1] = 87380;
        sysctl_tcp_rmem[2] = max(87380, max_share);

        printk(KERN_INFO "TCP: Hash tables configured "
               "(established %u bind %u)\n",
               tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);

        tcp_register_congestion_control(&tcp_reno);

        memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
        memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
        tcp_secret_one.expires = jiffy;
        tcp_secret_two.expires = jiffy;
        tcp_secret_generating = &tcp_secret_one;
        tcp_secret_primary = &tcp_secret_one;
        tcp_secret_retiring = &tcp_secret_two;
        tcp_secret_secondary = &tcp_secret_two;
}
