/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
#include <linux/scatterlist.h>
#include <linux/splice.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/random.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/crypto.h>
#include <linux/time.h>
#include <linux/slab.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/netdma.h>
#include <net/sock.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;

struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);

long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;

EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);

atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);

/* Current number of TCP sockets. */
struct percpu_counter tcp_sockets_allocated;
EXPORT_SYMBOL(tcp_sockets_allocated);

/* TCP splice context */
struct tcp_splice_state {
	struct pipe_inode_info *pipe;
	size_t len;
	unsigned int flags;
};

/*
 * Pressure flag: try to collapse.
 * Technical note: it is used by multiple contexts non atomically.
 * All the __sk_mem_schedule() is of this nature: accounting
 * is strict, actions are advisory and have some latency.
 */
int tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL(tcp_memory_pressure);

void tcp_enter_memory_pressure(struct sock *sk)
{
	if (!tcp_memory_pressure) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
		tcp_memory_pressure = 1;
	}
}
EXPORT_SYMBOL(tcp_enter_memory_pressure);
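
/*
 * Convert between a retransmission count and an approximate worst-case
 * duration in seconds: the timer starts at the initial timeout and
 * doubles on every attempt, clamped at rto_max.  The two helpers below
 * perform the conversion in each direction (used e.g. by the
 * TCP_DEFER_ACCEPT socket option).
 */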
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
{
	u8 res = 0;

	if (seconds > 0) {
		int period = timeout;

		res = 1;
		while (seconds > period && res < 255) {
			res++;
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return res;
}

static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
{
	int period = 0;

	if (retrans > 0) {
		period = timeout;
		while (--retrans) {
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return period;
}
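
/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we
 *	don't go look at any of the socket buffers directly.
 */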
unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;
	struct tcp_sock *tp = tcp_sk(sk);

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == TCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/*
	 * POLLHUP is problematic here: poll() has no notion of a
	 * half-closed connection, so it is only reported once both
	 * directions are shut down.  Returning too many bits is safer
	 * than too few for real applications.
	 */
	mask = 0;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		int target = sock_rcvlowat(sk, 0, INT_MAX);

		if (tp->urg_seq == tp->copied_seq &&
		    !sock_flag(sk, SOCK_URGINLINE) &&
		    tp->urg_data)
			target++;

		if (tp->rcv_nxt - tp->copied_seq >= target)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/*
				 * Race breaker: if space is freed after the
				 * wspace test but before the flags are set,
				 * the I/O signal would be lost, so re-check.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		} else
			mask |= POLLOUT | POLLWRNORM;

		if (tp->urg_data & TCP_URG_VALID)
			mask |= POLLPRI;
	}

	/* This barrier is coupled with smp_wmb() in tcp_reset() */
	smp_rmb();
	if (sk->sk_err)
		mask |= POLLERR;

	return mask;
}
EXPORT_SYMBOL(tcp_poll);
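
/*
 *	Socket ioctls:
 *	  SIOCINQ	- bytes of unread data in the receive queue
 *	  SIOCATMARK	- whether the read pointer is at the urgent mark
 *	  SIOCOUTQ	- bytes queued but not yet acknowledged by the peer
 *	  SIOCOUTQNSD	- bytes queued but not yet sent
 */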
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int answ;

	switch (cmd) {
	case SIOCINQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		lock_sock(sk);
		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else if (sock_flag(sk, SOCK_URGINLINE) ||
			 !tp->urg_data ||
			 before(tp->urg_seq, tp->copied_seq) ||
			 !before(tp->urg_seq, tp->rcv_nxt)) {
			struct sk_buff *skb;

			answ = tp->rcv_nxt - tp->copied_seq;

			/* Subtract 1, if FIN is in queue. */
			skb = skb_peek_tail(&sk->sk_receive_queue);
			if (answ && skb)
				answ -= tcp_hdr(skb)->fin;
		} else
			answ = tp->urg_seq - tp->copied_seq;
		release_sock(sk);
		break;
	case SIOCATMARK:
		answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
		break;
	case SIOCOUTQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = tp->write_seq - tp->snd_una;
		break;
	case SIOCOUTQNSD:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = tp->write_seq - tp->snd_nxt;
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return put_user(answ, (int __user *)arg);
}
EXPORT_SYMBOL(tcp_ioctl);
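
/*
 * Write-queue helpers: tcp_mark_push() tags a segment with PSH,
 * forced_push() reports when at least half the maximum window has been
 * queued since the last push, skb_entail() appends a new skb to the
 * write queue and charges it to the socket, and tcp_push() kicks off
 * transmission of any pending frames.
 */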
static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
	tp->pushed_seq = tp->write_seq;
}

static inline int forced_push(struct tcp_sock *tp)
{
	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}

static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	skb->csum = 0;
	tcb->seq = tcb->end_seq = tp->write_seq;
	tcb->flags = TCPHDR_ACK;
	tcb->sacked = 0;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
	if (tp->nonagle & TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;
}

static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
{
	if (flags & MSG_OOB)
		tp->snd_up = tp->write_seq;
}

static inline void tcp_push(struct sock *sk, int flags, int mss_now,
			    int nonagle)
{
	if (tcp_send_head(sk)) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (!(flags & MSG_MORE) || forced_push(tp))
			tcp_mark_push(tp, tcp_write_queue_tail(sk));

		tcp_mark_urg(tp, flags);
		__tcp_push_pending_frames(sk, mss_now,
					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
	}
}

static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
				unsigned int offset, size_t len)
{
	struct tcp_splice_state *tss = rd_desc->arg.data;
	int ret;

	ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
			      tss->flags);
	if (ret > 0)
		rd_desc->count -= ret;
	return ret;
}

static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
{
	/* Store TCP splice context information in read_descriptor_t. */
	read_descriptor_t rd_desc = {
		.arg.data = tss,
		.count = tss->len,
	};

	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
}
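
/**
 *  tcp_splice_read - splice data from TCP socket to a pipe
 *  @sock:	socket to splice from
 *  @ppos:	position (not valid)
 *  @pipe:	pipe to splice to
 *  @len:	number of bytes to splice
 *  @flags:	splice modifier flags
 *
 *  Description:
 *    Will read pages from the given socket and fill them into a pipe.
 */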
ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct tcp_splice_state tss = {
		.pipe = pipe,
		.len = len,
		.flags = flags,
	};
	long timeo;
	ssize_t spliced;
	int ret;

	sock_rps_record_flow(sk);

	/* We can't seek on a socket input. */
	if (unlikely(*ppos))
		return -ESPIPE;

	ret = spliced = 0;

	lock_sock(sk);

	timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
	while (tss.len) {
		ret = __tcp_splice_read(sk, &tss);
		if (ret < 0)
			break;
		else if (!ret) {
			if (spliced)
				break;
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				ret = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				/*
				 * This occurs when the user tried to read
				 * from a never connected socket.
				 */
				if (!sock_flag(sk, SOCK_DONE))
					ret = -ENOTCONN;
				break;
			}
			if (!timeo) {
				ret = -EAGAIN;
				break;
			}
			sk_wait_data(sk, &timeo);
			if (signal_pending(current)) {
				ret = sock_intr_errno(timeo);
				break;
			}
			continue;
		}
		tss.len -= ret;
		spliced += ret;

		if (!timeo)
			break;
		release_sock(sk);
		lock_sock(sk);

		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current))
			break;
	}

	release_sock(sk);

	if (spliced)
		return spliced;

	return ret;
}
EXPORT_SYMBOL(tcp_splice_read);
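
/*
 * Allocate a new skb for the write queue.  The requested size is rounded
 * up and room is reserved for the maximum protocol header; on failure
 * the socket is pushed into memory pressure and its send buffer is
 * moderated.
 */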
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
{
	struct sk_buff *skb;

	/* The TCP header must be at least 32-bit aligned. */
	size = ALIGN(size, 4);

	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
	if (skb) {
		if (sk_wmem_schedule(sk, skb->truesize)) {
			/*
			 * Make sure that we have exactly size bytes
			 * available to the caller, no more, no less.
			 */
			skb_reserve(skb, skb_tailroom(skb) - size);
			return skb;
		}
		__kfree_skb(skb);
	} else {
		sk->sk_prot->enter_memory_pressure(sk);
		sk_stream_moderate_sndbuf(sk);
	}
	return NULL;
}
718
719static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
720 int large_allowed)
721{
722 struct tcp_sock *tp = tcp_sk(sk);
723 u32 xmit_size_goal, old_size_goal;
724
725 xmit_size_goal = mss_now;
726
727 if (large_allowed && sk_can_gso(sk)) {
728 xmit_size_goal = ((sk->sk_gso_max_size - 1) -
729 inet_csk(sk)->icsk_af_ops->net_header_len -
730 inet_csk(sk)->icsk_ext_hdr_len -
731 tp->tcp_header_len);
732
733 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
734
735
736 old_size_goal = tp->xmit_size_goal_segs * mss_now;
737
738 if (likely(old_size_goal <= xmit_size_goal &&
739 old_size_goal + mss_now > xmit_size_goal)) {
740 xmit_size_goal = old_size_goal;
741 } else {
742 tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
743 xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
744 }
745 }
746
747 return max(xmit_size_goal, mss_now);
748}
749
750static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
751{
752 int mss_now;
753
754 mss_now = tcp_current_mss(sk);
755 *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
756
757 return mss_now;
758}
759
760static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
761 size_t psize, int flags)
762{
763 struct tcp_sock *tp = tcp_sk(sk);
764 int mss_now, size_goal;
765 int err;
766 ssize_t copied;
767 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
768
769
770 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
771 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
772 goto out_err;
773
774 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
775
776 mss_now = tcp_send_mss(sk, &size_goal, flags);
777 copied = 0;
778
779 err = -EPIPE;
780 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
781 goto out_err;
782
783 while (psize > 0) {
784 struct sk_buff *skb = tcp_write_queue_tail(sk);
785 struct page *page = pages[poffset / PAGE_SIZE];
786 int copy, i, can_coalesce;
787 int offset = poffset % PAGE_SIZE;
788 int size = min_t(size_t, psize, PAGE_SIZE - offset);
789
790 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
791new_segment:
792 if (!sk_stream_memory_free(sk))
793 goto wait_for_sndbuf;
794
795 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
796 if (!skb)
797 goto wait_for_memory;
798
799 skb_entail(sk, skb);
800 copy = size_goal;
801 }
802
803 if (copy > size)
804 copy = size;
805
806 i = skb_shinfo(skb)->nr_frags;
807 can_coalesce = skb_can_coalesce(skb, i, page, offset);
808 if (!can_coalesce && i >= MAX_SKB_FRAGS) {
809 tcp_mark_push(tp, skb);
810 goto new_segment;
811 }
812 if (!sk_wmem_schedule(sk, copy))
813 goto wait_for_memory;
814
815 if (can_coalesce) {
816 skb_shinfo(skb)->frags[i - 1].size += copy;
817 } else {
818 get_page(page);
819 skb_fill_page_desc(skb, i, page, offset, copy);
820 }
821
822 skb->len += copy;
823 skb->data_len += copy;
824 skb->truesize += copy;
825 sk->sk_wmem_queued += copy;
826 sk_mem_charge(sk, copy);
827 skb->ip_summed = CHECKSUM_PARTIAL;
828 tp->write_seq += copy;
829 TCP_SKB_CB(skb)->end_seq += copy;
830 skb_shinfo(skb)->gso_segs = 0;
831
832 if (!copied)
833 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
834
835 copied += copy;
836 poffset += copy;
837 if (!(psize -= copy))
838 goto out;
839
840 if (skb->len < size_goal || (flags & MSG_OOB))
841 continue;
842
843 if (forced_push(tp)) {
844 tcp_mark_push(tp, skb);
845 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
846 } else if (skb == tcp_send_head(sk))
847 tcp_push_one(sk, mss_now);
848 continue;
849
850wait_for_sndbuf:
851 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
852wait_for_memory:
853 if (copied)
854 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
855
856 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
857 goto do_error;
858
859 mss_now = tcp_send_mss(sk, &size_goal, flags);
860 }
861
862out:
863 if (copied)
864 tcp_push(sk, flags, mss_now, tp->nonagle);
865 return copied;
866
867do_error:
868 if (copied)
869 goto out;
870out_err:
871 return sk_stream_error(sk, flags, err);
872}
873
874int tcp_sendpage(struct sock *sk, struct page *page, int offset,
875 size_t size, int flags)
876{
877 ssize_t res;
878
879 if (!(sk->sk_route_caps & NETIF_F_SG) ||
880 !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
881 return sock_no_sendpage(sk->sk_socket, page, offset, size,
882 flags);
883
884 lock_sock(sk);
885 res = do_tcp_sendpages(sk, &page, offset, size, flags);
886 release_sock(sk);
887 return res;
888}
889EXPORT_SYMBOL(tcp_sendpage);
890
891#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
892#define TCP_OFF(sk) (sk->sk_sndmsg_off)
893
894static inline int select_size(struct sock *sk, int sg)
895{
896 struct tcp_sock *tp = tcp_sk(sk);
897 int tmp = tp->mss_cache;
898
899 if (sg) {
900 if (sk_can_gso(sk))
901 tmp = 0;
902 else {
903 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
904
905 if (tmp >= pgbreak &&
906 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
907 tmp = pgbreak;
908 }
909 }
910
911 return tmp;
912}
913
914int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
915 size_t size)
916{
917 struct iovec *iov;
918 struct tcp_sock *tp = tcp_sk(sk);
919 struct sk_buff *skb;
920 int iovlen, flags;
921 int mss_now, size_goal;
922 int sg, err, copied;
923 long timeo;
924
925 lock_sock(sk);
926
927 flags = msg->msg_flags;
928 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
929
930
931 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
932 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
933 goto out_err;
934
935
936 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
937
938 mss_now = tcp_send_mss(sk, &size_goal, flags);
939
940
941 iovlen = msg->msg_iovlen;
942 iov = msg->msg_iov;
943 copied = 0;
944
945 err = -EPIPE;
946 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
947 goto out_err;
948
949 sg = sk->sk_route_caps & NETIF_F_SG;
950
951 while (--iovlen >= 0) {
952 size_t seglen = iov->iov_len;
953 unsigned char __user *from = iov->iov_base;
954
955 iov++;
956
957 while (seglen > 0) {
958 int copy = 0;
959 int max = size_goal;
960
961 skb = tcp_write_queue_tail(sk);
962 if (tcp_send_head(sk)) {
963 if (skb->ip_summed == CHECKSUM_NONE)
964 max = mss_now;
965 copy = max - skb->len;
966 }
967
968 if (copy <= 0) {
969new_segment:
970
971
972
973 if (!sk_stream_memory_free(sk))
974 goto wait_for_sndbuf;
975
976 skb = sk_stream_alloc_skb(sk,
977 select_size(sk, sg),
978 sk->sk_allocation);
979 if (!skb)
980 goto wait_for_memory;
981
982
983
984
985 if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
986 skb->ip_summed = CHECKSUM_PARTIAL;
987
988 skb_entail(sk, skb);
989 copy = size_goal;
990 max = size_goal;
991 }
992
993
994 if (copy > seglen)
995 copy = seglen;
996
997
998 if (skb_tailroom(skb) > 0) {
999
1000 if (copy > skb_tailroom(skb))
1001 copy = skb_tailroom(skb);
1002 if ((err = skb_add_data(skb, from, copy)) != 0)
1003 goto do_fault;
1004 } else {
1005 int merge = 0;
1006 int i = skb_shinfo(skb)->nr_frags;
1007 struct page *page = TCP_PAGE(sk);
1008 int off = TCP_OFF(sk);
1009
1010 if (skb_can_coalesce(skb, i, page, off) &&
1011 off != PAGE_SIZE) {
1012
1013
1014 merge = 1;
1015 } else if (i == MAX_SKB_FRAGS || !sg) {
1016
1017
1018
1019
1020 tcp_mark_push(tp, skb);
1021 goto new_segment;
1022 } else if (page) {
1023 if (off == PAGE_SIZE) {
1024 put_page(page);
1025 TCP_PAGE(sk) = page = NULL;
1026 off = 0;
1027 }
1028 } else
1029 off = 0;
1030
1031 if (copy > PAGE_SIZE - off)
1032 copy = PAGE_SIZE - off;
1033
1034 if (!sk_wmem_schedule(sk, copy))
1035 goto wait_for_memory;
1036
1037 if (!page) {
1038
1039 if (!(page = sk_stream_alloc_page(sk)))
1040 goto wait_for_memory;
1041 }
1042
1043
1044
1045 err = skb_copy_to_page(sk, from, skb, page,
1046 off, copy);
1047 if (err) {
1048
1049
1050
1051 if (!TCP_PAGE(sk)) {
1052 TCP_PAGE(sk) = page;
1053 TCP_OFF(sk) = 0;
1054 }
1055 goto do_error;
1056 }
1057
1058
1059 if (merge) {
1060 skb_shinfo(skb)->frags[i - 1].size +=
1061 copy;
1062 } else {
1063 skb_fill_page_desc(skb, i, page, off, copy);
1064 if (TCP_PAGE(sk)) {
1065 get_page(page);
1066 } else if (off + copy < PAGE_SIZE) {
1067 get_page(page);
1068 TCP_PAGE(sk) = page;
1069 }
1070 }
1071
1072 TCP_OFF(sk) = off + copy;
1073 }
1074
1075 if (!copied)
1076 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
1077
1078 tp->write_seq += copy;
1079 TCP_SKB_CB(skb)->end_seq += copy;
1080 skb_shinfo(skb)->gso_segs = 0;
1081
1082 from += copy;
1083 copied += copy;
1084 if ((seglen -= copy) == 0 && iovlen == 0)
1085 goto out;
1086
1087 if (skb->len < max || (flags & MSG_OOB))
1088 continue;
1089
1090 if (forced_push(tp)) {
1091 tcp_mark_push(tp, skb);
1092 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
1093 } else if (skb == tcp_send_head(sk))
1094 tcp_push_one(sk, mss_now);
1095 continue;
1096
1097wait_for_sndbuf:
1098 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1099wait_for_memory:
1100 if (copied)
1101 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
1102
1103 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
1104 goto do_error;
1105
1106 mss_now = tcp_send_mss(sk, &size_goal, flags);
1107 }
1108 }
1109
1110out:
1111 if (copied)
1112 tcp_push(sk, flags, mss_now, tp->nonagle);
1113 release_sock(sk);
1114 return copied;
1115
1116do_fault:
1117 if (!skb->len) {
1118 tcp_unlink_write_queue(skb, sk);
1119
1120
1121
1122 tcp_check_send_head(sk, skb);
1123 sk_wmem_free_skb(sk, skb);
1124 }
1125
1126do_error:
1127 if (copied)
1128 goto out;
1129out_err:
1130 err = sk_stream_error(sk, flags, err);
1131 release_sock(sk);
1132 return err;
1133}
1134EXPORT_SYMBOL(tcp_sendmsg);
1135
1136
1137
1138
1139
1140
1141static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
1142{
1143 struct tcp_sock *tp = tcp_sk(sk);
1144
1145
1146 if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
1147 tp->urg_data == TCP_URG_READ)
1148 return -EINVAL;
1149
1150 if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
1151 return -ENOTCONN;
1152
1153 if (tp->urg_data & TCP_URG_VALID) {
1154 int err = 0;
1155 char c = tp->urg_data;
1156
1157 if (!(flags & MSG_PEEK))
1158 tp->urg_data = TCP_URG_READ;
1159
1160
1161 msg->msg_flags |= MSG_OOB;
1162
1163 if (len > 0) {
1164 if (!(flags & MSG_TRUNC))
1165 err = memcpy_toiovec(msg->msg_iov, &c, 1);
1166 len = 1;
1167 } else
1168 msg->msg_flags |= MSG_TRUNC;
1169
1170 return err ? -EFAULT : len;
1171 }
1172
1173 if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
1174 return 0;
1175
1176
1177
1178
1179
1180
1181
1182 return -EAGAIN;
1183}
1184
1185
1186
1187
1188
1189
1190
1191void tcp_cleanup_rbuf(struct sock *sk, int copied)
1192{
1193 struct tcp_sock *tp = tcp_sk(sk);
1194 int time_to_ack = 0;
1195
1196#if TCP_DEBUG
1197 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1198
1199 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
1200 "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
1201 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
1202#endif
1203
1204 if (inet_csk_ack_scheduled(sk)) {
1205 const struct inet_connection_sock *icsk = inet_csk(sk);
1206
1207
1208 if (icsk->icsk_ack.blocked ||
1209
1210 tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
1211
1212
1213
1214
1215
1216
1217 (copied > 0 &&
1218 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
1219 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
1220 !icsk->icsk_ack.pingpong)) &&
1221 !atomic_read(&sk->sk_rmem_alloc)))
1222 time_to_ack = 1;
1223 }
1224
1225
1226
1227
1228
1229
1230
1231 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1232 __u32 rcv_window_now = tcp_receive_window(tp);
1233
1234
1235 if (2*rcv_window_now <= tp->window_clamp) {
1236 __u32 new_window = __tcp_select_window(sk);
1237
1238
1239
1240
1241
1242
1243 if (new_window && new_window >= 2 * rcv_window_now)
1244 time_to_ack = 1;
1245 }
1246 }
1247 if (time_to_ack)
1248 tcp_send_ack(sk);
1249}
1250
1251static void tcp_prequeue_process(struct sock *sk)
1252{
1253 struct sk_buff *skb;
1254 struct tcp_sock *tp = tcp_sk(sk);
1255
1256 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
1257
1258
1259
1260 local_bh_disable();
1261 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1262 sk_backlog_rcv(sk, skb);
1263 local_bh_enable();
1264
1265
1266 tp->ucopy.memory = 0;
1267}
1268
1269#ifdef CONFIG_NET_DMA
1270static void tcp_service_net_dma(struct sock *sk, bool wait)
1271{
1272 dma_cookie_t done, used;
1273 dma_cookie_t last_issued;
1274 struct tcp_sock *tp = tcp_sk(sk);
1275
1276 if (!tp->ucopy.dma_chan)
1277 return;
1278
1279 last_issued = tp->ucopy.dma_cookie;
1280 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1281
1282 do {
1283 if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
1284 last_issued, &done,
1285 &used) == DMA_SUCCESS) {
1286
1287 __skb_queue_purge(&sk->sk_async_wait_queue);
1288 break;
1289 } else {
1290 struct sk_buff *skb;
1291 while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
1292 (dma_async_is_complete(skb->dma_cookie, done,
1293 used) == DMA_SUCCESS)) {
1294 __skb_dequeue(&sk->sk_async_wait_queue);
1295 kfree_skb(skb);
1296 }
1297 }
1298 } while (wait);
1299}
1300#endif
1301
1302static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1303{
1304 struct sk_buff *skb;
1305 u32 offset;
1306
1307 skb_queue_walk(&sk->sk_receive_queue, skb) {
1308 offset = seq - TCP_SKB_CB(skb)->seq;
1309 if (tcp_hdr(skb)->syn)
1310 offset--;
1311 if (offset < skb->len || tcp_hdr(skb)->fin) {
1312 *off = offset;
1313 return skb;
1314 }
1315 }
1316 return NULL;
1317}
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1331 sk_read_actor_t recv_actor)
1332{
1333 struct sk_buff *skb;
1334 struct tcp_sock *tp = tcp_sk(sk);
1335 u32 seq = tp->copied_seq;
1336 u32 offset;
1337 int copied = 0;
1338
1339 if (sk->sk_state == TCP_LISTEN)
1340 return -ENOTCONN;
1341 while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
1342 if (offset < skb->len) {
1343 int used;
1344 size_t len;
1345
1346 len = skb->len - offset;
1347
1348 if (tp->urg_data) {
1349 u32 urg_offset = tp->urg_seq - seq;
1350 if (urg_offset < len)
1351 len = urg_offset;
1352 if (!len)
1353 break;
1354 }
1355 used = recv_actor(desc, skb, offset, len);
1356 if (used < 0) {
1357 if (!copied)
1358 copied = used;
1359 break;
1360 } else if (used <= len) {
1361 seq += used;
1362 copied += used;
1363 offset += used;
1364 }
1365
1366
1367
1368
1369
1370
1371 skb = tcp_recv_skb(sk, seq-1, &offset);
1372 if (!skb || (offset+1 != skb->len))
1373 break;
1374 }
1375 if (tcp_hdr(skb)->fin) {
1376 sk_eat_skb(sk, skb, 0);
1377 ++seq;
1378 break;
1379 }
1380 sk_eat_skb(sk, skb, 0);
1381 if (!desc->count)
1382 break;
1383 tp->copied_seq = seq;
1384 }
1385 tp->copied_seq = seq;
1386
1387 tcp_rcv_space_adjust(sk);
1388
1389
1390 if (copied > 0)
1391 tcp_cleanup_rbuf(sk, copied);
1392 return copied;
1393}
1394EXPORT_SYMBOL(tcp_read_sock);
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1405 size_t len, int nonblock, int flags, int *addr_len)
1406{
1407 struct tcp_sock *tp = tcp_sk(sk);
1408 int copied = 0;
1409 u32 peek_seq;
1410 u32 *seq;
1411 unsigned long used;
1412 int err;
1413 int target;
1414 long timeo;
1415 struct task_struct *user_recv = NULL;
1416 int copied_early = 0;
1417 struct sk_buff *skb;
1418 u32 urg_hole = 0;
1419
1420 lock_sock(sk);
1421
1422 err = -ENOTCONN;
1423 if (sk->sk_state == TCP_LISTEN)
1424 goto out;
1425
1426 timeo = sock_rcvtimeo(sk, nonblock);
1427
1428
1429 if (flags & MSG_OOB)
1430 goto recv_urg;
1431
1432 seq = &tp->copied_seq;
1433 if (flags & MSG_PEEK) {
1434 peek_seq = tp->copied_seq;
1435 seq = &peek_seq;
1436 }
1437
1438 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1439
1440#ifdef CONFIG_NET_DMA
1441 tp->ucopy.dma_chan = NULL;
1442 preempt_disable();
1443 skb = skb_peek_tail(&sk->sk_receive_queue);
1444 {
1445 int available = 0;
1446
1447 if (skb)
1448 available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
1449 if ((available < target) &&
1450 (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1451 !sysctl_tcp_low_latency &&
1452 dma_find_channel(DMA_MEMCPY)) {
1453 preempt_enable_no_resched();
1454 tp->ucopy.pinned_list =
1455 dma_pin_iovec_pages(msg->msg_iov, len);
1456 } else {
1457 preempt_enable_no_resched();
1458 }
1459 }
1460#endif
1461
1462 do {
1463 u32 offset;
1464
1465
1466 if (tp->urg_data && tp->urg_seq == *seq) {
1467 if (copied)
1468 break;
1469 if (signal_pending(current)) {
1470 copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
1471 break;
1472 }
1473 }
1474
1475
1476
1477 skb_queue_walk(&sk->sk_receive_queue, skb) {
1478
1479
1480
1481 if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
1482 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
1483 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
1484 flags))
1485 break;
1486
1487 offset = *seq - TCP_SKB_CB(skb)->seq;
1488 if (tcp_hdr(skb)->syn)
1489 offset--;
1490 if (offset < skb->len)
1491 goto found_ok_skb;
1492 if (tcp_hdr(skb)->fin)
1493 goto found_fin_ok;
1494 WARN(!(flags & MSG_PEEK),
1495 "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
1496 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
1497 }
1498
1499
1500
1501 if (copied >= target && !sk->sk_backlog.tail)
1502 break;
1503
1504 if (copied) {
1505 if (sk->sk_err ||
1506 sk->sk_state == TCP_CLOSE ||
1507 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1508 !timeo ||
1509 signal_pending(current))
1510 break;
1511 } else {
1512 if (sock_flag(sk, SOCK_DONE))
1513 break;
1514
1515 if (sk->sk_err) {
1516 copied = sock_error(sk);
1517 break;
1518 }
1519
1520 if (sk->sk_shutdown & RCV_SHUTDOWN)
1521 break;
1522
1523 if (sk->sk_state == TCP_CLOSE) {
1524 if (!sock_flag(sk, SOCK_DONE)) {
1525
1526
1527
1528 copied = -ENOTCONN;
1529 break;
1530 }
1531 break;
1532 }
1533
1534 if (!timeo) {
1535 copied = -EAGAIN;
1536 break;
1537 }
1538
1539 if (signal_pending(current)) {
1540 copied = sock_intr_errno(timeo);
1541 break;
1542 }
1543 }
1544
1545 tcp_cleanup_rbuf(sk, copied);
1546
1547 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
1548
1549 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1550 user_recv = current;
1551 tp->ucopy.task = user_recv;
1552 tp->ucopy.iov = msg->msg_iov;
1553 }
1554
1555 tp->ucopy.len = len;
1556
1557 WARN_ON(tp->copied_seq != tp->rcv_nxt &&
1558 !(flags & (MSG_PEEK | MSG_TRUNC)));
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586 if (!skb_queue_empty(&tp->ucopy.prequeue))
1587 goto do_prequeue;
1588
1589
1590 }
1591
1592#ifdef CONFIG_NET_DMA
1593 if (tp->ucopy.dma_chan)
1594 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1595#endif
1596 if (copied >= target) {
1597
1598 release_sock(sk);
1599 lock_sock(sk);
1600 } else
1601 sk_wait_data(sk, &timeo);
1602
1603#ifdef CONFIG_NET_DMA
1604 tcp_service_net_dma(sk, false);
1605 tp->ucopy.wakeup = 0;
1606#endif
1607
1608 if (user_recv) {
1609 int chunk;
1610
1611
1612
1613 if ((chunk = len - tp->ucopy.len) != 0) {
1614 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
1615 len -= chunk;
1616 copied += chunk;
1617 }
1618
1619 if (tp->rcv_nxt == tp->copied_seq &&
1620 !skb_queue_empty(&tp->ucopy.prequeue)) {
1621do_prequeue:
1622 tcp_prequeue_process(sk);
1623
1624 if ((chunk = len - tp->ucopy.len) != 0) {
1625 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1626 len -= chunk;
1627 copied += chunk;
1628 }
1629 }
1630 }
1631 if ((flags & MSG_PEEK) &&
1632 (peek_seq - copied - urg_hole != tp->copied_seq)) {
1633 if (net_ratelimit())
1634 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
1635 current->comm, task_pid_nr(current));
1636 peek_seq = tp->copied_seq;
1637 }
1638 continue;
1639
1640 found_ok_skb:
1641
1642 used = skb->len - offset;
1643 if (len < used)
1644 used = len;
1645
1646
1647 if (tp->urg_data) {
1648 u32 urg_offset = tp->urg_seq - *seq;
1649 if (urg_offset < used) {
1650 if (!urg_offset) {
1651 if (!sock_flag(sk, SOCK_URGINLINE)) {
1652 ++*seq;
1653 urg_hole++;
1654 offset++;
1655 used--;
1656 if (!used)
1657 goto skip_copy;
1658 }
1659 } else
1660 used = urg_offset;
1661 }
1662 }
1663
1664 if (!(flags & MSG_TRUNC)) {
1665#ifdef CONFIG_NET_DMA
1666 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1667 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1668
1669 if (tp->ucopy.dma_chan) {
1670 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
1671 tp->ucopy.dma_chan, skb, offset,
1672 msg->msg_iov, used,
1673 tp->ucopy.pinned_list);
1674
1675 if (tp->ucopy.dma_cookie < 0) {
1676
1677 printk(KERN_ALERT "dma_cookie < 0\n");
1678
1679
1680 if (!copied)
1681 copied = -EFAULT;
1682 break;
1683 }
1684
1685 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1686
1687 if ((offset + used) == skb->len)
1688 copied_early = 1;
1689
1690 } else
1691#endif
1692 {
1693 err = skb_copy_datagram_iovec(skb, offset,
1694 msg->msg_iov, used);
1695 if (err) {
1696
1697 if (!copied)
1698 copied = -EFAULT;
1699 break;
1700 }
1701 }
1702 }
1703
1704 *seq += used;
1705 copied += used;
1706 len -= used;
1707
1708 tcp_rcv_space_adjust(sk);
1709
1710skip_copy:
1711 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
1712 tp->urg_data = 0;
1713 tcp_fast_path_check(sk);
1714 }
1715 if (used + offset < skb->len)
1716 continue;
1717
1718 if (tcp_hdr(skb)->fin)
1719 goto found_fin_ok;
1720 if (!(flags & MSG_PEEK)) {
1721 sk_eat_skb(sk, skb, copied_early);
1722 copied_early = 0;
1723 }
1724 continue;
1725
1726 found_fin_ok:
1727
1728 ++*seq;
1729 if (!(flags & MSG_PEEK)) {
1730 sk_eat_skb(sk, skb, copied_early);
1731 copied_early = 0;
1732 }
1733 break;
1734 } while (len > 0);
1735
1736 if (user_recv) {
1737 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
1738 int chunk;
1739
1740 tp->ucopy.len = copied > 0 ? len : 0;
1741
1742 tcp_prequeue_process(sk);
1743
1744 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
1745 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1746 len -= chunk;
1747 copied += chunk;
1748 }
1749 }
1750
1751 tp->ucopy.task = NULL;
1752 tp->ucopy.len = 0;
1753 }
1754
1755#ifdef CONFIG_NET_DMA
1756 tcp_service_net_dma(sk, true);
1757 tp->ucopy.dma_chan = NULL;
1758
1759 if (tp->ucopy.pinned_list) {
1760 dma_unpin_iovec_pages(tp->ucopy.pinned_list);
1761 tp->ucopy.pinned_list = NULL;
1762 }
1763#endif
1764
1765
1766
1767
1768
1769
1770 tcp_cleanup_rbuf(sk, copied);
1771
1772 release_sock(sk);
1773 return copied;
1774
1775out:
1776 release_sock(sk);
1777 return err;
1778
1779recv_urg:
1780 err = tcp_recv_urg(sk, msg, len, flags);
1781 goto out;
1782}
1783EXPORT_SYMBOL(tcp_recvmsg);

void tcp_set_state(struct sock *sk, int state)
{
	int oldstate = sk->sk_state;

	switch (state) {
	case TCP_ESTABLISHED:
		if (oldstate != TCP_ESTABLISHED)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
		break;

	case TCP_CLOSE:
		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == TCP_ESTABLISHED)
			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid a closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;

#ifdef STATE_TRACE
	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
#endif
}
EXPORT_SYMBOL_GPL(tcp_set_state);
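
/*
 *	State transition applied on shutdown/close, indexed by the current
 *	socket state.  Entries carrying TCP_ACTION_FIN mean that a FIN must
 *	be sent as part of the transition.
 */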
static const unsigned char new_state[16] = {
	/* (Invalid)		*/ TCP_CLOSE,
	/* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
	/* TCP_SYN_SENT		*/ TCP_CLOSE,
	/* TCP_SYN_RECV		*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
	/* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
	/* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
	/* TCP_TIME_WAIT	*/ TCP_CLOSE,
	/* TCP_CLOSE		*/ TCP_CLOSE,
	/* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK | TCP_ACTION_FIN,
	/* TCP_LAST_ACK		*/ TCP_LAST_ACK,
	/* TCP_LISTEN		*/ TCP_CLOSE,
	/* TCP_CLOSING		*/ TCP_CLOSING,
};

static int tcp_close_state(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];
	int ns = next & TCP_STATE_MASK;

	tcp_set_state(sk, ns);

	return next & TCP_ACTION_FIN;
}
1852
1853
1854
1855
1856
1857
1858void tcp_shutdown(struct sock *sk, int how)
1859{
1860
1861
1862
1863
1864 if (!(how & SEND_SHUTDOWN))
1865 return;
1866
1867
1868 if ((1 << sk->sk_state) &
1869 (TCPF_ESTABLISHED | TCPF_SYN_SENT |
1870 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
1871
1872 if (tcp_close_state(sk))
1873 tcp_send_fin(sk);
1874 }
1875}
1876EXPORT_SYMBOL(tcp_shutdown);
1877
1878void tcp_close(struct sock *sk, long timeout)
1879{
1880 struct sk_buff *skb;
1881 int data_was_unread = 0;
1882 int state;
1883
1884 lock_sock(sk);
1885 sk->sk_shutdown = SHUTDOWN_MASK;
1886
1887 if (sk->sk_state == TCP_LISTEN) {
1888 tcp_set_state(sk, TCP_CLOSE);
1889
1890
1891 inet_csk_listen_stop(sk);
1892
1893 goto adjudge_to_death;
1894 }
1895
1896
1897
1898
1899
1900 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1901 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
1902 tcp_hdr(skb)->fin;
1903 data_was_unread += len;
1904 __kfree_skb(skb);
1905 }
1906
1907 sk_mem_reclaim(sk);
1908
1909
1910 if (sk->sk_state == TCP_CLOSE)
1911 goto adjudge_to_death;
1912
1913
1914
1915
1916
1917
1918
1919
1920 if (data_was_unread) {
1921
1922 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
1923 tcp_set_state(sk, TCP_CLOSE);
1924 tcp_send_active_reset(sk, sk->sk_allocation);
1925 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1926
1927 sk->sk_prot->disconnect(sk, 0);
1928 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
1929 } else if (tcp_close_state(sk)) {
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955 tcp_send_fin(sk);
1956 }
1957
1958 sk_stream_wait_close(sk, timeout);
1959
1960adjudge_to_death:
1961 state = sk->sk_state;
1962 sock_hold(sk);
1963 sock_orphan(sk);
1964
1965
1966 release_sock(sk);
1967
1968
1969
1970
1971
1972 local_bh_disable();
1973 bh_lock_sock(sk);
1974 WARN_ON(sock_owned_by_user(sk));
1975
1976 percpu_counter_inc(sk->sk_prot->orphan_count);
1977
1978
1979 if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
1980 goto out;
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996 if (sk->sk_state == TCP_FIN_WAIT2) {
1997 struct tcp_sock *tp = tcp_sk(sk);
1998 if (tp->linger2 < 0) {
1999 tcp_set_state(sk, TCP_CLOSE);
2000 tcp_send_active_reset(sk, GFP_ATOMIC);
2001 NET_INC_STATS_BH(sock_net(sk),
2002 LINUX_MIB_TCPABORTONLINGER);
2003 } else {
2004 const int tmo = tcp_fin_time(sk);
2005
2006 if (tmo > TCP_TIMEWAIT_LEN) {
2007 inet_csk_reset_keepalive_timer(sk,
2008 tmo - TCP_TIMEWAIT_LEN);
2009 } else {
2010 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
2011 goto out;
2012 }
2013 }
2014 }
2015 if (sk->sk_state != TCP_CLOSE) {
2016 sk_mem_reclaim(sk);
2017 if (tcp_too_many_orphans(sk, 0)) {
2018 if (net_ratelimit())
2019 printk(KERN_INFO "TCP: too many of orphaned "
2020 "sockets\n");
2021 tcp_set_state(sk, TCP_CLOSE);
2022 tcp_send_active_reset(sk, GFP_ATOMIC);
2023 NET_INC_STATS_BH(sock_net(sk),
2024 LINUX_MIB_TCPABORTONMEMORY);
2025 }
2026 }
2027
2028 if (sk->sk_state == TCP_CLOSE)
2029 inet_csk_destroy_sock(sk);
2030
2031
2032out:
2033 bh_unlock_sock(sk);
2034 local_bh_enable();
2035 sock_put(sk);
2036}
2037EXPORT_SYMBOL(tcp_close);
2038
2039
2040
2041static inline int tcp_need_reset(int state)
2042{
2043 return (1 << state) &
2044 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
2045 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2046}
2047
2048int tcp_disconnect(struct sock *sk, int flags)
2049{
2050 struct inet_sock *inet = inet_sk(sk);
2051 struct inet_connection_sock *icsk = inet_csk(sk);
2052 struct tcp_sock *tp = tcp_sk(sk);
2053 int err = 0;
2054 int old_state = sk->sk_state;
2055
2056 if (old_state != TCP_CLOSE)
2057 tcp_set_state(sk, TCP_CLOSE);
2058
2059
2060 if (old_state == TCP_LISTEN) {
2061 inet_csk_listen_stop(sk);
2062 } else if (tcp_need_reset(old_state) ||
2063 (tp->snd_nxt != tp->write_seq &&
2064 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
2065
2066
2067
2068 tcp_send_active_reset(sk, gfp_any());
2069 sk->sk_err = ECONNRESET;
2070 } else if (old_state == TCP_SYN_SENT)
2071 sk->sk_err = ECONNRESET;
2072
2073 tcp_clear_xmit_timers(sk);
2074 __skb_queue_purge(&sk->sk_receive_queue);
2075 tcp_write_queue_purge(sk);
2076 __skb_queue_purge(&tp->out_of_order_queue);
2077#ifdef CONFIG_NET_DMA
2078 __skb_queue_purge(&sk->sk_async_wait_queue);
2079#endif
2080
2081 inet->inet_dport = 0;
2082
2083 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2084 inet_reset_saddr(sk);
2085
2086 sk->sk_shutdown = 0;
2087 sock_reset_flag(sk, SOCK_DONE);
2088 tp->srtt = 0;
2089 if ((tp->write_seq += tp->max_window + 2) == 0)
2090 tp->write_seq = 1;
2091 icsk->icsk_backoff = 0;
2092 tp->snd_cwnd = 2;
2093 icsk->icsk_probes_out = 0;
2094 tp->packets_out = 0;
2095 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
2096 tp->snd_cwnd_cnt = 0;
2097 tp->bytes_acked = 0;
2098 tp->window_clamp = 0;
2099 tcp_set_ca_state(sk, TCP_CA_Open);
2100 tcp_clear_retrans(tp);
2101 inet_csk_delack_init(sk);
2102 tcp_init_send_head(sk);
2103 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2104 __sk_dst_reset(sk);
2105
2106 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2107
2108 sk->sk_error_report(sk);
2109 return err;
2110}
2111EXPORT_SYMBOL(tcp_disconnect);
2112
2113
2114
2115
2116static int do_tcp_setsockopt(struct sock *sk, int level,
2117 int optname, char __user *optval, unsigned int optlen)
2118{
2119 struct tcp_sock *tp = tcp_sk(sk);
2120 struct inet_connection_sock *icsk = inet_csk(sk);
2121 int val;
2122 int err = 0;
2123
2124
2125 switch (optname) {
2126 case TCP_CONGESTION: {
2127 char name[TCP_CA_NAME_MAX];
2128
2129 if (optlen < 1)
2130 return -EINVAL;
2131
2132 val = strncpy_from_user(name, optval,
2133 min_t(long, TCP_CA_NAME_MAX-1, optlen));
2134 if (val < 0)
2135 return -EFAULT;
2136 name[val] = 0;
2137
2138 lock_sock(sk);
2139 err = tcp_set_congestion_control(sk, name);
2140 release_sock(sk);
2141 return err;
2142 }
2143 case TCP_COOKIE_TRANSACTIONS: {
2144 struct tcp_cookie_transactions ctd;
2145 struct tcp_cookie_values *cvp = NULL;
2146
2147 if (sizeof(ctd) > optlen)
2148 return -EINVAL;
2149 if (copy_from_user(&ctd, optval, sizeof(ctd)))
2150 return -EFAULT;
2151
2152 if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
2153 ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
2154 return -EINVAL;
2155
2156 if (ctd.tcpct_cookie_desired == 0) {
2157
2158 } else if ((0x1 & ctd.tcpct_cookie_desired) ||
2159 ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
2160 ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
2161 return -EINVAL;
2162 }
2163
2164 if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
2165
2166 lock_sock(sk);
2167 if (tp->cookie_values != NULL) {
2168 kref_put(&tp->cookie_values->kref,
2169 tcp_cookie_values_release);
2170 tp->cookie_values = NULL;
2171 }
2172 tp->rx_opt.cookie_in_always = 0;
2173 tp->rx_opt.cookie_out_never = 1;
2174 release_sock(sk);
2175 return err;
2176 }
2177
2178
2179
2180 if (ctd.tcpct_used > 0 ||
2181 (tp->cookie_values == NULL &&
2182 (sysctl_tcp_cookie_size > 0 ||
2183 ctd.tcpct_cookie_desired > 0 ||
2184 ctd.tcpct_s_data_desired > 0))) {
2185 cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
2186 GFP_KERNEL);
2187 if (cvp == NULL)
2188 return -ENOMEM;
2189
2190 kref_init(&cvp->kref);
2191 }
2192 lock_sock(sk);
2193 tp->rx_opt.cookie_in_always =
2194 (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
2195 tp->rx_opt.cookie_out_never = 0;
2196
2197 if (tp->cookie_values != NULL) {
2198 if (cvp != NULL) {
2199
2200
2201
2202
2203 kref_put(&tp->cookie_values->kref,
2204 tcp_cookie_values_release);
2205 } else {
2206 cvp = tp->cookie_values;
2207 }
2208 }
2209
2210 if (cvp != NULL) {
2211 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2212
2213 if (ctd.tcpct_used > 0) {
2214 memcpy(cvp->s_data_payload, ctd.tcpct_value,
2215 ctd.tcpct_used);
2216 cvp->s_data_desired = ctd.tcpct_used;
2217 cvp->s_data_constant = 1;
2218 } else {
2219
2220 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2221 cvp->s_data_constant = 0;
2222 }
2223
2224 tp->cookie_values = cvp;
2225 }
2226 release_sock(sk);
2227 return err;
2228 }
2229 default:
2230
2231 break;
2232 }
2233
2234 if (optlen < sizeof(int))
2235 return -EINVAL;
2236
2237 if (get_user(val, (int __user *)optval))
2238 return -EFAULT;
2239
2240 lock_sock(sk);
2241
2242 switch (optname) {
2243 case TCP_MAXSEG:
2244
2245
2246
2247 if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
2248 err = -EINVAL;
2249 break;
2250 }
2251 tp->rx_opt.user_mss = val;
2252 break;
2253
2254 case TCP_NODELAY:
2255 if (val) {
2256
2257
2258
2259
2260
2261
2262
2263
2264 tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
2265 tcp_push_pending_frames(sk);
2266 } else {
2267 tp->nonagle &= ~TCP_NAGLE_OFF;
2268 }
2269 break;
2270
2271 case TCP_THIN_LINEAR_TIMEOUTS:
2272 if (val < 0 || val > 1)
2273 err = -EINVAL;
2274 else
2275 tp->thin_lto = val;
2276 break;
2277
2278 case TCP_THIN_DUPACK:
2279 if (val < 0 || val > 1)
2280 err = -EINVAL;
2281 else
2282 tp->thin_dupack = val;
2283 break;
2284
2285 case TCP_CORK:
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297 if (val) {
2298 tp->nonagle |= TCP_NAGLE_CORK;
2299 } else {
2300 tp->nonagle &= ~TCP_NAGLE_CORK;
2301 if (tp->nonagle&TCP_NAGLE_OFF)
2302 tp->nonagle |= TCP_NAGLE_PUSH;
2303 tcp_push_pending_frames(sk);
2304 }
2305 break;
2306
2307 case TCP_KEEPIDLE:
2308 if (val < 1 || val > MAX_TCP_KEEPIDLE)
2309 err = -EINVAL;
2310 else {
2311 tp->keepalive_time = val * HZ;
2312 if (sock_flag(sk, SOCK_KEEPOPEN) &&
2313 !((1 << sk->sk_state) &
2314 (TCPF_CLOSE | TCPF_LISTEN))) {
2315 u32 elapsed = keepalive_time_elapsed(tp);
2316 if (tp->keepalive_time > elapsed)
2317 elapsed = tp->keepalive_time - elapsed;
2318 else
2319 elapsed = 0;
2320 inet_csk_reset_keepalive_timer(sk, elapsed);
2321 }
2322 }
2323 break;
2324 case TCP_KEEPINTVL:
2325 if (val < 1 || val > MAX_TCP_KEEPINTVL)
2326 err = -EINVAL;
2327 else
2328 tp->keepalive_intvl = val * HZ;
2329 break;
2330 case TCP_KEEPCNT:
2331 if (val < 1 || val > MAX_TCP_KEEPCNT)
2332 err = -EINVAL;
2333 else
2334 tp->keepalive_probes = val;
2335 break;
2336 case TCP_SYNCNT:
2337 if (val < 1 || val > MAX_TCP_SYNCNT)
2338 err = -EINVAL;
2339 else
2340 icsk->icsk_syn_retries = val;
2341 break;
2342
2343 case TCP_LINGER2:
2344 if (val < 0)
2345 tp->linger2 = -1;
2346 else if (val > sysctl_tcp_fin_timeout / HZ)
2347 tp->linger2 = 0;
2348 else
2349 tp->linger2 = val * HZ;
2350 break;
2351
2352 case TCP_DEFER_ACCEPT:
2353
2354 icsk->icsk_accept_queue.rskq_defer_accept =
2355 secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
2356 TCP_RTO_MAX / HZ);
2357 break;
2358
2359 case TCP_WINDOW_CLAMP:
2360 if (!val) {
2361 if (sk->sk_state != TCP_CLOSE) {
2362 err = -EINVAL;
2363 break;
2364 }
2365 tp->window_clamp = 0;
2366 } else
2367 tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
2368 SOCK_MIN_RCVBUF / 2 : val;
2369 break;
2370
2371 case TCP_QUICKACK:
2372 if (!val) {
2373 icsk->icsk_ack.pingpong = 1;
2374 } else {
2375 icsk->icsk_ack.pingpong = 0;
2376 if ((1 << sk->sk_state) &
2377 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
2378 inet_csk_ack_scheduled(sk)) {
2379 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
2380 tcp_cleanup_rbuf(sk, 1);
2381 if (!(val & 1))
2382 icsk->icsk_ack.pingpong = 1;
2383 }
2384 }
2385 break;
2386
2387#ifdef CONFIG_TCP_MD5SIG
2388 case TCP_MD5SIG:
2389
2390 err = tp->af_specific->md5_parse(sk, optval, optlen);
2391 break;
2392#endif
2393 case TCP_USER_TIMEOUT:
2394
2395
2396
2397 icsk->icsk_user_timeout = msecs_to_jiffies(val);
2398 break;
2399 default:
2400 err = -ENOPROTOOPT;
2401 break;
2402 }
2403
2404 release_sock(sk);
2405 return err;
2406}
2407
2408int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2409 unsigned int optlen)
2410{
2411 struct inet_connection_sock *icsk = inet_csk(sk);
2412
2413 if (level != SOL_TCP)
2414 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
2415 optval, optlen);
2416 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2417}
2418EXPORT_SYMBOL(tcp_setsockopt);
2419
2420#ifdef CONFIG_COMPAT
2421int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
2422 char __user *optval, unsigned int optlen)
2423{
2424 if (level != SOL_TCP)
2425 return inet_csk_compat_setsockopt(sk, level, optname,
2426 optval, optlen);
2427 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2428}
2429EXPORT_SYMBOL(compat_tcp_setsockopt);
2430#endif
2431
2432
2433void tcp_get_info(struct sock *sk, struct tcp_info *info)
2434{
2435 struct tcp_sock *tp = tcp_sk(sk);
2436 const struct inet_connection_sock *icsk = inet_csk(sk);
2437 u32 now = tcp_time_stamp;
2438
2439 memset(info, 0, sizeof(*info));
2440
2441 info->tcpi_state = sk->sk_state;
2442 info->tcpi_ca_state = icsk->icsk_ca_state;
2443 info->tcpi_retransmits = icsk->icsk_retransmits;
2444 info->tcpi_probes = icsk->icsk_probes_out;
2445 info->tcpi_backoff = icsk->icsk_backoff;
2446
2447 if (tp->rx_opt.tstamp_ok)
2448 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
2449 if (tcp_is_sack(tp))
2450 info->tcpi_options |= TCPI_OPT_SACK;
2451 if (tp->rx_opt.wscale_ok) {
2452 info->tcpi_options |= TCPI_OPT_WSCALE;
2453 info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
2454 info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
2455 }
2456
2457 if (tp->ecn_flags&TCP_ECN_OK)
2458 info->tcpi_options |= TCPI_OPT_ECN;
2459
2460 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
2461 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
2462 info->tcpi_snd_mss = tp->mss_cache;
2463 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
2464
2465 if (sk->sk_state == TCP_LISTEN) {
2466 info->tcpi_unacked = sk->sk_ack_backlog;
2467 info->tcpi_sacked = sk->sk_max_ack_backlog;
2468 } else {
2469 info->tcpi_unacked = tp->packets_out;
2470 info->tcpi_sacked = tp->sacked_out;
2471 }
2472 info->tcpi_lost = tp->lost_out;
2473 info->tcpi_retrans = tp->retrans_out;
2474 info->tcpi_fackets = tp->fackets_out;
2475
2476 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
2477 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
2478 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
2479
2480 info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
2481 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
2482 info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
2483 info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
2484 info->tcpi_snd_ssthresh = tp->snd_ssthresh;
2485 info->tcpi_snd_cwnd = tp->snd_cwnd;
2486 info->tcpi_advmss = tp->advmss;
2487 info->tcpi_reordering = tp->reordering;
2488
2489 info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
2490 info->tcpi_rcv_space = tp->rcvq_space.space;
2491
2492 info->tcpi_total_retrans = tp->total_retrans;
2493}
2494EXPORT_SYMBOL_GPL(tcp_get_info);
2495
2496static int do_tcp_getsockopt(struct sock *sk, int level,
2497 int optname, char __user *optval, int __user *optlen)
2498{
2499 struct inet_connection_sock *icsk = inet_csk(sk);
2500 struct tcp_sock *tp = tcp_sk(sk);
2501 int val, len;
2502
2503 if (get_user(len, optlen))
2504 return -EFAULT;
2505
2506 len = min_t(unsigned int, len, sizeof(int));
2507
2508 if (len < 0)
2509 return -EINVAL;
2510
2511 switch (optname) {
2512 case TCP_MAXSEG:
2513 val = tp->mss_cache;
2514 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2515 val = tp->rx_opt.user_mss;
2516 break;
2517 case TCP_NODELAY:
2518 val = !!(tp->nonagle&TCP_NAGLE_OFF);
2519 break;
2520 case TCP_CORK:
2521 val = !!(tp->nonagle&TCP_NAGLE_CORK);
2522 break;
2523 case TCP_KEEPIDLE:
2524 val = keepalive_time_when(tp) / HZ;
2525 break;
2526 case TCP_KEEPINTVL:
2527 val = keepalive_intvl_when(tp) / HZ;
2528 break;
2529 case TCP_KEEPCNT:
2530 val = keepalive_probes(tp);
2531 break;
2532 case TCP_SYNCNT:
2533 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
2534 break;
2535 case TCP_LINGER2:
2536 val = tp->linger2;
2537 if (val >= 0)
2538 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2539 break;
2540 case TCP_DEFER_ACCEPT:
2541 val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
2542 TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
2543 break;
2544 case TCP_WINDOW_CLAMP:
2545 val = tp->window_clamp;
2546 break;
2547 case TCP_INFO: {
2548 struct tcp_info info;
2549
2550 if (get_user(len, optlen))
2551 return -EFAULT;
2552
2553 tcp_get_info(sk, &info);
2554
2555 len = min_t(unsigned int, len, sizeof(info));
2556 if (put_user(len, optlen))
2557 return -EFAULT;
2558 if (copy_to_user(optval, &info, len))
2559 return -EFAULT;
2560 return 0;
2561 }
2562 case TCP_QUICKACK:
2563 val = !icsk->icsk_ack.pingpong;
2564 break;
2565
2566 case TCP_CONGESTION:
2567 if (get_user(len, optlen))
2568 return -EFAULT;
2569 len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
2570 if (put_user(len, optlen))
2571 return -EFAULT;
2572 if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
2573 return -EFAULT;
2574 return 0;
2575
2576 case TCP_COOKIE_TRANSACTIONS: {
2577 struct tcp_cookie_transactions ctd;
2578 struct tcp_cookie_values *cvp = tp->cookie_values;
2579
2580 if (get_user(len, optlen))
2581 return -EFAULT;
2582 if (len < sizeof(ctd))
2583 return -EINVAL;
2584
2585 memset(&ctd, 0, sizeof(ctd));
2586 ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
2587 TCP_COOKIE_IN_ALWAYS : 0)
2588 | (tp->rx_opt.cookie_out_never ?
2589 TCP_COOKIE_OUT_NEVER : 0);
2590
2591 if (cvp != NULL) {
2592 ctd.tcpct_flags |= (cvp->s_data_in ?
2593 TCP_S_DATA_IN : 0)
2594 | (cvp->s_data_out ?
2595 TCP_S_DATA_OUT : 0);
2596
2597 ctd.tcpct_cookie_desired = cvp->cookie_desired;
2598 ctd.tcpct_s_data_desired = cvp->s_data_desired;
2599
2600 memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
2601 cvp->cookie_pair_size);
2602 ctd.tcpct_used = cvp->cookie_pair_size;
2603 }
2604
2605 if (put_user(sizeof(ctd), optlen))
2606 return -EFAULT;
2607 if (copy_to_user(optval, &ctd, sizeof(ctd)))
2608 return -EFAULT;
2609 return 0;
2610 }
2611 case TCP_THIN_LINEAR_TIMEOUTS:
2612 val = tp->thin_lto;
2613 break;
2614 case TCP_THIN_DUPACK:
2615 val = tp->thin_dupack;
2616 break;
2617
2618 case TCP_USER_TIMEOUT:
2619 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2620 break;
2621 default:
2622 return -ENOPROTOOPT;
2623 }
2624
2625 if (put_user(len, optlen))
2626 return -EFAULT;
2627 if (copy_to_user(optval, &val, len))
2628 return -EFAULT;
2629 return 0;
2630}
2631
2632int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2633 int __user *optlen)
2634{
2635 struct inet_connection_sock *icsk = inet_csk(sk);
2636
2637 if (level != SOL_TCP)
2638 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
2639 optval, optlen);
2640 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2641}
2642EXPORT_SYMBOL(tcp_getsockopt);
2643
2644#ifdef CONFIG_COMPAT
2645int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2646 char __user *optval, int __user *optlen)
2647{
2648 if (level != SOL_TCP)
2649 return inet_csk_compat_getsockopt(sk, level, optname,
2650 optval, optlen);
2651 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2652}
2653EXPORT_SYMBOL(compat_tcp_getsockopt);
2654#endif
2655
2656struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
2657{
2658 struct sk_buff *segs = ERR_PTR(-EINVAL);
2659 struct tcphdr *th;
2660 unsigned thlen;
2661 unsigned int seq;
2662 __be32 delta;
2663 unsigned int oldlen;
2664 unsigned int mss;
2665
2666 if (!pskb_may_pull(skb, sizeof(*th)))
2667 goto out;
2668
2669 th = tcp_hdr(skb);
2670 thlen = th->doff * 4;
2671 if (thlen < sizeof(*th))
2672 goto out;
2673
2674 if (!pskb_may_pull(skb, thlen))
2675 goto out;
2676
2677 oldlen = (u16)~skb->len;
2678 __skb_pull(skb, thlen);
2679
2680 mss = skb_shinfo(skb)->gso_size;
2681 if (unlikely(skb->len <= mss))
2682 goto out;
2683
2684 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
2685
2686 int type = skb_shinfo(skb)->gso_type;
2687
2688 if (unlikely(type &
2689 ~(SKB_GSO_TCPV4 |
2690 SKB_GSO_DODGY |
2691 SKB_GSO_TCP_ECN |
2692 SKB_GSO_TCPV6 |
2693 0) ||
2694 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
2695 goto out;
2696
2697 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
2698
2699 segs = NULL;
2700 goto out;
2701 }
2702
2703 segs = skb_segment(skb, features);
2704 if (IS_ERR(segs))
2705 goto out;
2706
2707 delta = htonl(oldlen + (thlen + mss));
2708
2709 skb = segs;
2710 th = tcp_hdr(skb);
2711 seq = ntohl(th->seq);
2712
2713 do {
2714 th->fin = th->psh = 0;
2715
2716 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
2717 (__force u32)delta));
2718 if (skb->ip_summed != CHECKSUM_PARTIAL)
2719 th->check =
2720 csum_fold(csum_partial(skb_transport_header(skb),
2721 thlen, skb->csum));
2722
2723 seq += mss;
2724 skb = skb->next;
2725 th = tcp_hdr(skb);
2726
2727 th->seq = htonl(seq);
2728 th->cwr = 0;
2729 } while (skb->next);
2730
2731 delta = htonl(oldlen + (skb->tail - skb->transport_header) +
2732 skb->data_len);
2733 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
2734 (__force u32)delta));
2735 if (skb->ip_summed != CHECKSUM_PARTIAL)
2736 th->check = csum_fold(csum_partial(skb_transport_header(skb),
2737 thlen, skb->csum));
2738
2739out:
2740 return segs;
2741}
2742EXPORT_SYMBOL(tcp_tso_segment);
2743
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
    struct sk_buff **pp = NULL;
    struct sk_buff *p;
    struct tcphdr *th;
    struct tcphdr *th2;
    unsigned int len;
    unsigned int thlen;
    __be32 flags;
    unsigned int mss = 1;
    unsigned int hlen;
    unsigned int off;
    int flush = 1;
    int i;

    off = skb_gro_offset(skb);
    hlen = off + sizeof(*th);
    th = skb_gro_header_fast(skb, off);
    if (skb_gro_header_hard(skb, hlen)) {
        th = skb_gro_header_slow(skb, hlen, off);
        if (unlikely(!th))
            goto out;
    }

    thlen = th->doff * 4;
    if (thlen < sizeof(*th))
        goto out;

    hlen = off + thlen;
    if (skb_gro_header_hard(skb, hlen)) {
        th = skb_gro_header_slow(skb, hlen, off);
        if (unlikely(!th))
            goto out;
    }

    skb_gro_pull(skb, thlen);

    len = skb_gro_len(skb);
    flags = tcp_flag_word(th);

    for (; (p = *head); head = &p->next) {
        if (!NAPI_GRO_CB(p)->same_flow)
            continue;

        th2 = tcp_hdr(p);

        if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
            NAPI_GRO_CB(p)->same_flow = 0;
            continue;
        }

        goto found;
    }

    goto out_check_final;

found:
    flush = NAPI_GRO_CB(p)->flush;
    flush |= (__force int)(flags & TCP_FLAG_CWR);
    flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
             ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
    flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
    for (i = sizeof(*th); i < thlen; i += 4)
        flush |= *(u32 *)((u8 *)th + i) ^
                 *(u32 *)((u8 *)th2 + i);

    mss = skb_shinfo(p)->gso_size;

    flush |= (len - 1) >= mss;
    flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);

    if (flush || skb_gro_receive(head, skb)) {
        mss = 1;
        goto out_check_final;
    }

    p = *head;
    th2 = tcp_hdr(p);
    tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);

out_check_final:
    flush = len < mss;
    flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
                                    TCP_FLAG_RST | TCP_FLAG_SYN |
                                    TCP_FLAG_FIN));

    if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
        pp = head;

out:
    NAPI_GRO_CB(skb)->flush |= flush;

    return pp;
}
EXPORT_SYMBOL(tcp_gro_receive);

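/*
 *	Finish a merged GRO packet: prime it for checksum offload
 *	(CHECKSUM_PARTIAL), record the merged segment count for GSO, and
 *	mark SKB_GSO_TCP_ECN when the CWR bit is set.
 */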
int tcp_gro_complete(struct sk_buff *skb)
{
    struct tcphdr *th = tcp_hdr(skb);

    skb->csum_start = skb_transport_header(skb) - skb->head;
    skb->csum_offset = offsetof(struct tcphdr, check);
    skb->ip_summed = CHECKSUM_PARTIAL;

    skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;

    if (th->cwr)
        skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

    return 0;
}
EXPORT_SYMBOL(tcp_gro_complete);

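/*
 *	TCP MD5 signature (RFC 2385) support: a per-cpu pool of crypto hash
 *	descriptors shared by all users and reference-counted through
 *	tcp_md5sig_users under tcp_md5sig_pool_lock.
 */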
#ifdef CONFIG_TCP_MD5SIG
static unsigned long tcp_md5sig_users;
static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);

static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
{
    int cpu;
    for_each_possible_cpu(cpu) {
        struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);
        if (p) {
            if (p->md5_desc.tfm)
                crypto_free_hash(p->md5_desc.tfm);
            kfree(p);
        }
    }
    free_percpu(pool);
}

void tcp_free_md5sig_pool(void)
{
    struct tcp_md5sig_pool * __percpu *pool = NULL;

    spin_lock_bh(&tcp_md5sig_pool_lock);
    if (--tcp_md5sig_users == 0) {
        pool = tcp_md5sig_pool;
        tcp_md5sig_pool = NULL;
    }
    spin_unlock_bh(&tcp_md5sig_pool_lock);
    if (pool)
        __tcp_free_md5sig_pool(pool);
}
EXPORT_SYMBOL(tcp_free_md5sig_pool);

static struct tcp_md5sig_pool * __percpu *
__tcp_alloc_md5sig_pool(struct sock *sk)
{
    int cpu;
    struct tcp_md5sig_pool * __percpu *pool;

    pool = alloc_percpu(struct tcp_md5sig_pool *);
    if (!pool)
        return NULL;

    for_each_possible_cpu(cpu) {
        struct tcp_md5sig_pool *p;
        struct crypto_hash *hash;

        p = kzalloc(sizeof(*p), sk->sk_allocation);
        if (!p)
            goto out_free;
        *per_cpu_ptr(pool, cpu) = p;

        hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
        if (!hash || IS_ERR(hash))
            goto out_free;

        p->md5_desc.tfm = hash;
    }
    return pool;
out_free:
    __tcp_free_md5sig_pool(pool);
    return NULL;
}

struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
{
    struct tcp_md5sig_pool * __percpu *pool;
    int alloc = 0;

retry:
    spin_lock_bh(&tcp_md5sig_pool_lock);
    pool = tcp_md5sig_pool;
    if (tcp_md5sig_users++ == 0) {
        alloc = 1;
        spin_unlock_bh(&tcp_md5sig_pool_lock);
    } else if (!pool) {
        tcp_md5sig_users--;
        spin_unlock_bh(&tcp_md5sig_pool_lock);
        cpu_relax();
        goto retry;
    } else
        spin_unlock_bh(&tcp_md5sig_pool_lock);

    if (alloc) {
        /* The allocation may sleep, so it must be done with the
         * spinlock dropped.
         */
        struct tcp_md5sig_pool * __percpu *p;

        p = __tcp_alloc_md5sig_pool(sk);
        spin_lock_bh(&tcp_md5sig_pool_lock);
        if (!p) {
            tcp_md5sig_users--;
            spin_unlock_bh(&tcp_md5sig_pool_lock);
            return NULL;
        }
        pool = tcp_md5sig_pool;
        if (pool) {
            /* Another user set up the pool while we were
             * allocating; free ours and use theirs.
             */
            spin_unlock_bh(&tcp_md5sig_pool_lock);
            __tcp_free_md5sig_pool(p);
        } else {
            tcp_md5sig_pool = pool = p;
            spin_unlock_bh(&tcp_md5sig_pool_lock);
        }
    }
    return pool;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);

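/*
 *	tcp_get_md5sig_pool - get md5sig_pool for this user
 *
 *	The pool is per-cpu, so on success we return with bottom halves
 *	disabled; the caller must pair this with tcp_put_md5sig_pool() to
 *	re-enable them and drop the user reference.
 */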
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
    struct tcp_md5sig_pool * __percpu *p;

    local_bh_disable();

    spin_lock(&tcp_md5sig_pool_lock);
    p = tcp_md5sig_pool;
    if (p)
        tcp_md5sig_users++;
    spin_unlock(&tcp_md5sig_pool_lock);

    if (p)
        return *this_cpu_ptr(p);

    local_bh_enable();
    return NULL;
}
EXPORT_SYMBOL(tcp_get_md5sig_pool);

void tcp_put_md5sig_pool(void)
{
    local_bh_enable();
    tcp_free_md5sig_pool();
}
EXPORT_SYMBOL(tcp_put_md5sig_pool);

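/*
 *	Feed the fixed TCP header into the MD5 hash with its checksum field
 *	temporarily zeroed, since the signature is computed assuming a zero
 *	checksum.
 */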
int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
                        struct tcphdr *th)
{
    struct scatterlist sg;
    int err;

    __sum16 old_checksum = th->check;
    th->check = 0;

    sg_init_one(&sg, th, sizeof(struct tcphdr));
    err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
    th->check = old_checksum;
    return err;
}
EXPORT_SYMBOL(tcp_md5_hash_header);

int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
                          struct sk_buff *skb, unsigned header_len)
{
    struct scatterlist sg;
    const struct tcphdr *tp = tcp_hdr(skb);
    struct hash_desc *desc = &hp->md5_desc;
    unsigned i;
    const unsigned head_data_len = skb_headlen(skb) > header_len ?
                                   skb_headlen(skb) - header_len : 0;
    const struct skb_shared_info *shi = skb_shinfo(skb);
    struct sk_buff *frag_iter;

    sg_init_table(&sg, 1);

    sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
    if (crypto_hash_update(desc, &sg, head_data_len))
        return 1;

    for (i = 0; i < shi->nr_frags; ++i) {
        const struct skb_frag_struct *f = &shi->frags[i];
        sg_set_page(&sg, f->page, f->size, f->page_offset);
        if (crypto_hash_update(desc, &sg, f->size))
            return 1;
    }

    skb_walk_frags(skb, frag_iter)
        if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
            return 1;

    return 0;
}
EXPORT_SYMBOL(tcp_md5_hash_skb_data);

int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
{
    struct scatterlist sg;

    sg_init_one(&sg, key->key, key->keylen);
    return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
}
EXPORT_SYMBOL(tcp_md5_hash_key);

#endif

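/*
 *	TCP cookie secret handling.  Two secret values are kept so that the
 *	secret can be rolled over without invalidating cookies made with the
 *	previous value: tcp_secret_generating points at the value used for
 *	new cookies and tcp_secret_retiring at the value that is being phased
 *	out.  Rollover happens lazily in tcp_cookie_generator() once the
 *	generating secret expires.
 */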
struct tcp_cookie_secret {
    u32 secrets[COOKIE_WORKSPACE_WORDS];
    unsigned long expires;
};

#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
#define TCP_SECRET_LIFE (HZ * 600)

static struct tcp_cookie_secret tcp_secret_one;
static struct tcp_cookie_secret tcp_secret_two;

/* Pointers rotate over the two secrets above, protected by tcp_secret_locker. */
static struct tcp_cookie_secret *tcp_secret_generating;
static struct tcp_cookie_secret *tcp_secret_primary;
static struct tcp_cookie_secret *tcp_secret_retiring;
static struct tcp_cookie_secret *tcp_secret_secondary;

static DEFINE_SPINLOCK(tcp_secret_locker);

/* Select a pseudo-random word from the message portion of the workspace. */
static inline u32 tcp_cookie_work(const u32 *ws, const int n)
{
    return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
}

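/*
 *	tcp_cookie_generator - fill @bakery with the current cookie secret
 *
 *	Fast path: copy the generating secret under rcu_read_lock_bh().
 *	When that secret has expired, take tcp_secret_locker and either pick
 *	up a refresh done by another CPU or generate a new secret and rotate
 *	the generating/retiring pointers.
 */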
int tcp_cookie_generator(u32 *bakery)
{
    unsigned long jiffy = jiffies;

    if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
        spin_lock_bh(&tcp_secret_locker);
        if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
            /* Another CPU refreshed the secret while we waited
             * for the lock; just copy it.
             */
            memcpy(bakery,
                   &tcp_secret_generating->secrets[0],
                   COOKIE_WORKSPACE_WORDS);
        } else {
            /* Still expired: generate a fresh secret. */
            get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);

            /* On the very first rollover both secrets still carry
             * their boot-time expiry, so mix in the clock and give
             * the new secret only a short (1MSL-based) lifetime;
             * afterwards use the normal LIFE/2MSL schedule.
             */
            if (unlikely(tcp_secret_primary->expires ==
                         tcp_secret_secondary->expires)) {
                struct timespec tv;

                getnstimeofday(&tv);
                bakery[COOKIE_DIGEST_WORDS+0] ^=
                    (u32)tv.tv_nsec;

                tcp_secret_secondary->expires = jiffy
                    + TCP_SECRET_1MSL
                    + (0x0f & tcp_cookie_work(bakery, 0));
            } else {
                tcp_secret_secondary->expires = jiffy
                    + TCP_SECRET_LIFE
                    + (0xff & tcp_cookie_work(bakery, 1));
                tcp_secret_primary->expires = jiffy
                    + TCP_SECRET_2MSL
                    + (0x1f & tcp_cookie_work(bakery, 2));
            }
            memcpy(&tcp_secret_secondary->secrets[0],
                   bakery, COOKIE_WORKSPACE_WORDS);

            rcu_assign_pointer(tcp_secret_generating,
                               tcp_secret_secondary);
            rcu_assign_pointer(tcp_secret_retiring,
                               tcp_secret_primary);
            /* No call_rcu()/synchronize_rcu() is needed here: the
             * old secret lives in static storage and is only
             * overwritten by a later rollover, well after readers
             * have moved on to the new generating secret.
             */
        }
        spin_unlock_bh(&tcp_secret_locker);
    } else {
        rcu_read_lock_bh();
        memcpy(bakery,
               &rcu_dereference(tcp_secret_generating)->secrets[0],
               COOKIE_WORKSPACE_WORDS);
        rcu_read_unlock_bh();
    }
    return 0;
}
EXPORT_SYMBOL(tcp_cookie_generator);

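/*
 *	tcp_done - bring a socket to TCP_CLOSE and stop its timers
 *
 *	Connections that die while still in SYN_SENT/SYN_RECV are counted as
 *	failed connection attempts.  Dead sockets are destroyed immediately;
 *	live ones only get a state change notification.
 */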
void tcp_done(struct sock *sk)
{
    if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);

    tcp_set_state(sk, TCP_CLOSE);
    tcp_clear_xmit_timers(sk);

    sk->sk_shutdown = SHUTDOWN_MASK;

    if (!sock_flag(sk, SOCK_DEAD))
        sk->sk_state_change(sk);
    else
        inet_csk_destroy_sock(sk);
}
EXPORT_SYMBOL_GPL(tcp_done);

extern struct tcp_congestion_ops tcp_reno;

static __initdata unsigned long thash_entries;
static int __init set_thash_entries(char *str)
{
    if (!str)
        return 0;
    thash_entries = simple_strtoul(str, &str, 0);
    return 1;
}
__setup("thash_entries=", set_thash_entries);

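/*
 *	Boot-time TCP initialisation: allocate the established and bind hash
 *	tables, derive the default tcp_mem/tcp_wmem/tcp_rmem limits from the
 *	amount of memory in the machine, register the "reno" congestion
 *	control and initialise the cookie secret state.
 */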
void __init tcp_init(void)
{
    struct sk_buff *skb = NULL;
    unsigned long nr_pages, limit;
    int i, max_share, cnt;
    unsigned long jiffy = jiffies;

    BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));

    percpu_counter_init(&tcp_sockets_allocated, 0);
    percpu_counter_init(&tcp_orphan_count, 0);
    tcp_hashinfo.bind_bucket_cachep =
        kmem_cache_create("tcp_bind_bucket",
                          sizeof(struct inet_bind_bucket), 0,
                          SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

    /* Size and allocate the main established and bind bucket hash
     * tables: scale with available memory, with an upper bound unless
     * overridden by the thash_entries= boot parameter.
     */
    tcp_hashinfo.ehash =
        alloc_large_system_hash("TCP established",
                                sizeof(struct inet_ehash_bucket),
                                thash_entries,
                                (totalram_pages >= 128 * 1024) ?
                                13 : 15,
                                0,
                                NULL,
                                &tcp_hashinfo.ehash_mask,
                                thash_entries ? 0 : 512 * 1024);
    for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
        INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
        INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
    }
    if (inet_ehash_locks_alloc(&tcp_hashinfo))
        panic("TCP: failed to alloc ehash_locks");
    tcp_hashinfo.bhash =
        alloc_large_system_hash("TCP bind",
                                sizeof(struct inet_bind_hashbucket),
                                tcp_hashinfo.ehash_mask + 1,
                                (totalram_pages >= 128 * 1024) ?
                                13 : 15,
                                0,
                                &tcp_hashinfo.bhash_size,
                                NULL,
                                64 * 1024);
    tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
    for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
        spin_lock_init(&tcp_hashinfo.bhash[i].lock);
        INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
    }

    cnt = tcp_hashinfo.ehash_mask + 1;

    tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
    sysctl_tcp_max_orphans = cnt / 2;
    sysctl_max_syn_backlog = max(128, cnt / 256);

    /* Derive the memory pressure thresholds from the amount of low
     * memory, with a floor of 128 pages.
     */
    nr_pages = totalram_pages - totalhigh_pages;
    limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
    limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
    limit = max(limit, 128UL);
    sysctl_tcp_mem[0] = limit / 4 * 3;
    sysctl_tcp_mem[1] = limit;
    sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;

    /* Set per-socket limits to no more than 1/128 of the pressure
     * threshold.
     */
    limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
    max_share = min(4UL*1024*1024, limit);

    sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
    sysctl_tcp_wmem[1] = 16*1024;
    sysctl_tcp_wmem[2] = max(64*1024, max_share);

    sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
    sysctl_tcp_rmem[1] = 87380;
    sysctl_tcp_rmem[2] = max(87380, max_share);

    printk(KERN_INFO "TCP: Hash tables configured "
           "(established %u bind %u)\n",
           tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);

    tcp_register_congestion_control(&tcp_reno);

    /* Both cookie secrets start out zeroed and already expired, so the
     * first call to tcp_cookie_generator() produces real ones.
     */
    memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
    memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
    tcp_secret_one.expires = jiffy;
    tcp_secret_two.expires = jiffy;
    tcp_secret_generating = &tcp_secret_one;
    tcp_secret_primary = &tcp_secret_one;
    tcp_secret_retiring = &tcp_secret_two;
    tcp_secret_secondary = &tcp_secret_two;
}