linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
   49 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53
  54#include <linux/bottom_half.h>
  55#include <linux/types.h>
  56#include <linux/fcntl.h>
  57#include <linux/module.h>
  58#include <linux/random.h>
  59#include <linux/cache.h>
  60#include <linux/jhash.h>
  61#include <linux/init.h>
  62#include <linux/times.h>
  63
  64#include <net/net_namespace.h>
  65#include <net/icmp.h>
  66#include <net/inet_hashtables.h>
  67#include <net/tcp.h>
  68#include <net/transp_v6.h>
  69#include <net/ipv6.h>
  70#include <net/inet_common.h>
  71#include <net/timewait_sock.h>
  72#include <net/xfrm.h>
  73#include <net/netdma.h>
  74
  75#include <linux/inet.h>
  76#include <linux/ipv6.h>
  77#include <linux/stddef.h>
  78#include <linux/proc_fs.h>
  79#include <linux/seq_file.h>
  80
  81#include <linux/crypto.h>
  82#include <linux/scatterlist.h>
  83
  84int sysctl_tcp_tw_reuse __read_mostly;
  85int sysctl_tcp_low_latency __read_mostly;
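/* Both are exposed via /proc/sys/net/ipv4/ (tcp_tw_reuse, tcp_low_latency).
 * tcp_tw_reuse gates the TIME-WAIT reuse check in tcp_twsk_unique() below;
 * tcp_low_latency tells the receive path to favour latency over throughput.
 */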
  86
  87
  88#ifdef CONFIG_TCP_MD5SIG
  89static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
  90                                                   __be32 addr);
  91static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
  92                               __be32 daddr, __be32 saddr, struct tcphdr *th);
  93#else
  94static inline
  95struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
  96{
  97        return NULL;
  98}
  99#endif
 100
 101struct inet_hashinfo tcp_hashinfo;
 102
 103static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 104{
 105        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 106                                          ip_hdr(skb)->saddr,
 107                                          tcp_hdr(skb)->dest,
 108                                          tcp_hdr(skb)->source);
 109}
 110
 111int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 112{
 113        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 114        struct tcp_sock *tp = tcp_sk(sk);
 115
 116        /* With PAWS, it is safe from the viewpoint
 117           of data integrity. Even without PAWS it is safe provided sequence
 118           spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 119
  120           Actually, the idea is close to VJ's, only the timestamp cache is
  121           held not per host but per port pair, and the TW bucket is used as
  122           the state holder.
  123
  124           If the TW bucket has already been destroyed we fall back to VJ's
  125           scheme and use the initial timestamp retrieved from the peer table.
 126         */
 127        if (tcptw->tw_ts_recent_stamp &&
 128            (twp == NULL || (sysctl_tcp_tw_reuse &&
 129                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 130                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 131                if (tp->write_seq == 0)
 132                        tp->write_seq = 1;
 133                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 134                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 135                sock_hold(sktw);
 136                return 1;
 137        }
 138
 139        return 0;
 140}
 141
 142EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 143
 144/* This will initiate an outgoing connection. */
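/* Roughly: resolve a route to the destination (honouring any source route in
 * the IP options), pick the local address, move to SYN-SENT, grab a local
 * port via inet_hash_connect(), choose an initial sequence number if needed
 * (secure_tcp_sequence_number()), and hand off to tcp_connect() to emit the
 * SYN.
 */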
 145int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 146{
 147        struct inet_sock *inet = inet_sk(sk);
 148        struct tcp_sock *tp = tcp_sk(sk);
 149        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 150        struct rtable *rt;
 151        __be32 daddr, nexthop;
 152        int tmp;
 153        int err;
 154
 155        if (addr_len < sizeof(struct sockaddr_in))
 156                return -EINVAL;
 157
 158        if (usin->sin_family != AF_INET)
 159                return -EAFNOSUPPORT;
 160
 161        nexthop = daddr = usin->sin_addr.s_addr;
 162        if (inet->opt && inet->opt->srr) {
 163                if (!daddr)
 164                        return -EINVAL;
 165                nexthop = inet->opt->faddr;
 166        }
 167
 168        tmp = ip_route_connect(&rt, nexthop, inet->saddr,
 169                               RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 170                               IPPROTO_TCP,
 171                               inet->sport, usin->sin_port, sk, 1);
 172        if (tmp < 0) {
 173                if (tmp == -ENETUNREACH)
 174                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 175                return tmp;
 176        }
 177
 178        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 179                ip_rt_put(rt);
 180                return -ENETUNREACH;
 181        }
 182
 183        if (!inet->opt || !inet->opt->srr)
 184                daddr = rt->rt_dst;
 185
 186        if (!inet->saddr)
 187                inet->saddr = rt->rt_src;
 188        inet->rcv_saddr = inet->saddr;
 189
 190        if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
 191                /* Reset inherited state */
 192                tp->rx_opt.ts_recent       = 0;
 193                tp->rx_opt.ts_recent_stamp = 0;
 194                tp->write_seq              = 0;
 195        }
 196
 197        if (tcp_death_row.sysctl_tw_recycle &&
 198            !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
 199                struct inet_peer *peer = rt_get_peer(rt);
 200                /*
 201                 * VJ's idea. We save last timestamp seen from
 202                 * the destination in peer table, when entering state
  203                 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
 204                 * when trying new connection.
 205                 */
 206                if (peer != NULL &&
 207                    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
 208                        tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 209                        tp->rx_opt.ts_recent = peer->tcp_ts;
 210                }
 211        }
 212
 213        inet->dport = usin->sin_port;
 214        inet->daddr = daddr;
 215
 216        inet_csk(sk)->icsk_ext_hdr_len = 0;
 217        if (inet->opt)
 218                inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 219
 220        tp->rx_opt.mss_clamp = 536;
 221
 222        /* Socket identity is still unknown (sport may be zero).
  223         * However we set state to SYN-SENT and, without releasing the socket
  224         * lock, select a source port, enter ourselves into the hash tables and
 225         * complete initialization after this.
 226         */
 227        tcp_set_state(sk, TCP_SYN_SENT);
 228        err = inet_hash_connect(&tcp_death_row, sk);
 229        if (err)
 230                goto failure;
 231
 232        err = ip_route_newports(&rt, IPPROTO_TCP,
 233                                inet->sport, inet->dport, sk);
 234        if (err)
 235                goto failure;
 236
 237        /* OK, now commit destination to socket.  */
 238        sk->sk_gso_type = SKB_GSO_TCPV4;
 239        sk_setup_caps(sk, &rt->u.dst);
 240
 241        if (!tp->write_seq)
 242                tp->write_seq = secure_tcp_sequence_number(inet->saddr,
 243                                                           inet->daddr,
 244                                                           inet->sport,
 245                                                           usin->sin_port);
 246
 247        inet->id = tp->write_seq ^ jiffies;
 248
 249        err = tcp_connect(sk);
 250        rt = NULL;
 251        if (err)
 252                goto failure;
 253
 254        return 0;
 255
 256failure:
 257        /*
 258         * This unhashes the socket and releases the local port,
 259         * if necessary.
 260         */
 261        tcp_set_state(sk, TCP_CLOSE);
 262        ip_rt_put(rt);
 263        sk->sk_route_caps = 0;
 264        inet->dport = 0;
 265        return err;
 266}
 267
 268/*
 269 * This routine does path mtu discovery as defined in RFC1191.
 270 */
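/* Called from tcp_v4_err() on ICMP_FRAG_NEEDED: update the cached path MTU,
 * remember a soft EMSGSIZE error, shrink the MSS via tcp_sync_mss() and
 * retransmit right away instead of waiting for the RTO timer.
 */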
 271static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
 272{
 273        struct dst_entry *dst;
 274        struct inet_sock *inet = inet_sk(sk);
 275
 276        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
  277         * sent out by Linux are always < 576 bytes so they should go through
 278         * unfragmented).
 279         */
 280        if (sk->sk_state == TCP_LISTEN)
 281                return;
 282
  283        /* We don't check in the dst entry if pmtu discovery is forbidden
  284         * on this route. We just assume that no packet-too-big packets
  285         * are sent back when pmtu discovery is not active.
 286         * There is a small race when the user changes this flag in the
 287         * route, but I think that's acceptable.
 288         */
 289        if ((dst = __sk_dst_check(sk, 0)) == NULL)
 290                return;
 291
 292        dst->ops->update_pmtu(dst, mtu);
 293
  294        /* Something is about to go wrong... Remember the soft error
  295         * for the case that this connection will not be able to recover.
 296         */
 297        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 298                sk->sk_err_soft = EMSGSIZE;
 299
 300        mtu = dst_mtu(dst);
 301
 302        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 303            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 304                tcp_sync_mss(sk, mtu);
 305
 306                /* Resend the TCP packet because it's
 307                 * clear that the old packet has been
 308                 * dropped. This is the new "fast" path mtu
 309                 * discovery.
 310                 */
 311                tcp_simple_retransmit(sk);
 312        } /* else let the usual retransmit timer handle it */
 313}
 314
 315/*
 316 * This routine is called by the ICMP module when it gets some
 317 * sort of error condition.  If err < 0 then the socket should
 318 * be closed and the error returned to the user.  If err > 0
 319 * it's just the icmp type << 8 | icmp code.  After adjustment
 320 * header points to the first 8 bytes of the tcp header.  We need
 321 * to find the appropriate port.
 322 *
 323 * The locking strategy used here is very "optimistic". When
 324 * someone else accesses the socket the ICMP is just dropped
 325 * and for some paths there is no check at all.
 326 * A more general error queue to queue errors for later handling
 327 * is probably better.
 328 *
 329 */
 330
 331void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 332{
 333        struct iphdr *iph = (struct iphdr *)icmp_skb->data;
 334        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 335        struct inet_connection_sock *icsk;
 336        struct tcp_sock *tp;
 337        struct inet_sock *inet;
 338        const int type = icmp_hdr(icmp_skb)->type;
 339        const int code = icmp_hdr(icmp_skb)->code;
 340        struct sock *sk;
 341        struct sk_buff *skb;
 342        __u32 seq;
 343        __u32 remaining;
 344        int err;
 345        struct net *net = dev_net(icmp_skb->dev);
 346
 347        if (icmp_skb->len < (iph->ihl << 2) + 8) {
 348                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 349                return;
 350        }
 351
 352        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 353                        iph->saddr, th->source, inet_iif(icmp_skb));
 354        if (!sk) {
 355                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 356                return;
 357        }
 358        if (sk->sk_state == TCP_TIME_WAIT) {
 359                inet_twsk_put(inet_twsk(sk));
 360                return;
 361        }
 362
 363        bh_lock_sock(sk);
 364        /* If too many ICMPs get dropped on busy
 365         * servers this needs to be solved differently.
 366         */
 367        if (sock_owned_by_user(sk))
 368                NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 369
 370        if (sk->sk_state == TCP_CLOSE)
 371                goto out;
 372
 373        icsk = inet_csk(sk);
 374        tp = tcp_sk(sk);
 375        seq = ntohl(th->seq);
 376        if (sk->sk_state != TCP_LISTEN &&
 377            !between(seq, tp->snd_una, tp->snd_nxt)) {
 378                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 379                goto out;
 380        }
 381
 382        switch (type) {
 383        case ICMP_SOURCE_QUENCH:
 384                /* Just silently ignore these. */
 385                goto out;
 386        case ICMP_PARAMETERPROB:
 387                err = EPROTO;
 388                break;
 389        case ICMP_DEST_UNREACH:
 390                if (code > NR_ICMP_UNREACH)
 391                        goto out;
 392
 393                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 394                        if (!sock_owned_by_user(sk))
 395                                do_pmtu_discovery(sk, iph, info);
 396                        goto out;
 397                }
 398
 399                err = icmp_err_convert[code].errno;
 400                /* check if icmp_skb allows revert of backoff
 401                 * (see draft-zimmermann-tcp-lcd) */
 402                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 403                        break;
 404                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 405                    !icsk->icsk_backoff)
 406                        break;
 407
 408                icsk->icsk_backoff--;
 409                inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
 410                                         icsk->icsk_backoff;
 411                tcp_bound_rto(sk);
 412
 413                skb = tcp_write_queue_head(sk);
 414                BUG_ON(!skb);
 415
 416                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
 417                                tcp_time_stamp - TCP_SKB_CB(skb)->when);
 418
 419                if (remaining) {
 420                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 421                                                  remaining, TCP_RTO_MAX);
 422                } else if (sock_owned_by_user(sk)) {
 423                        /* RTO revert clocked out retransmission,
 424                         * but socket is locked. Will defer. */
 425                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 426                                                  HZ/20, TCP_RTO_MAX);
 427                } else {
 428                        /* RTO revert clocked out retransmission.
 429                         * Will retransmit now */
 430                        tcp_retransmit_timer(sk);
 431                }
 432
 433                break;
 434        case ICMP_TIME_EXCEEDED:
 435                err = EHOSTUNREACH;
 436                break;
 437        default:
 438                goto out;
 439        }
 440
 441        switch (sk->sk_state) {
 442                struct request_sock *req, **prev;
 443        case TCP_LISTEN:
 444                if (sock_owned_by_user(sk))
 445                        goto out;
 446
 447                req = inet_csk_search_req(sk, &prev, th->dest,
 448                                          iph->daddr, iph->saddr);
 449                if (!req)
 450                        goto out;
 451
 452                /* ICMPs are not backlogged, hence we cannot get
 453                   an established socket here.
 454                 */
 455                WARN_ON(req->sk);
 456
 457                if (seq != tcp_rsk(req)->snt_isn) {
 458                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 459                        goto out;
 460                }
 461
 462                /*
 463                 * Still in SYN_RECV, just remove it silently.
 464                 * There is no good way to pass the error to the newly
 465                 * created socket, and POSIX does not want network
 466                 * errors returned from accept().
 467                 */
 468                inet_csk_reqsk_queue_drop(sk, req, prev);
 469                goto out;
 470
 471        case TCP_SYN_SENT:
  472        case TCP_SYN_RECV:  /* Cannot happen normally.
  473                               It can, e.g., if SYNs crossed.
  474                             */
 475                if (!sock_owned_by_user(sk)) {
 476                        sk->sk_err = err;
 477
 478                        sk->sk_error_report(sk);
 479
 480                        tcp_done(sk);
 481                } else {
 482                        sk->sk_err_soft = err;
 483                }
 484                goto out;
 485        }
 486
 487        /* If we've already connected we will keep trying
 488         * until we time out, or the user gives up.
 489         *
  490         * rfc1122 4.2.3.9 allows us to consider as hard errors
 491         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 492         * but it is obsoleted by pmtu discovery).
 493         *
  494         * Note that in the modern internet, where routing is unreliable
  495         * and broken firewalls sit in every dark corner, sending random
  496         * errors ordered by their masters, even these two messages finally lose
  497         * their original sense (even Linux sends invalid PORT_UNREACHs).
 498         *
 499         * Now we are in compliance with RFCs.
 500         *                                                      --ANK (980905)
 501         */
 502
 503        inet = inet_sk(sk);
 504        if (!sock_owned_by_user(sk) && inet->recverr) {
 505                sk->sk_err = err;
 506                sk->sk_error_report(sk);
 507        } else  { /* Only an error on timeout */
 508                sk->sk_err_soft = err;
 509        }
 510
 511out:
 512        bh_unlock_sock(sk);
 513        sock_put(sk);
 514}
 515
 516/* This routine computes an IPv4 TCP checksum. */
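/* With CHECKSUM_PARTIAL, a pseudo-header-only checksum is seeded into
 * th->check and csum_start/csum_offset tell the device (or checksum_help)
 * where to finish the job; otherwise the full checksum is computed in
 * software here.
 */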
 517void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 518{
 519        struct inet_sock *inet = inet_sk(sk);
 520        struct tcphdr *th = tcp_hdr(skb);
 521
 522        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 523                th->check = ~tcp_v4_check(len, inet->saddr,
 524                                          inet->daddr, 0);
 525                skb->csum_start = skb_transport_header(skb) - skb->head;
 526                skb->csum_offset = offsetof(struct tcphdr, check);
 527        } else {
 528                th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
 529                                         csum_partial(th,
 530                                                      th->doff << 2,
 531                                                      skb->csum));
 532        }
 533}
 534
 535int tcp_v4_gso_send_check(struct sk_buff *skb)
 536{
 537        const struct iphdr *iph;
 538        struct tcphdr *th;
 539
 540        if (!pskb_may_pull(skb, sizeof(*th)))
 541                return -EINVAL;
 542
 543        iph = ip_hdr(skb);
 544        th = tcp_hdr(skb);
 545
 546        th->check = 0;
 547        th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
 548        skb->csum_start = skb_transport_header(skb) - skb->head;
 549        skb->csum_offset = offsetof(struct tcphdr, check);
 550        skb->ip_summed = CHECKSUM_PARTIAL;
 551        return 0;
 552}
 553
 554/*
 555 *      This routine will send an RST to the other tcp.
 556 *
  557 *      Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
  558 *                    for the reset?
  559 *      Answer: if a packet caused the RST, it is not for a socket
  560 *              existing in our system; if it is matched to a socket,
  561 *              it is just a duplicate segment or a bug in the other side's TCP.
  562 *              So we build the reply based only on parameters
  563 *              arriving with the segment.
 564 *      Exception: precedence violation. We do not implement it in any case.
 565 */
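/* Per RFC 793: if the offending segment carried an ACK, the RST reuses its
 * ack_seq as our sequence number; otherwise we send seq 0 with an ACK that
 * covers everything the segment occupied (SYN and FIN count as one each).
 */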
 566
 567static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 568{
 569        struct tcphdr *th = tcp_hdr(skb);
 570        struct {
 571                struct tcphdr th;
 572#ifdef CONFIG_TCP_MD5SIG
 573                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 574#endif
 575        } rep;
 576        struct ip_reply_arg arg;
 577#ifdef CONFIG_TCP_MD5SIG
 578        struct tcp_md5sig_key *key;
 579#endif
 580        struct net *net;
 581
 582        /* Never send a reset in response to a reset. */
 583        if (th->rst)
 584                return;
 585
 586        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 587                return;
 588
 589        /* Swap the send and the receive. */
 590        memset(&rep, 0, sizeof(rep));
 591        rep.th.dest   = th->source;
 592        rep.th.source = th->dest;
 593        rep.th.doff   = sizeof(struct tcphdr) / 4;
 594        rep.th.rst    = 1;
 595
 596        if (th->ack) {
 597                rep.th.seq = th->ack_seq;
 598        } else {
 599                rep.th.ack = 1;
 600                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 601                                       skb->len - (th->doff << 2));
 602        }
 603
 604        memset(&arg, 0, sizeof(arg));
 605        arg.iov[0].iov_base = (unsigned char *)&rep;
 606        arg.iov[0].iov_len  = sizeof(rep.th);
 607
 608#ifdef CONFIG_TCP_MD5SIG
 609        key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
 610        if (key) {
 611                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 612                                   (TCPOPT_NOP << 16) |
 613                                   (TCPOPT_MD5SIG << 8) |
 614                                   TCPOLEN_MD5SIG);
 615                /* Update length and the length the header thinks exists */
 616                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 617                rep.th.doff = arg.iov[0].iov_len / 4;
 618
 619                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 620                                     key, ip_hdr(skb)->saddr,
 621                                     ip_hdr(skb)->daddr, &rep.th);
 622        }
 623#endif
 624        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 625                                      ip_hdr(skb)->saddr, /* XXX */
 626                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 627        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 628        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 629
 630        net = dev_net(skb_dst(skb)->dev);
 631        ip_send_reply(net->ipv4.tcp_sock, skb,
 632                      &arg, arg.iov[0].iov_len);
 633
 634        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 635        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 636}
 637
  638/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
  639   outside socket context, is certainly ugly. What can I do?
 640 */
 641
 642static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 643                            u32 win, u32 ts, int oif,
 644                            struct tcp_md5sig_key *key,
 645                            int reply_flags)
 646{
 647        struct tcphdr *th = tcp_hdr(skb);
 648        struct {
 649                struct tcphdr th;
 650                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 651#ifdef CONFIG_TCP_MD5SIG
 652                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 653#endif
 654                        ];
 655        } rep;
 656        struct ip_reply_arg arg;
 657        struct net *net = dev_net(skb_dst(skb)->dev);
 658
 659        memset(&rep.th, 0, sizeof(struct tcphdr));
 660        memset(&arg, 0, sizeof(arg));
 661
 662        arg.iov[0].iov_base = (unsigned char *)&rep;
 663        arg.iov[0].iov_len  = sizeof(rep.th);
 664        if (ts) {
 665                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 666                                   (TCPOPT_TIMESTAMP << 8) |
 667                                   TCPOLEN_TIMESTAMP);
 668                rep.opt[1] = htonl(tcp_time_stamp);
 669                rep.opt[2] = htonl(ts);
 670                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 671        }
 672
 673        /* Swap the send and the receive. */
 674        rep.th.dest    = th->source;
 675        rep.th.source  = th->dest;
 676        rep.th.doff    = arg.iov[0].iov_len / 4;
 677        rep.th.seq     = htonl(seq);
 678        rep.th.ack_seq = htonl(ack);
 679        rep.th.ack     = 1;
 680        rep.th.window  = htons(win);
 681
 682#ifdef CONFIG_TCP_MD5SIG
 683        if (key) {
 684                int offset = (ts) ? 3 : 0;
 685
 686                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 687                                          (TCPOPT_NOP << 16) |
 688                                          (TCPOPT_MD5SIG << 8) |
 689                                          TCPOLEN_MD5SIG);
 690                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 691                rep.th.doff = arg.iov[0].iov_len/4;
 692
 693                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 694                                    key, ip_hdr(skb)->saddr,
 695                                    ip_hdr(skb)->daddr, &rep.th);
 696        }
 697#endif
 698        arg.flags = reply_flags;
 699        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 700                                      ip_hdr(skb)->saddr, /* XXX */
 701                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 702        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 703        if (oif)
 704                arg.bound_dev_if = oif;
 705
 706        ip_send_reply(net->ipv4.tcp_sock, skb,
 707                      &arg, arg.iov[0].iov_len);
 708
 709        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 710}
 711
 712static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 713{
 714        struct inet_timewait_sock *tw = inet_twsk(sk);
 715        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 716
 717        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 718                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 719                        tcptw->tw_ts_recent,
 720                        tw->tw_bound_dev_if,
 721                        tcp_twsk_md5_key(tcptw),
 722                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
 723                        );
 724
 725        inet_twsk_put(tw);
 726}
 727
 728static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 729                                  struct request_sock *req)
 730{
 731        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 732                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 733                        req->ts_recent,
 734                        0,
 735                        tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
 736                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
 737}
 738
 739/*
 740 *      Send a SYN-ACK after having received a SYN.
 741 *      This still operates on a request_sock only, not on a big
 742 *      socket.
 743 */
 744static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 745                                struct dst_entry *dst)
 746{
 747        const struct inet_request_sock *ireq = inet_rsk(req);
 748        int err = -1;
 749        struct sk_buff * skb;
 750
 751        /* First, grab a route. */
 752        if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
 753                return -1;
 754
 755        skb = tcp_make_synack(sk, dst, req);
 756
 757        if (skb) {
 758                struct tcphdr *th = tcp_hdr(skb);
 759
 760                th->check = tcp_v4_check(skb->len,
 761                                         ireq->loc_addr,
 762                                         ireq->rmt_addr,
 763                                         csum_partial(th, skb->len,
 764                                                      skb->csum));
 765
 766                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 767                                            ireq->rmt_addr,
 768                                            ireq->opt);
 769                err = net_xmit_eval(err);
 770        }
 771
 772        dst_release(dst);
 773        return err;
 774}
 775
 776static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
 777{
 778        return __tcp_v4_send_synack(sk, req, NULL);
 779}
 780
 781/*
 782 *      IPv4 request_sock destructor.
 783 */
 784static void tcp_v4_reqsk_destructor(struct request_sock *req)
 785{
 786        kfree(inet_rsk(req)->opt);
 787}
 788
 789#ifdef CONFIG_SYN_COOKIES
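/* Rate-limited: at most one warning per minute (HZ * 60 jiffies). */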
 790static void syn_flood_warning(struct sk_buff *skb)
 791{
 792        static unsigned long warntime;
 793
 794        if (time_after(jiffies, (warntime + HZ * 60))) {
 795                warntime = jiffies;
 796                printk(KERN_INFO
 797                       "possible SYN flooding on port %d. Sending cookies.\n",
 798                       ntohs(tcp_hdr(skb)->dest));
 799        }
 800}
 801#endif
 802
 803/*
 804 * Save and compile IPv4 options into the request_sock if needed.
 805 */
 806static struct ip_options *tcp_v4_save_options(struct sock *sk,
 807                                              struct sk_buff *skb)
 808{
 809        struct ip_options *opt = &(IPCB(skb)->opt);
 810        struct ip_options *dopt = NULL;
 811
 812        if (opt && opt->optlen) {
 813                int opt_size = optlength(opt);
 814                dopt = kmalloc(opt_size, GFP_ATOMIC);
 815                if (dopt) {
 816                        if (ip_options_echo(dopt, skb)) {
 817                                kfree(dopt);
 818                                dopt = NULL;
 819                        }
 820                }
 821        }
 822        return dopt;
 823}
 824
 825#ifdef CONFIG_TCP_MD5SIG
 826/*
 827 * RFC2385 MD5 checksumming requires a mapping of
 828 * IP address->MD5 Key.
 829 * We need to maintain these in the sk structure.
 830 */
 831
 832/* Find the Key structure for an address.  */
 833static struct tcp_md5sig_key *
 834                        tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 835{
 836        struct tcp_sock *tp = tcp_sk(sk);
 837        int i;
 838
 839        if (!tp->md5sig_info || !tp->md5sig_info->entries4)
 840                return NULL;
 841        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 842                if (tp->md5sig_info->keys4[i].addr == addr)
 843                        return &tp->md5sig_info->keys4[i].base;
 844        }
 845        return NULL;
 846}
 847
 848struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 849                                         struct sock *addr_sk)
 850{
 851        return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
 852}
 853
 854EXPORT_SYMBOL(tcp_v4_md5_lookup);
 855
 856static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 857                                                      struct request_sock *req)
 858{
 859        return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
 860}
 861
 862/* This can be called on a newly created socket, from other files */
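/* Note: this function takes ownership of newkey; every error path below
 * kfree()s it, so callers must not free it again on failure.
 */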
 863int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 864                      u8 *newkey, u8 newkeylen)
 865{
 866        /* Add Key to the list */
 867        struct tcp_md5sig_key *key;
 868        struct tcp_sock *tp = tcp_sk(sk);
 869        struct tcp4_md5sig_key *keys;
 870
 871        key = tcp_v4_md5_do_lookup(sk, addr);
 872        if (key) {
 873                /* Pre-existing entry - just update that one. */
 874                kfree(key->key);
 875                key->key = newkey;
 876                key->keylen = newkeylen;
 877        } else {
 878                struct tcp_md5sig_info *md5sig;
 879
 880                if (!tp->md5sig_info) {
 881                        tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
 882                                                  GFP_ATOMIC);
 883                        if (!tp->md5sig_info) {
 884                                kfree(newkey);
 885                                return -ENOMEM;
 886                        }
 887                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 888                }
 889                if (tcp_alloc_md5sig_pool(sk) == NULL) {
 890                        kfree(newkey);
 891                        return -ENOMEM;
 892                }
 893                md5sig = tp->md5sig_info;
 894
 895                if (md5sig->alloced4 == md5sig->entries4) {
 896                        keys = kmalloc((sizeof(*keys) *
 897                                        (md5sig->entries4 + 1)), GFP_ATOMIC);
 898                        if (!keys) {
 899                                kfree(newkey);
 900                                tcp_free_md5sig_pool();
 901                                return -ENOMEM;
 902                        }
 903
 904                        if (md5sig->entries4)
 905                                memcpy(keys, md5sig->keys4,
 906                                       sizeof(*keys) * md5sig->entries4);
 907
 908                        /* Free old key list, and reference new one */
 909                        kfree(md5sig->keys4);
 910                        md5sig->keys4 = keys;
 911                        md5sig->alloced4++;
 912                }
 913                md5sig->entries4++;
 914                md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
 915                md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
 916                md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
 917        }
 918        return 0;
 919}
 920
 921EXPORT_SYMBOL(tcp_v4_md5_do_add);
 922
 923static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
 924                               u8 *newkey, u8 newkeylen)
 925{
 926        return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
 927                                 newkey, newkeylen);
 928}
 929
 930int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 931{
 932        struct tcp_sock *tp = tcp_sk(sk);
 933        int i;
 934
 935        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 936                if (tp->md5sig_info->keys4[i].addr == addr) {
 937                        /* Free the key */
 938                        kfree(tp->md5sig_info->keys4[i].base.key);
 939                        tp->md5sig_info->entries4--;
 940
 941                        if (tp->md5sig_info->entries4 == 0) {
 942                                kfree(tp->md5sig_info->keys4);
 943                                tp->md5sig_info->keys4 = NULL;
 944                                tp->md5sig_info->alloced4 = 0;
 945                        } else if (tp->md5sig_info->entries4 != i) {
  946                                /* Slide the remaining entries down over the removed one */
 947                                memmove(&tp->md5sig_info->keys4[i],
 948                                        &tp->md5sig_info->keys4[i+1],
 949                                        (tp->md5sig_info->entries4 - i) *
 950                                         sizeof(struct tcp4_md5sig_key));
 951                        }
 952                        tcp_free_md5sig_pool();
 953                        return 0;
 954                }
 955        }
 956        return -ENOENT;
 957}
 958
 959EXPORT_SYMBOL(tcp_v4_md5_do_del);
 960
 961static void tcp_v4_clear_md5_list(struct sock *sk)
 962{
 963        struct tcp_sock *tp = tcp_sk(sk);
 964
  965        /* Free each key, then the set of keys,
 966         * the crypto element, and then decrement our
 967         * hold on the last resort crypto.
 968         */
 969        if (tp->md5sig_info->entries4) {
 970                int i;
 971                for (i = 0; i < tp->md5sig_info->entries4; i++)
 972                        kfree(tp->md5sig_info->keys4[i].base.key);
 973                tp->md5sig_info->entries4 = 0;
 974                tcp_free_md5sig_pool();
 975        }
 976        if (tp->md5sig_info->keys4) {
 977                kfree(tp->md5sig_info->keys4);
 978                tp->md5sig_info->keys4 = NULL;
 979                tp->md5sig_info->alloced4  = 0;
 980        }
 981}
 982
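/* For context, a sketch of how user space typically reaches this handler
 * (illustrative only; fd and the peer address are placeholders):
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = inet_addr("192.0.2.1");
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key for that address instead.
 */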
 983static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 984                                 int optlen)
 985{
 986        struct tcp_md5sig cmd;
 987        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
 988        u8 *newkey;
 989
 990        if (optlen < sizeof(cmd))
 991                return -EINVAL;
 992
 993        if (copy_from_user(&cmd, optval, sizeof(cmd)))
 994                return -EFAULT;
 995
 996        if (sin->sin_family != AF_INET)
 997                return -EINVAL;
 998
 999        if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1000                if (!tcp_sk(sk)->md5sig_info)
1001                        return -ENOENT;
1002                return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1003        }
1004
1005        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1006                return -EINVAL;
1007
1008        if (!tcp_sk(sk)->md5sig_info) {
1009                struct tcp_sock *tp = tcp_sk(sk);
1010                struct tcp_md5sig_info *p;
1011
1012                p = kzalloc(sizeof(*p), sk->sk_allocation);
1013                if (!p)
1014                        return -EINVAL;
1015
1016                tp->md5sig_info = p;
1017                sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1018        }
1019
1020        newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1021        if (!newkey)
1022                return -ENOMEM;
1023        return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1024                                 newkey, cmd.tcpm_keylen);
1025}
1026
1027static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1028                                        __be32 daddr, __be32 saddr, int nbytes)
1029{
1030        struct tcp4_pseudohdr *bp;
1031        struct scatterlist sg;
1032
1033        bp = &hp->md5_blk.ip4;
1034
1035        /*
1036         * 1. the TCP pseudo-header (in the order: source IP address,
1037         * destination IP address, zero-padded protocol number, and
1038         * segment length)
1039         */
1040        bp->saddr = saddr;
1041        bp->daddr = daddr;
1042        bp->pad = 0;
1043        bp->protocol = IPPROTO_TCP;
1044        bp->len = cpu_to_be16(nbytes);
1045
1046        sg_init_one(&sg, bp, sizeof(*bp));
1047        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1048}
1049
1050static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1051                               __be32 daddr, __be32 saddr, struct tcphdr *th)
1052{
1053        struct tcp_md5sig_pool *hp;
1054        struct hash_desc *desc;
1055
1056        hp = tcp_get_md5sig_pool();
1057        if (!hp)
1058                goto clear_hash_noput;
1059        desc = &hp->md5_desc;
1060
1061        if (crypto_hash_init(desc))
1062                goto clear_hash;
1063        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1064                goto clear_hash;
1065        if (tcp_md5_hash_header(hp, th))
1066                goto clear_hash;
1067        if (tcp_md5_hash_key(hp, key))
1068                goto clear_hash;
1069        if (crypto_hash_final(desc, md5_hash))
1070                goto clear_hash;
1071
1072        tcp_put_md5sig_pool();
1073        return 0;
1074
1075clear_hash:
1076        tcp_put_md5sig_pool();
1077clear_hash_noput:
1078        memset(md5_hash, 0, 16);
1079        return 1;
1080}
1081
1082int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1083                        struct sock *sk, struct request_sock *req,
1084                        struct sk_buff *skb)
1085{
1086        struct tcp_md5sig_pool *hp;
1087        struct hash_desc *desc;
1088        struct tcphdr *th = tcp_hdr(skb);
1089        __be32 saddr, daddr;
1090
1091        if (sk) {
1092                saddr = inet_sk(sk)->saddr;
1093                daddr = inet_sk(sk)->daddr;
1094        } else if (req) {
1095                saddr = inet_rsk(req)->loc_addr;
1096                daddr = inet_rsk(req)->rmt_addr;
1097        } else {
1098                const struct iphdr *iph = ip_hdr(skb);
1099                saddr = iph->saddr;
1100                daddr = iph->daddr;
1101        }
1102
1103        hp = tcp_get_md5sig_pool();
1104        if (!hp)
1105                goto clear_hash_noput;
1106        desc = &hp->md5_desc;
1107
1108        if (crypto_hash_init(desc))
1109                goto clear_hash;
1110
1111        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1112                goto clear_hash;
1113        if (tcp_md5_hash_header(hp, th))
1114                goto clear_hash;
1115        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1116                goto clear_hash;
1117        if (tcp_md5_hash_key(hp, key))
1118                goto clear_hash;
1119        if (crypto_hash_final(desc, md5_hash))
1120                goto clear_hash;
1121
1122        tcp_put_md5sig_pool();
1123        return 0;
1124
1125clear_hash:
1126        tcp_put_md5sig_pool();
1127clear_hash_noput:
1128        memset(md5_hash, 0, 16);
1129        return 1;
1130}
1131
1132EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1133
1134static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1135{
1136        /*
1137         * This gets called for each TCP segment that arrives
1138         * so we want to be efficient.
1139         * We have 3 drop cases:
1140         * o No MD5 hash and one expected.
1141         * o MD5 hash and we're not expecting one.
 1142         * o MD5 hash and it's wrong.
1143         */
1144        __u8 *hash_location = NULL;
1145        struct tcp_md5sig_key *hash_expected;
1146        const struct iphdr *iph = ip_hdr(skb);
1147        struct tcphdr *th = tcp_hdr(skb);
1148        int genhash;
1149        unsigned char newhash[16];
1150
1151        hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1152        hash_location = tcp_parse_md5sig_option(th);
1153
1154        /* We've parsed the options - do we have a hash? */
1155        if (!hash_expected && !hash_location)
1156                return 0;
1157
1158        if (hash_expected && !hash_location) {
1159                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1160                return 1;
1161        }
1162
1163        if (!hash_expected && hash_location) {
1164                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1165                return 1;
1166        }
1167
1168        /* Okay, so this is hash_expected and hash_location -
1169         * so we need to calculate the checksum.
1170         */
1171        genhash = tcp_v4_md5_hash_skb(newhash,
1172                                      hash_expected,
1173                                      NULL, NULL, skb);
1174
1175        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1176                if (net_ratelimit()) {
1177                        printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1178                               &iph->saddr, ntohs(th->source),
1179                               &iph->daddr, ntohs(th->dest),
1180                               genhash ? " tcp_v4_calc_md5_hash failed" : "");
1181                }
1182                return 1;
1183        }
1184        return 0;
1185}
1186
1187#endif
1188
1189struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1190        .family         =       PF_INET,
1191        .obj_size       =       sizeof(struct tcp_request_sock),
1192        .rtx_syn_ack    =       tcp_v4_send_synack,
1193        .send_ack       =       tcp_v4_reqsk_send_ack,
1194        .destructor     =       tcp_v4_reqsk_destructor,
1195        .send_reset     =       tcp_v4_send_reset,
1196};
1197
1198#ifdef CONFIG_TCP_MD5SIG
1199static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1200        .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
1201        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1202};
1203#endif
1204
1205static struct timewait_sock_ops tcp_timewait_sock_ops = {
1206        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1207        .twsk_unique    = tcp_twsk_unique,
1208        .twsk_destructor= tcp_twsk_destructor,
1209};
1210
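/* Handle an incoming SYN on a listening socket: drop SYNs to broadcast or
 * multicast, fall back to syncookies (if enabled) when the request queue is
 * full, parse and save the options, optionally apply the per-peer PAWS check
 * when tw_recycle is on, then send the SYN-ACK and, unless a cookie was used,
 * queue the request sock with TCP_TIMEOUT_INIT.
 */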
1211int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1212{
1213        struct inet_request_sock *ireq;
1214        struct tcp_options_received tmp_opt;
1215        struct request_sock *req;
1216        __be32 saddr = ip_hdr(skb)->saddr;
1217        __be32 daddr = ip_hdr(skb)->daddr;
1218        __u32 isn = TCP_SKB_CB(skb)->when;
1219        struct dst_entry *dst = NULL;
1220#ifdef CONFIG_SYN_COOKIES
1221        int want_cookie = 0;
1222#else
1223#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1224#endif
1225
 1226        /* Never answer SYNs sent to broadcast or multicast */
1227        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1228                goto drop;
1229
1230        /* TW buckets are converted to open requests without
 1231         * limitations; they conserve resources and the peer is
 1232         * evidently a real one.
1233         */
1234        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1235#ifdef CONFIG_SYN_COOKIES
1236                if (sysctl_tcp_syncookies) {
1237                        want_cookie = 1;
1238                } else
1239#endif
1240                goto drop;
1241        }
1242
1243        /* Accept backlog is full. If we have already queued enough
 1244         * warm entries in the syn queue, drop the request. It is better than
 1245         * clogging the syn queue with openreqs with exponentially increasing
 1246         * timeouts.
1247         */
1248        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1249                goto drop;
1250
1251        req = inet_reqsk_alloc(&tcp_request_sock_ops);
1252        if (!req)
1253                goto drop;
1254
1255#ifdef CONFIG_TCP_MD5SIG
1256        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1257#endif
1258
1259        tcp_clear_options(&tmp_opt);
1260        tmp_opt.mss_clamp = 536;
1261        tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1262
1263        tcp_parse_options(skb, &tmp_opt, 0);
1264
1265        if (want_cookie && !tmp_opt.saw_tstamp)
1266                tcp_clear_options(&tmp_opt);
1267
1268        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1269
1270        tcp_openreq_init(req, &tmp_opt, skb);
1271
1272        ireq = inet_rsk(req);
1273        ireq->loc_addr = daddr;
1274        ireq->rmt_addr = saddr;
1275        ireq->no_srccheck = inet_sk(sk)->transparent;
1276        ireq->opt = tcp_v4_save_options(sk, skb);
1277
1278        if (security_inet_conn_request(sk, skb, req))
1279                goto drop_and_free;
1280
1281        if (!want_cookie)
1282                TCP_ECN_create_request(req, tcp_hdr(skb));
1283
1284        if (want_cookie) {
1285#ifdef CONFIG_SYN_COOKIES
1286                syn_flood_warning(skb);
1287                req->cookie_ts = tmp_opt.tstamp_ok;
1288#endif
1289                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1290        } else if (!isn) {
1291                struct inet_peer *peer = NULL;
1292
1293                /* VJ's idea. We save last timestamp seen
1294                 * from the destination in peer table, when entering
1295                 * state TIME-WAIT, and check against it before
1296                 * accepting new connection request.
1297                 *
 1298                 * If "isn" is not zero, this request hit an alive
 1299                 * timewait bucket, so all the necessary checks
 1300                 * are made in the function processing the timewait state.
1301                 */
1302                if (tmp_opt.saw_tstamp &&
1303                    tcp_death_row.sysctl_tw_recycle &&
1304                    (dst = inet_csk_route_req(sk, req)) != NULL &&
1305                    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1306                    peer->v4daddr == saddr) {
1307                        if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1308                            (s32)(peer->tcp_ts - req->ts_recent) >
1309                                                        TCP_PAWS_WINDOW) {
1310                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1311                                goto drop_and_release;
1312                        }
1313                }
1314                /* Kill the following clause, if you dislike this way. */
1315                else if (!sysctl_tcp_syncookies &&
1316                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1317                          (sysctl_max_syn_backlog >> 2)) &&
1318                         (!peer || !peer->tcp_ts_stamp) &&
1319                         (!dst || !dst_metric(dst, RTAX_RTT))) {
 1320                        /* Without syncookies, the last quarter of the
 1321                         * backlog is filled with destinations
 1322                         * proven to be alive.
 1323                         * It means that we continue to communicate only
 1324                         * with destinations already remembered
 1325                         * by the moment of the synflood.
1326                         */
1327                        LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1328                                       &saddr, ntohs(tcp_hdr(skb)->source));
1329                        goto drop_and_release;
1330                }
1331
1332                isn = tcp_v4_init_sequence(skb);
1333        }
1334        tcp_rsk(req)->snt_isn = isn;
1335
1336        if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
1337                goto drop_and_free;
1338
1339        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1340        return 0;
1341
1342drop_and_release:
1343        dst_release(dst);
1344drop_and_free:
1345        reqsk_free(req);
1346drop:
1347        return 0;
1348}
1349
1350
1351/*
 1352 * The three way handshake has completed - we got a valid ACK -
1353 * now create the new socket.
1354 */
1355struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1356                                  struct request_sock *req,
1357                                  struct dst_entry *dst)
1358{
1359        struct inet_request_sock *ireq;
1360        struct inet_sock *newinet;
1361        struct tcp_sock *newtp;
1362        struct sock *newsk;
1363#ifdef CONFIG_TCP_MD5SIG
1364        struct tcp_md5sig_key *key;
1365#endif
1366
1367        if (sk_acceptq_is_full(sk))
1368                goto exit_overflow;
1369
1370        if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1371                goto exit;
1372
1373        newsk = tcp_create_openreq_child(sk, req, skb);
1374        if (!newsk)
1375                goto exit;
1376
1377        newsk->sk_gso_type = SKB_GSO_TCPV4;
1378        sk_setup_caps(newsk, dst);
1379
1380        newtp                 = tcp_sk(newsk);
1381        newinet               = inet_sk(newsk);
1382        ireq                  = inet_rsk(req);
1383        newinet->daddr        = ireq->rmt_addr;
1384        newinet->rcv_saddr    = ireq->loc_addr;
1385        newinet->saddr        = ireq->loc_addr;
1386        newinet->opt          = ireq->opt;
1387        ireq->opt             = NULL;
1388        newinet->mc_index     = inet_iif(skb);
1389        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1390        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1391        if (newinet->opt)
1392                inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1393        newinet->id = newtp->write_seq ^ jiffies;
1394
1395        tcp_mtup_init(newsk);
1396        tcp_sync_mss(newsk, dst_mtu(dst));
1397        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1398        if (tcp_sk(sk)->rx_opt.user_mss &&
1399            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1400                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1401
1402        tcp_initialize_rcv_mss(newsk);
1403
1404#ifdef CONFIG_TCP_MD5SIG
1405        /* Copy over the MD5 key from the original socket */
1406        if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1407                /*
1408                 * We're using one, so create a matching key
1409                 * on the newsk structure. If we fail to get
1410                 * memory, then we end up not copying the key
1411                 * across. Shucks.
1412                 */
1413                char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1414                if (newkey != NULL)
1415                        tcp_v4_md5_do_add(newsk, newinet->daddr,
1416                                          newkey, key->keylen);
1417                newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1418        }
1419#endif
1420
1421        __inet_hash_nolisten(newsk);
1422        __inet_inherit_port(sk, newsk);
1423
1424        return newsk;
1425
1426exit_overflow:
1427        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1428exit:
1429        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1430        dst_release(dst);
1431        return NULL;
1432}
1433
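/* Map an incoming segment on a listening socket to the socket that should
 * process it: a pending request sock (completed via tcp_check_req()), an
 * already established socket, a syncookie-validated child, or the listening
 * sk itself if nothing matched; NULL means the segment should be discarded
 * (e.g. a TIME-WAIT hit).
 */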
1434static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1435{
1436        struct tcphdr *th = tcp_hdr(skb);
1437        const struct iphdr *iph = ip_hdr(skb);
1438        struct sock *nsk;
1439        struct request_sock **prev;
1440        /* Find possible connection requests. */
1441        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1442                                                       iph->saddr, iph->daddr);
1443        if (req)
1444                return tcp_check_req(sk, skb, req, prev);
1445
1446        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1447                        th->source, iph->daddr, th->dest, inet_iif(skb));
1448
1449        if (nsk) {
1450                if (nsk->sk_state != TCP_TIME_WAIT) {
1451                        bh_lock_sock(nsk);
1452                        return nsk;
1453                }
1454                inet_twsk_put(inet_twsk(nsk));
1455                return NULL;
1456        }
1457
1458#ifdef CONFIG_SYN_COOKIES
1459        if (!th->rst && !th->syn && th->ack)
1460                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1461#endif
1462        return sk;
1463}
1464
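/*
 * Set up checksum state for an incoming segment.  If the device handed us
 * a full checksum (CHECKSUM_COMPLETE) it can be verified right here;
 * otherwise we only seed skb->csum with the pseudo-header sum, verify
 * short packets (76 bytes or less) immediately, and leave larger ones to
 * be verified later via tcp_checksum_complete() when the data is used.
 */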
1465static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1466{
1467        const struct iphdr *iph = ip_hdr(skb);
1468
1469        if (skb->ip_summed == CHECKSUM_COMPLETE) {
1470                if (!tcp_v4_check(skb->len, iph->saddr,
1471                                  iph->daddr, skb->csum)) {
1472                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1473                        return 0;
1474                }
1475        }
1476
1477        skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1478                                       skb->len, IPPROTO_TCP, 0);
1479
1480        if (skb->len <= 76) {
1481                return __skb_checksum_complete(skb);
1482        }
1483        return 0;
1484}
1485
1486
1487/* The socket must have its spinlock held when we get
1488 * here.
1489 *
1490 * We have a potential double-lock case here, so even when
1491 * doing backlog processing we use the BH locking scheme.
1492 * This is because we cannot sleep with the original spinlock
1493 * held.
1494 */
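/*
 * For illustration, the softirq caller in tcp_v4_rcv() below does,
 * in essence:
 *
 *	bh_lock_sock_nested(sk);
 *	if (!sock_owned_by_user(sk))
 *		ret = tcp_v4_do_rcv(sk, skb);
 *	else
 *		sk_add_backlog(sk, skb);
 *	bh_unlock_sock(sk);
 */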
1495int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1496{
1497        struct sock *rsk;
1498#ifdef CONFIG_TCP_MD5SIG
1499        /*
1500         * We really want to reject the packet as early as possible
1501         * if:
1502         *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1503         *  o There is an MD5 option and we're not expecting one
1504         */
1505        if (tcp_v4_inbound_md5_hash(sk, skb))
1506                goto discard;
1507#endif
1508
1509        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1510                TCP_CHECK_TIMER(sk);
1511                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1512                        rsk = sk;
1513                        goto reset;
1514                }
1515                TCP_CHECK_TIMER(sk);
1516                return 0;
1517        }
1518
1519        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1520                goto csum_err;
1521
1522        if (sk->sk_state == TCP_LISTEN) {
1523                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1524                if (!nsk)
1525                        goto discard;
1526
1527                if (nsk != sk) {
1528                        if (tcp_child_process(sk, nsk, skb)) {
1529                                rsk = nsk;
1530                                goto reset;
1531                        }
1532                        return 0;
1533                }
1534        }
1535
1536        TCP_CHECK_TIMER(sk);
1537        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1538                rsk = sk;
1539                goto reset;
1540        }
1541        TCP_CHECK_TIMER(sk);
1542        return 0;
1543
1544reset:
1545        tcp_v4_send_reset(rsk, skb);
1546discard:
1547        kfree_skb(skb);
1548        /* Be careful here. If this function gets more complicated and
1549         * gcc suffers from register pressure on the x86, sk (in %ebx)
1550         * might be destroyed here. This current version compiles correctly,
1551         * but you have been warned.
1552         */
1553        return 0;
1554
1555csum_err:
1556        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1557        goto discard;
1558}
1559
1560/*
1561 *      From tcp_input.c
1562 */
1563
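/*
 * Main IPv4 receive entry point, called from the IP layer for every TCP
 * segment addressed to this host.  It validates the header, (lazily)
 * initializes checksum state, looks up the owning socket, handles
 * TIME_WAIT and XFRM policy, and then delivers the segment either
 * directly via tcp_v4_do_rcv(), through the prequeue, or onto the socket
 * backlog, depending on who currently owns the socket lock.
 */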
1564int tcp_v4_rcv(struct sk_buff *skb)
1565{
1566        const struct iphdr *iph;
1567        struct tcphdr *th;
1568        struct sock *sk;
1569        int ret;
1570        struct net *net = dev_net(skb->dev);
1571
1572        if (skb->pkt_type != PACKET_HOST)
1573                goto discard_it;
1574
1575        /* Count it even if it's bad */
1576        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1577
1578        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1579                goto discard_it;
1580
1581        th = tcp_hdr(skb);
1582
1583        if (th->doff < sizeof(struct tcphdr) / 4)
1584                goto bad_packet;
1585        if (!pskb_may_pull(skb, th->doff * 4))
1586                goto discard_it;
1587
1588        /* An explanation is required here, I think.
1589         * Packet length and doff are validated by header prediction,
1590         * provided the case of th->doff == 0 is eliminated.
1591         * So, we defer the checks. */
1592        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1593                goto bad_packet;
1594
1595        th = tcp_hdr(skb);
1596        iph = ip_hdr(skb);
1597        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1598        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1599                                    skb->len - th->doff * 4);
1600        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1601        TCP_SKB_CB(skb)->when    = 0;
1602        TCP_SKB_CB(skb)->flags   = iph->tos;
1603        TCP_SKB_CB(skb)->sacked  = 0;
1604
1605        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1606        if (!sk)
1607                goto no_tcp_socket;
1608
1609process:
1610        if (sk->sk_state == TCP_TIME_WAIT)
1611                goto do_time_wait;
1612
1613        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1614                goto discard_and_relse;
1615        nf_reset(skb);
1616
1617        if (sk_filter(sk, skb))
1618                goto discard_and_relse;
1619
1620        skb->dev = NULL;
1621
1622        bh_lock_sock_nested(sk);
1623        ret = 0;
1624        if (!sock_owned_by_user(sk)) {
1625#ifdef CONFIG_NET_DMA
1626                struct tcp_sock *tp = tcp_sk(sk);
1627                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1628                        tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1629                if (tp->ucopy.dma_chan)
1630                        ret = tcp_v4_do_rcv(sk, skb);
1631                else
1632#endif
1633                {
1634                        if (!tcp_prequeue(sk, skb))
1635                                ret = tcp_v4_do_rcv(sk, skb);
1636                }
1637        } else
1638                sk_add_backlog(sk, skb);
1639        bh_unlock_sock(sk);
1640
1641        sock_put(sk);
1642
1643        return ret;
1644
1645no_tcp_socket:
1646        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1647                goto discard_it;
1648
1649        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1650bad_packet:
1651                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1652        } else {
1653                tcp_v4_send_reset(NULL, skb);
1654        }
1655
1656discard_it:
1657        /* Discard frame. */
1658        kfree_skb(skb);
1659        return 0;
1660
1661discard_and_relse:
1662        sock_put(sk);
1663        goto discard_it;
1664
1665do_time_wait:
1666        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1667                inet_twsk_put(inet_twsk(sk));
1668                goto discard_it;
1669        }
1670
1671        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1672                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1673                inet_twsk_put(inet_twsk(sk));
1674                goto discard_it;
1675        }
1676        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1677        case TCP_TW_SYN: {
1678                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1679                                                        &tcp_hashinfo,
1680                                                        iph->daddr, th->dest,
1681                                                        inet_iif(skb));
1682                if (sk2) {
1683                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1684                        inet_twsk_put(inet_twsk(sk));
1685                        sk = sk2;
1686                        goto process;
1687                }
1688                /* Fall through to ACK */
1689        }
1690        case TCP_TW_ACK:
1691                tcp_v4_timewait_ack(sk, skb);
1692                break;
1693        case TCP_TW_RST:
1694                goto no_tcp_socket;
1695        case TCP_TW_SUCCESS:;
1696        }
1697        goto discard_it;
1698}
1699
1700/* VJ's idea. Save the last timestamp seen from this destination and hold
1701 * it for at least the normal TIME-WAIT interval, so it can be used for
1702 * duplicate segment detection in subsequent connections before they enter
1703 * the synchronized state.
1704 */
1705
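/*
 * The cached per-destination stamp is only ever moved forward; an existing
 * entry is overwritten with a different value only once it has been idle
 * for more than TCP_PAWS_MSL seconds, so a stale inet_peer entry cannot
 * make a genuinely newer timestamp look old.
 */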
1706int tcp_v4_remember_stamp(struct sock *sk)
1707{
1708        struct inet_sock *inet = inet_sk(sk);
1709        struct tcp_sock *tp = tcp_sk(sk);
1710        struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1711        struct inet_peer *peer = NULL;
1712        int release_it = 0;
1713
1714        if (!rt || rt->rt_dst != inet->daddr) {
1715                peer = inet_getpeer(inet->daddr, 1);
1716                release_it = 1;
1717        } else {
1718                if (!rt->peer)
1719                        rt_bind_peer(rt, 1);
1720                peer = rt->peer;
1721        }
1722
1723        if (peer) {
1724                if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1725                    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1726                     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1727                        peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1728                        peer->tcp_ts = tp->rx_opt.ts_recent;
1729                }
1730                if (release_it)
1731                        inet_putpeer(peer);
1732                return 1;
1733        }
1734
1735        return 0;
1736}
1737
1738int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1739{
1740        struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1741
1742        if (peer) {
1743                const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1744
1745                if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1746                    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1747                     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1748                        peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1749                        peer->tcp_ts       = tcptw->tw_ts_recent;
1750                }
1751                inet_putpeer(peer);
1752                return 1;
1753        }
1754
1755        return 0;
1756}
1757
1758const struct inet_connection_sock_af_ops ipv4_specific = {
1759        .queue_xmit        = ip_queue_xmit,
1760        .send_check        = tcp_v4_send_check,
1761        .rebuild_header    = inet_sk_rebuild_header,
1762        .conn_request      = tcp_v4_conn_request,
1763        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1764        .remember_stamp    = tcp_v4_remember_stamp,
1765        .net_header_len    = sizeof(struct iphdr),
1766        .setsockopt        = ip_setsockopt,
1767        .getsockopt        = ip_getsockopt,
1768        .addr2sockaddr     = inet_csk_addr2sockaddr,
1769        .sockaddr_len      = sizeof(struct sockaddr_in),
1770        .bind_conflict     = inet_csk_bind_conflict,
1771#ifdef CONFIG_COMPAT
1772        .compat_setsockopt = compat_ip_setsockopt,
1773        .compat_getsockopt = compat_ip_getsockopt,
1774#endif
1775};
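/*
 * This table is installed as icsk->icsk_af_ops in tcp_v4_init_sock()
 * below.  The address-family independent TCP code calls through it (for
 * example, tcp_transmit_skb() sends via ->queue_xmit()), which is what
 * lets tcp_ipv6.c substitute its own ipv6_specific / ipv6_mapped tables
 * for IPv6 and v4-mapped sockets.
 */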
1776
1777#ifdef CONFIG_TCP_MD5SIG
1778static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1779        .md5_lookup             = tcp_v4_md5_lookup,
1780        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1781        .md5_add                = tcp_v4_md5_add_func,
1782        .md5_parse              = tcp_v4_parse_md5_keys,
1783};
1784#endif
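/*
 * Keys reach the .md5_parse hook above (tcp_v4_parse_md5_keys()) from
 * user space through the TCP_MD5SIG socket option.  A rough, purely
 * illustrative sketch of the user-space side, assuming struct tcp_md5sig
 * from <linux/tcp.h> and peer_addr being the destination sockaddr_in:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *
 *	memcpy(&md5.tcpm_addr, &peer_addr, sizeof(peer_addr));
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */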
1785
1786/* NOTE: A lot of things are set to zero explicitly by the call to
1787 *       sk_alloc(), so they need not be done here.
1788 */
1789static int tcp_v4_init_sock(struct sock *sk)
1790{
1791        struct inet_connection_sock *icsk = inet_csk(sk);
1792        struct tcp_sock *tp = tcp_sk(sk);
1793
1794        skb_queue_head_init(&tp->out_of_order_queue);
1795        tcp_init_xmit_timers(sk);
1796        tcp_prequeue_init(tp);
1797
1798        icsk->icsk_rto = TCP_TIMEOUT_INIT;
1799        tp->mdev = TCP_TIMEOUT_INIT;
1800
1801        /* So many TCP implementations out there (incorrectly) count the
1802         * initial SYN frame in their delayed-ACK and congestion control
1803         * algorithms that we must have the following bandaid to talk
1804         * efficiently to them.  -DaveM
1805         */
1806        tp->snd_cwnd = 2;
1807
1808        /* See draft-stevens-tcpca-spec-01 for discussion of the
1809         * initialization of these values.
1810         */
1811        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1812        tp->snd_cwnd_clamp = ~0;
1813        tp->mss_cache = 536;
1814
1815        tp->reordering = sysctl_tcp_reordering;
1816        icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1817
1818        sk->sk_state = TCP_CLOSE;
1819
1820        sk->sk_write_space = sk_stream_write_space;
1821        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1822
1823        icsk->icsk_af_ops = &ipv4_specific;
1824        icsk->icsk_sync_mss = tcp_sync_mss;
1825#ifdef CONFIG_TCP_MD5SIG
1826        tp->af_specific = &tcp_sock_ipv4_specific;
1827#endif
1828
1829        sk->sk_sndbuf = sysctl_tcp_wmem[1];
1830        sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1831
1832        local_bh_disable();
1833        percpu_counter_inc(&tcp_sockets_allocated);
1834        local_bh_enable();
1835
1836        return 0;
1837}
1838
1839void tcp_v4_destroy_sock(struct sock *sk)
1840{
1841        struct tcp_sock *tp = tcp_sk(sk);
1842
1843        tcp_clear_xmit_timers(sk);
1844
1845        tcp_cleanup_congestion_control(sk);
1846
1847        /* Clean up the write buffer. */
1848        tcp_write_queue_purge(sk);
1849
1850        /* Cleans up our, hopefully empty, out_of_order_queue. */
1851        __skb_queue_purge(&tp->out_of_order_queue);
1852
1853#ifdef CONFIG_TCP_MD5SIG
1854        /* Clean up the MD5 key list, if any */
1855        if (tp->md5sig_info) {
1856                tcp_v4_clear_md5_list(sk);
1857                kfree(tp->md5sig_info);
1858                tp->md5sig_info = NULL;
1859        }
1860#endif
1861
1862#ifdef CONFIG_NET_DMA
1863        /* Cleans up our sk_async_wait_queue */
1864        __skb_queue_purge(&sk->sk_async_wait_queue);
1865#endif
1866
1867        /* Clean up the prequeue; it really should be empty by now. */
1868        __skb_queue_purge(&tp->ucopy.prequeue);
1869
1870        /* Clean up a referenced TCP bind bucket. */
1871        if (inet_csk(sk)->icsk_bind_hash)
1872                inet_put_port(sk);
1873
1874        /*
1875         * If a cached sendmsg() page exists, toss it.
1876         */
1877        if (sk->sk_sndmsg_page) {
1878                __free_page(sk->sk_sndmsg_page);
1879                sk->sk_sndmsg_page = NULL;
1880        }
1881
1882        percpu_counter_dec(&tcp_sockets_allocated);
1883}
1884
1885EXPORT_SYMBOL(tcp_v4_destroy_sock);
1886
1887#ifdef CONFIG_PROC_FS
1888/* Proc filesystem TCP sock list dumping. */
1889
1890static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1891{
1892        return hlist_nulls_empty(head) ? NULL :
1893                list_entry(head->first, struct inet_timewait_sock, tw_node);
1894}
1895
1896static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1897{
1898        return !is_a_nulls(tw->tw_node.next) ?
1899                hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1900}
1901
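/*
 * Walk the listening hash.  Within each bucket we iterate the listener
 * sockets themselves (TCP_SEQ_STATE_LISTENING) and, for each listener,
 * detour through its SYN table of pending request_socks
 * (TCP_SEQ_STATE_OPENREQ), so open requests are listed right after the
 * listening socket they belong to.
 */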
1902static void *listening_get_next(struct seq_file *seq, void *cur)
1903{
1904        struct inet_connection_sock *icsk;
1905        struct hlist_nulls_node *node;
1906        struct sock *sk = cur;
1907        struct inet_listen_hashbucket *ilb;
1908        struct tcp_iter_state *st = seq->private;
1909        struct net *net = seq_file_net(seq);
1910
1911        if (!sk) {
1912                st->bucket = 0;
1913                ilb = &tcp_hashinfo.listening_hash[0];
1914                spin_lock_bh(&ilb->lock);
1915                sk = sk_nulls_head(&ilb->head);
1916                goto get_sk;
1917        }
1918        ilb = &tcp_hashinfo.listening_hash[st->bucket];
1919        ++st->num;
1920
1921        if (st->state == TCP_SEQ_STATE_OPENREQ) {
1922                struct request_sock *req = cur;
1923
1924                icsk = inet_csk(st->syn_wait_sk);
1925                req = req->dl_next;
1926                while (1) {
1927                        while (req) {
1928                                if (req->rsk_ops->family == st->family) {
1929                                        cur = req;
1930                                        goto out;
1931                                }
1932                                req = req->dl_next;
1933                        }
1934                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1935                                break;
1936get_req:
1937                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1938                }
1939                sk        = sk_next(st->syn_wait_sk);
1940                st->state = TCP_SEQ_STATE_LISTENING;
1941                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1942        } else {
1943                icsk = inet_csk(sk);
1944                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1945                if (reqsk_queue_len(&icsk->icsk_accept_queue))
1946                        goto start_req;
1947                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1948                sk = sk_next(sk);
1949        }
1950get_sk:
1951        sk_nulls_for_each_from(sk, node) {
1952                if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
1953                        cur = sk;
1954                        goto out;
1955                }
1956                icsk = inet_csk(sk);
1957                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1958                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1959start_req:
1960                        st->uid         = sock_i_uid(sk);
1961                        st->syn_wait_sk = sk;
1962                        st->state       = TCP_SEQ_STATE_OPENREQ;
1963                        st->sbucket     = 0;
1964                        goto get_req;
1965                }
1966                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1967        }
1968        spin_unlock_bh(&ilb->lock);
1969        if (++st->bucket < INET_LHTABLE_SIZE) {
1970                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1971                spin_lock_bh(&ilb->lock);
1972                sk = sk_nulls_head(&ilb->head);
1973                goto get_sk;
1974        }
1975        cur = NULL;
1976out:
1977        return cur;
1978}
1979
1980static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1981{
1982        void *rc = listening_get_next(seq, NULL);
1983
1984        while (rc && *pos) {
1985                rc = listening_get_next(seq, rc);
1986                --*pos;
1987        }
1988        return rc;
1989}
1990
1991static inline int empty_bucket(struct tcp_iter_state *st)
1992{
1993        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
1994                hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
1995}
1996
1997static void *established_get_first(struct seq_file *seq)
1998{
1999        struct tcp_iter_state *st = seq->private;
2000        struct net *net = seq_file_net(seq);
2001        void *rc = NULL;
2002
2003        for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2004                struct sock *sk;
2005                struct hlist_nulls_node *node;
2006                struct inet_timewait_sock *tw;
2007                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2008
2009                /* Lockless fast path for the common case of empty buckets */
2010                if (empty_bucket(st))
2011                        continue;
2012
2013                spin_lock_bh(lock);
2014                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2015                        if (sk->sk_family != st->family ||
2016                            !net_eq(sock_net(sk), net)) {
2017                                continue;
2018                        }
2019                        rc = sk;
2020                        goto out;
2021                }
2022                st->state = TCP_SEQ_STATE_TIME_WAIT;
2023                inet_twsk_for_each(tw, node,
2024                                   &tcp_hashinfo.ehash[st->bucket].twchain) {
2025                        if (tw->tw_family != st->family ||
2026                            !net_eq(twsk_net(tw), net)) {
2027                                continue;
2028                        }
2029                        rc = tw;
2030                        goto out;
2031                }
2032                spin_unlock_bh(lock);
2033                st->state = TCP_SEQ_STATE_ESTABLISHED;
2034        }
2035out:
2036        return rc;
2037}
2038
2039static void *established_get_next(struct seq_file *seq, void *cur)
2040{
2041        struct sock *sk = cur;
2042        struct inet_timewait_sock *tw;
2043        struct hlist_nulls_node *node;
2044        struct tcp_iter_state *st = seq->private;
2045        struct net *net = seq_file_net(seq);
2046
2047        ++st->num;
2048
2049        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2050                tw = cur;
2051                tw = tw_next(tw);
2052get_tw:
2053                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2054                        tw = tw_next(tw);
2055                }
2056                if (tw) {
2057                        cur = tw;
2058                        goto out;
2059                }
2060                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2061                st->state = TCP_SEQ_STATE_ESTABLISHED;
2062
2063                /* Look for the next non-empty bucket */
2064                while (++st->bucket < tcp_hashinfo.ehash_size &&
2065                                empty_bucket(st))
2066                        ;
2067                if (st->bucket >= tcp_hashinfo.ehash_size)
2068                        return NULL;
2069
2070                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2071                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2072        } else
2073                sk = sk_nulls_next(sk);
2074
2075        sk_nulls_for_each_from(sk, node) {
2076                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2077                        goto found;
2078        }
2079
2080        st->state = TCP_SEQ_STATE_TIME_WAIT;
2081        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2082        goto get_tw;
2083found:
2084        cur = sk;
2085out:
2086        return cur;
2087}
2088
2089static void *established_get_idx(struct seq_file *seq, loff_t pos)
2090{
2091        void *rc = established_get_first(seq);
2092
2093        while (rc && pos) {
2094                rc = established_get_next(seq, rc);
2095                --pos;
2096        }
2097        return rc;
2098}
2099
2100static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2101{
2102        void *rc;
2103        struct tcp_iter_state *st = seq->private;
2104
2105        st->state = TCP_SEQ_STATE_LISTENING;
2106        rc        = listening_get_idx(seq, &pos);
2107
2108        if (!rc) {
2109                st->state = TCP_SEQ_STATE_ESTABLISHED;
2110                rc        = established_get_idx(seq, pos);
2111        }
2112
2113        return rc;
2114}
2115
2116static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2117{
2118        struct tcp_iter_state *st = seq->private;
2119        st->state = TCP_SEQ_STATE_LISTENING;
2120        st->num = 0;
2121        return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2122}
2123
2124static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2125{
2126        void *rc = NULL;
2127        struct tcp_iter_state *st;
2128
2129        if (v == SEQ_START_TOKEN) {
2130                rc = tcp_get_idx(seq, 0);
2131                goto out;
2132        }
2133        st = seq->private;
2134
2135        switch (st->state) {
2136        case TCP_SEQ_STATE_OPENREQ:
2137        case TCP_SEQ_STATE_LISTENING:
2138                rc = listening_get_next(seq, v);
2139                if (!rc) {
2140                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2141                        rc        = established_get_first(seq);
2142                }
2143                break;
2144        case TCP_SEQ_STATE_ESTABLISHED:
2145        case TCP_SEQ_STATE_TIME_WAIT:
2146                rc = established_get_next(seq, v);
2147                break;
2148        }
2149out:
2150        ++*pos;
2151        return rc;
2152}
2153
2154static void tcp_seq_stop(struct seq_file *seq, void *v)
2155{
2156        struct tcp_iter_state *st = seq->private;
2157
2158        switch (st->state) {
2159        case TCP_SEQ_STATE_OPENREQ:
2160                if (v) {
2161                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2162                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2163                }
2164        case TCP_SEQ_STATE_LISTENING:
2165                if (v != SEQ_START_TOKEN)
2166                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2167                break;
2168        case TCP_SEQ_STATE_TIME_WAIT:
2169        case TCP_SEQ_STATE_ESTABLISHED:
2170                if (v)
2171                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2172                break;
2173        }
2174}
2175
2176static int tcp_seq_open(struct inode *inode, struct file *file)
2177{
2178        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2179        struct tcp_iter_state *s;
2180        int err;
2181
2182        err = seq_open_net(inode, file, &afinfo->seq_ops,
2183                          sizeof(struct tcp_iter_state));
2184        if (err < 0)
2185                return err;
2186
2187        s = ((struct seq_file *)file->private_data)->private;
2188        s->family               = afinfo->family;
2189        return 0;
2190}
2191
2192int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2193{
2194        int rc = 0;
2195        struct proc_dir_entry *p;
2196
2197        afinfo->seq_fops.open           = tcp_seq_open;
2198        afinfo->seq_fops.read           = seq_read;
2199        afinfo->seq_fops.llseek         = seq_lseek;
2200        afinfo->seq_fops.release        = seq_release_net;
2201
2202        afinfo->seq_ops.start           = tcp_seq_start;
2203        afinfo->seq_ops.next            = tcp_seq_next;
2204        afinfo->seq_ops.stop            = tcp_seq_stop;
2205
2206        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2207                             &afinfo->seq_fops, afinfo);
2208        if (!p)
2209                rc = -ENOMEM;
2210        return rc;
2211}
2212
2213void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2214{
2215        proc_net_remove(net, afinfo->name);
2216}
2217
2218static void get_openreq4(struct sock *sk, struct request_sock *req,
2219                         struct seq_file *f, int i, int uid, int *len)
2220{
2221        const struct inet_request_sock *ireq = inet_rsk(req);
2222        int ttd = req->expires - jiffies;
2223
2224        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2225                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2226                i,
2227                ireq->loc_addr,
2228                ntohs(inet_sk(sk)->sport),
2229                ireq->rmt_addr,
2230                ntohs(ireq->rmt_port),
2231                TCP_SYN_RECV,
2232                0, 0, /* could print option size, but that is af dependent. */
2233                1,    /* timers active (only the expire timer) */
2234                jiffies_to_clock_t(ttd),
2235                req->retrans,
2236                uid,
2237                0,  /* non-standard timer */
2238                0, /* open_requests have no inode */
2239                atomic_read(&sk->sk_refcnt),
2240                req,
2241                len);
2242}
2243
2244static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2245{
2246        int timer_active;
2247        unsigned long timer_expires;
2248        struct tcp_sock *tp = tcp_sk(sk);
2249        const struct inet_connection_sock *icsk = inet_csk(sk);
2250        struct inet_sock *inet = inet_sk(sk);
2251        __be32 dest = inet->daddr;
2252        __be32 src = inet->rcv_saddr;
2253        __u16 destp = ntohs(inet->dport);
2254        __u16 srcp = ntohs(inet->sport);
2255
2256        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2257                timer_active    = 1;
2258                timer_expires   = icsk->icsk_timeout;
2259        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2260                timer_active    = 4;
2261                timer_expires   = icsk->icsk_timeout;
2262        } else if (timer_pending(&sk->sk_timer)) {
2263                timer_active    = 2;
2264                timer_expires   = sk->sk_timer.expires;
2265        } else {
2266                timer_active    = 0;
2267                timer_expires = jiffies;
2268        }
2269
2270        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2271                        "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2272                i, src, srcp, dest, destp, sk->sk_state,
2273                tp->write_seq - tp->snd_una,
2274                sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
2275                                             (tp->rcv_nxt - tp->copied_seq),
2276                timer_active,
2277                jiffies_to_clock_t(timer_expires - jiffies),
2278                icsk->icsk_retransmits,
2279                sock_i_uid(sk),
2280                icsk->icsk_probes_out,
2281                sock_i_ino(sk),
2282                atomic_read(&sk->sk_refcnt), sk,
2283                jiffies_to_clock_t(icsk->icsk_rto),
2284                jiffies_to_clock_t(icsk->icsk_ack.ato),
2285                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2286                tp->snd_cwnd,
2287                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2288                len);
2289}
2290
2291static void get_timewait4_sock(struct inet_timewait_sock *tw,
2292                               struct seq_file *f, int i, int *len)
2293{
2294        __be32 dest, src;
2295        __u16 destp, srcp;
2296        int ttd = tw->tw_ttd - jiffies;
2297
2298        if (ttd < 0)
2299                ttd = 0;
2300
2301        dest  = tw->tw_daddr;
2302        src   = tw->tw_rcv_saddr;
2303        destp = ntohs(tw->tw_dport);
2304        srcp  = ntohs(tw->tw_sport);
2305
2306        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2307                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2308                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2309                3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2310                atomic_read(&tw->tw_refcnt), tw, len);
2311}
2312
2313#define TMPSZ 150
2314
2315static int tcp4_seq_show(struct seq_file *seq, void *v)
2316{
2317        struct tcp_iter_state *st;
2318        int len;
2319
2320        if (v == SEQ_START_TOKEN) {
2321                seq_printf(seq, "%-*s\n", TMPSZ - 1,
2322                           "  sl  local_address rem_address   st tx_queue "
2323                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2324                           "inode");
2325                goto out;
2326        }
2327        st = seq->private;
2328
2329        switch (st->state) {
2330        case TCP_SEQ_STATE_LISTENING:
2331        case TCP_SEQ_STATE_ESTABLISHED:
2332                get_tcp4_sock(v, seq, st->num, &len);
2333                break;
2334        case TCP_SEQ_STATE_OPENREQ:
2335                get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2336                break;
2337        case TCP_SEQ_STATE_TIME_WAIT:
2338                get_timewait4_sock(v, seq, st->num, &len);
2339                break;
2340        }
2341        seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2342out:
2343        return 0;
2344}
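/*
 * For illustration: on a little-endian machine, a socket listening on
 * 127.0.0.1:22 shows up above with
 *
 *	local_address 0100007F:0016  rem_address 00000000:0000  st 0A
 *
 * since addresses are raw __be32 values printed with %08X (so the bytes
 * of 127.0.0.1 appear reversed), ports are converted with ntohs() before
 * printing, and "st" is the numeric socket state (0x0A == TCP_LISTEN).
 */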
2345
2346static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2347        .name           = "tcp",
2348        .family         = AF_INET,
2349        .seq_fops       = {
2350                .owner          = THIS_MODULE,
2351        },
2352        .seq_ops        = {
2353                .show           = tcp4_seq_show,
2354        },
2355};
2356
2357static int tcp4_proc_init_net(struct net *net)
2358{
2359        return tcp_proc_register(net, &tcp4_seq_afinfo);
2360}
2361
2362static void tcp4_proc_exit_net(struct net *net)
2363{
2364        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2365}
2366
2367static struct pernet_operations tcp4_net_ops = {
2368        .init = tcp4_proc_init_net,
2369        .exit = tcp4_proc_exit_net,
2370};
2371
2372int __init tcp4_proc_init(void)
2373{
2374        return register_pernet_subsys(&tcp4_net_ops);
2375}
2376
2377void tcp4_proc_exit(void)
2378{
2379        unregister_pernet_subsys(&tcp4_net_ops);
2380}
2381#endif /* CONFIG_PROC_FS */
2382
2383struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2384{
2385        struct iphdr *iph = skb_gro_network_header(skb);
2386
2387        switch (skb->ip_summed) {
2388        case CHECKSUM_COMPLETE:
2389                if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2390                                  skb->csum)) {
2391                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2392                        break;
2393                }
2394
2395                /* fall through */
2396        case CHECKSUM_NONE:
2397                NAPI_GRO_CB(skb)->flush = 1;
2398                return NULL;
2399        }
2400
2401        return tcp_gro_receive(head, skb);
2402}
2403EXPORT_SYMBOL(tcp4_gro_receive);
2404
2405int tcp4_gro_complete(struct sk_buff *skb)
2406{
2407        struct iphdr *iph = ip_hdr(skb);
2408        struct tcphdr *th = tcp_hdr(skb);
2409
2410        th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2411                                  iph->saddr, iph->daddr, 0);
2412        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2413
2414        return tcp_gro_complete(skb);
2415}
2416EXPORT_SYMBOL(tcp4_gro_complete);
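/*
 * Both GRO hooks above are hooked up, together with the tcp_v4_rcv()
 * handler, in the IPv4 net_protocol entry for IPPROTO_TCP in
 * net/ipv4/af_inet.c, so receive aggregation happens before tcp_v4_rcv()
 * ever sees the merged skb.
 */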
2417
2418struct proto tcp_prot = {
2419        .name                   = "TCP",
2420        .owner                  = THIS_MODULE,
2421        .close                  = tcp_close,
2422        .connect                = tcp_v4_connect,
2423        .disconnect             = tcp_disconnect,
2424        .accept                 = inet_csk_accept,
2425        .ioctl                  = tcp_ioctl,
2426        .init                   = tcp_v4_init_sock,
2427        .destroy                = tcp_v4_destroy_sock,
2428        .shutdown               = tcp_shutdown,
2429        .setsockopt             = tcp_setsockopt,
2430        .getsockopt             = tcp_getsockopt,
2431        .recvmsg                = tcp_recvmsg,
2432        .backlog_rcv            = tcp_v4_do_rcv,
2433        .hash                   = inet_hash,
2434        .unhash                 = inet_unhash,
2435        .get_port               = inet_csk_get_port,
2436        .enter_memory_pressure  = tcp_enter_memory_pressure,
2437        .sockets_allocated      = &tcp_sockets_allocated,
2438        .orphan_count           = &tcp_orphan_count,
2439        .memory_allocated       = &tcp_memory_allocated,
2440        .memory_pressure        = &tcp_memory_pressure,
2441        .sysctl_mem             = sysctl_tcp_mem,
2442        .sysctl_wmem            = sysctl_tcp_wmem,
2443        .sysctl_rmem            = sysctl_tcp_rmem,
2444        .max_header             = MAX_TCP_HEADER,
2445        .obj_size               = sizeof(struct tcp_sock),
2446        .slab_flags             = SLAB_DESTROY_BY_RCU,
2447        .twsk_prot              = &tcp_timewait_sock_ops,
2448        .rsk_prot               = &tcp_request_sock_ops,
2449        .h.hashinfo             = &tcp_hashinfo,
2450#ifdef CONFIG_COMPAT
2451        .compat_setsockopt      = compat_tcp_setsockopt,
2452        .compat_getsockopt      = compat_tcp_getsockopt,
2453#endif
2454};
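/*
 * tcp_prot is registered at boot by inet_init() via proto_register()
 * (which sizes the socket slab from .obj_size above) and is paired with
 * inet_stream_ops in af_inet.c's inetsw_array for SOCK_STREAM /
 * IPPROTO_TCP sockets.
 */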
2455
2456
2457static int __net_init tcp_sk_init(struct net *net)
2458{
2459        return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2460                                    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2461}
2462
2463static void __net_exit tcp_sk_exit(struct net *net)
2464{
2465        inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2466        inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
2467}
2468
2469static struct pernet_operations __net_initdata tcp_sk_ops = {
2470       .init = tcp_sk_init,
2471       .exit = tcp_sk_exit,
2472};
2473
2474void __init tcp_v4_init(void)
2475{
2476        inet_hashinfo_init(&tcp_hashinfo);
2477        if (register_pernet_subsys(&tcp_sk_ops))
2478                panic("Failed to create the TCP control socket.\n");
2479}
2480
2481EXPORT_SYMBOL(ipv4_specific);
2482EXPORT_SYMBOL(tcp_hashinfo);
2483EXPORT_SYMBOL(tcp_prot);
2484EXPORT_SYMBOL(tcp_v4_conn_request);
2485EXPORT_SYMBOL(tcp_v4_connect);
2486EXPORT_SYMBOL(tcp_v4_do_rcv);
2487EXPORT_SYMBOL(tcp_v4_remember_stamp);
2488EXPORT_SYMBOL(tcp_v4_send_check);
2489EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2490
2491#ifdef CONFIG_PROC_FS
2492EXPORT_SYMBOL(tcp_proc_register);
2493EXPORT_SYMBOL(tcp_proc_unregister);
2494#endif
2495EXPORT_SYMBOL(sysctl_tcp_low_latency);
2496
2497