linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53
  54#include <linux/bottom_half.h>
  55#include <linux/types.h>
  56#include <linux/fcntl.h>
  57#include <linux/module.h>
  58#include <linux/random.h>
  59#include <linux/cache.h>
  60#include <linux/jhash.h>
  61#include <linux/init.h>
  62#include <linux/times.h>
  63#include <linux/slab.h>
  64
  65#include <net/net_namespace.h>
  66#include <net/icmp.h>
  67#include <net/inet_hashtables.h>
  68#include <net/tcp.h>
  69#include <net/transp_v6.h>
  70#include <net/ipv6.h>
  71#include <net/inet_common.h>
  72#include <net/timewait_sock.h>
  73#include <net/xfrm.h>
  74#include <net/netdma.h>
  75
  76#include <linux/inet.h>
  77#include <linux/ipv6.h>
  78#include <linux/stddef.h>
  79#include <linux/proc_fs.h>
  80#include <linux/seq_file.h>
  81
  82#include <linux/crypto.h>
  83#include <linux/scatterlist.h>
  84
  85int sysctl_tcp_tw_reuse __read_mostly;
  86int sysctl_tcp_low_latency __read_mostly;
  87EXPORT_SYMBOL(sysctl_tcp_low_latency);
  88
  89
  90#ifdef CONFIG_TCP_MD5SIG
  91static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
  92                                                   __be32 addr);
  93static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
  94                               __be32 daddr, __be32 saddr, struct tcphdr *th);
  95#else
  96static inline
  97struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
  98{
  99        return NULL;
 100}
 101#endif
 102
 103struct inet_hashinfo tcp_hashinfo;
 104EXPORT_SYMBOL(tcp_hashinfo);
 105
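/* A rough sketch of what the helper below does: the initial sequence number
 * for a connection is derived from the segment's four-tuple, i.e. roughly
 *
 *      isn = hash(saddr, daddr, sport, dport, secret) + clock_component;
 *
 * The actual mixing lives in secure_tcp_sequence_number(); the point is that
 * ISNs are hard to guess per four-tuple while still moving forward for
 * repeated connections between the same endpoints.
 */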
 106static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 107{
 108        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 109                                          ip_hdr(skb)->saddr,
 110                                          tcp_hdr(skb)->dest,
 111                                          tcp_hdr(skb)->source);
 112}
 113
 114int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 115{
 116        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 117        struct tcp_sock *tp = tcp_sk(sk);
 118
 119        /* With PAWS, it is safe from the viewpoint
 120           of data integrity. Even without PAWS it is safe provided sequence
 121           spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 122
  123           The idea is close to VJ's: the timestamp cache is held not per
  124           host but per port pair, and the TW bucket is used as the state
  125           holder.
  126
  127           If the TW bucket has already been destroyed we fall back to VJ's
  128           scheme and use the initial timestamp retrieved from the peer table.
 129         */
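        /* A numeric sketch of the rule below: if the old connection's
         * tw_snd_nxt was 1000, the reused socket starts writing at
         * 1000 + 65535 + 2 = 66537, i.e. past anything the old connection
         * could still have outstanding within a maximal 64K window, so old
         * duplicates cannot be mistaken for new data.
         */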
 130        if (tcptw->tw_ts_recent_stamp &&
 131            (twp == NULL || (sysctl_tcp_tw_reuse &&
 132                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 133                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 134                if (tp->write_seq == 0)
 135                        tp->write_seq = 1;
 136                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 137                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 138                sock_hold(sktw);
 139                return 1;
 140        }
 141
 142        return 0;
 143}
 144EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 145
 146/* This will initiate an outgoing connection. */
 147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 148{
 149        struct inet_sock *inet = inet_sk(sk);
 150        struct tcp_sock *tp = tcp_sk(sk);
 151        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 152        struct rtable *rt;
 153        __be32 daddr, nexthop;
 154        int tmp;
 155        int err;
 156
 157        if (addr_len < sizeof(struct sockaddr_in))
 158                return -EINVAL;
 159
 160        if (usin->sin_family != AF_INET)
 161                return -EAFNOSUPPORT;
 162
 163        nexthop = daddr = usin->sin_addr.s_addr;
 164        if (inet->opt && inet->opt->srr) {
 165                if (!daddr)
 166                        return -EINVAL;
 167                nexthop = inet->opt->faddr;
 168        }
 169
 170        tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
 171                               RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 172                               IPPROTO_TCP,
 173                               inet->inet_sport, usin->sin_port, sk, 1);
 174        if (tmp < 0) {
 175                if (tmp == -ENETUNREACH)
 176                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 177                return tmp;
 178        }
 179
 180        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 181                ip_rt_put(rt);
 182                return -ENETUNREACH;
 183        }
 184
 185        if (!inet->opt || !inet->opt->srr)
 186                daddr = rt->rt_dst;
 187
 188        if (!inet->inet_saddr)
 189                inet->inet_saddr = rt->rt_src;
 190        inet->inet_rcv_saddr = inet->inet_saddr;
 191
 192        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 193                /* Reset inherited state */
 194                tp->rx_opt.ts_recent       = 0;
 195                tp->rx_opt.ts_recent_stamp = 0;
 196                tp->write_seq              = 0;
 197        }
 198
 199        if (tcp_death_row.sysctl_tw_recycle &&
 200            !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
 201                struct inet_peer *peer = rt_get_peer(rt);
 202                /*
  203                 * VJ's idea. We save the last timestamp seen from
  204                 * the destination in the peer table when entering
  205                 * TIME-WAIT state, and initialize rx_opt.ts_recent from it
  206                 * when trying a new connection.
 207                 */
 208                if (peer) {
 209                        inet_peer_refcheck(peer);
 210                        if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
 211                                tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 212                                tp->rx_opt.ts_recent = peer->tcp_ts;
 213                        }
 214                }
 215        }
 216
 217        inet->inet_dport = usin->sin_port;
 218        inet->inet_daddr = daddr;
 219
 220        inet_csk(sk)->icsk_ext_hdr_len = 0;
 221        if (inet->opt)
 222                inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 223
 224        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 225
 226        /* Socket identity is still unknown (sport may be zero).
  227         * However we set the state to SYN-SENT and, without releasing the
  228         * socket lock, select a source port, enter ourselves into the hash
  229         * tables and complete initialization after this.
 230         */
 231        tcp_set_state(sk, TCP_SYN_SENT);
 232        err = inet_hash_connect(&tcp_death_row, sk);
 233        if (err)
 234                goto failure;
 235
 236        err = ip_route_newports(&rt, IPPROTO_TCP,
 237                                inet->inet_sport, inet->inet_dport, sk);
 238        if (err)
 239                goto failure;
 240
 241        /* OK, now commit destination to socket.  */
 242        sk->sk_gso_type = SKB_GSO_TCPV4;
 243        sk_setup_caps(sk, &rt->dst);
 244
 245        if (!tp->write_seq)
 246                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 247                                                           inet->inet_daddr,
 248                                                           inet->inet_sport,
 249                                                           usin->sin_port);
 250
 251        inet->inet_id = tp->write_seq ^ jiffies;
 252
 253        err = tcp_connect(sk);
 254        rt = NULL;
 255        if (err)
 256                goto failure;
 257
 258        return 0;
 259
 260failure:
 261        /*
 262         * This unhashes the socket and releases the local port,
 263         * if necessary.
 264         */
 265        tcp_set_state(sk, TCP_CLOSE);
 266        ip_rt_put(rt);
 267        sk->sk_route_caps = 0;
 268        inet->inet_dport = 0;
 269        return err;
 270}
 271EXPORT_SYMBOL(tcp_v4_connect);
 272
 273/*
 274 * This routine does path mtu discovery as defined in RFC1191.
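 *
 * In outline: an incoming ICMP_FRAG_NEEDED first updates the cached route's
 * PMTU via dst->ops->update_pmtu(); if the socket's current path-MTU cookie
 * is larger than the new value, tcp_sync_mss() shrinks the MSS and
 * tcp_simple_retransmit() resends the presumably dropped segment instead of
 * waiting for the retransmit timer.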
 275 */
 276static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
 277{
 278        struct dst_entry *dst;
 279        struct inet_sock *inet = inet_sk(sk);
 280
 281        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
  282         * sent out by Linux are always < 576 bytes so they should go through
 283         * unfragmented).
 284         */
 285        if (sk->sk_state == TCP_LISTEN)
 286                return;
 287
  288        /* We don't check in the dst entry whether PMTU discovery is forbidden
  289         * on this route. We just assume that no packet-too-big packets
  290         * are sent back when PMTU discovery is not active.
 291         * There is a small race when the user changes this flag in the
 292         * route, but I think that's acceptable.
 293         */
 294        if ((dst = __sk_dst_check(sk, 0)) == NULL)
 295                return;
 296
 297        dst->ops->update_pmtu(dst, mtu);
 298
  299        /* Something is about to go wrong... Remember the soft error
  300         * in case this connection is not able to recover.
 301         */
 302        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 303                sk->sk_err_soft = EMSGSIZE;
 304
 305        mtu = dst_mtu(dst);
 306
 307        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 308            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 309                tcp_sync_mss(sk, mtu);
 310
 311                /* Resend the TCP packet because it's
 312                 * clear that the old packet has been
 313                 * dropped. This is the new "fast" path mtu
 314                 * discovery.
 315                 */
 316                tcp_simple_retransmit(sk);
 317        } /* else let the usual retransmit timer handle it */
 318}
 319
 320/*
 321 * This routine is called by the ICMP module when it gets some
 322 * sort of error condition.  If err < 0 then the socket should
 323 * be closed and the error returned to the user.  If err > 0
  324 * it's just the ICMP type << 8 | ICMP code.  After adjustment,
  325 * header points to the first 8 bytes of the TCP header.  We need
  326 * to find the appropriate port.
  327 *
  328 * The locking strategy used here is very "optimistic". When
  329 * someone else accesses the socket, the ICMP is just dropped,
  330 * and for some paths there is no check at all.
  331 * A more general error queue for queuing errors for later
  332 * handling would probably be better.
 333 *
 334 */
 335
 336void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 337{
 338        struct iphdr *iph = (struct iphdr *)icmp_skb->data;
 339        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 340        struct inet_connection_sock *icsk;
 341        struct tcp_sock *tp;
 342        struct inet_sock *inet;
 343        const int type = icmp_hdr(icmp_skb)->type;
 344        const int code = icmp_hdr(icmp_skb)->code;
 345        struct sock *sk;
 346        struct sk_buff *skb;
 347        __u32 seq;
 348        __u32 remaining;
 349        int err;
 350        struct net *net = dev_net(icmp_skb->dev);
 351
 352        if (icmp_skb->len < (iph->ihl << 2) + 8) {
 353                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 354                return;
 355        }
 356
 357        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 358                        iph->saddr, th->source, inet_iif(icmp_skb));
 359        if (!sk) {
 360                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 361                return;
 362        }
 363        if (sk->sk_state == TCP_TIME_WAIT) {
 364                inet_twsk_put(inet_twsk(sk));
 365                return;
 366        }
 367
 368        bh_lock_sock(sk);
 369        /* If too many ICMPs get dropped on busy
 370         * servers this needs to be solved differently.
 371         */
 372        if (sock_owned_by_user(sk))
 373                NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 374
 375        if (sk->sk_state == TCP_CLOSE)
 376                goto out;
 377
 378        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 379                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 380                goto out;
 381        }
 382
 383        icsk = inet_csk(sk);
 384        tp = tcp_sk(sk);
 385        seq = ntohl(th->seq);
 386        if (sk->sk_state != TCP_LISTEN &&
 387            !between(seq, tp->snd_una, tp->snd_nxt)) {
 388                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 389                goto out;
 390        }
 391
 392        switch (type) {
 393        case ICMP_SOURCE_QUENCH:
 394                /* Just silently ignore these. */
 395                goto out;
 396        case ICMP_PARAMETERPROB:
 397                err = EPROTO;
 398                break;
 399        case ICMP_DEST_UNREACH:
 400                if (code > NR_ICMP_UNREACH)
 401                        goto out;
 402
 403                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 404                        if (!sock_owned_by_user(sk))
 405                                do_pmtu_discovery(sk, iph, info);
 406                        goto out;
 407                }
 408
 409                err = icmp_err_convert[code].errno;
 410                /* check if icmp_skb allows revert of backoff
 411                 * (see draft-zimmermann-tcp-lcd) */
 412                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 413                        break;
 414                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 415                    !icsk->icsk_backoff)
 416                        break;
 417
 418                if (sock_owned_by_user(sk))
 419                        break;
 420
 421                icsk->icsk_backoff--;
 422                inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
 423                                         icsk->icsk_backoff;
 424                tcp_bound_rto(sk);
 425
 426                skb = tcp_write_queue_head(sk);
 427                BUG_ON(!skb);
 428
 429                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
 430                                tcp_time_stamp - TCP_SKB_CB(skb)->when);
 431
 432                if (remaining) {
 433                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 434                                                  remaining, TCP_RTO_MAX);
 435                } else {
 436                        /* RTO revert clocked out retransmission.
 437                         * Will retransmit now */
 438                        tcp_retransmit_timer(sk);
 439                }
 440
 441                break;
 442        case ICMP_TIME_EXCEEDED:
 443                err = EHOSTUNREACH;
 444                break;
 445        default:
 446                goto out;
 447        }
 448
 449        switch (sk->sk_state) {
 450                struct request_sock *req, **prev;
 451        case TCP_LISTEN:
 452                if (sock_owned_by_user(sk))
 453                        goto out;
 454
 455                req = inet_csk_search_req(sk, &prev, th->dest,
 456                                          iph->daddr, iph->saddr);
 457                if (!req)
 458                        goto out;
 459
 460                /* ICMPs are not backlogged, hence we cannot get
 461                   an established socket here.
 462                 */
 463                WARN_ON(req->sk);
 464
 465                if (seq != tcp_rsk(req)->snt_isn) {
 466                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 467                        goto out;
 468                }
 469
 470                /*
 471                 * Still in SYN_RECV, just remove it silently.
 472                 * There is no good way to pass the error to the newly
 473                 * created socket, and POSIX does not want network
 474                 * errors returned from accept().
 475                 */
 476                inet_csk_reqsk_queue_drop(sk, req, prev);
 477                goto out;
 478
 479        case TCP_SYN_SENT:
  480        case TCP_SYN_RECV:  /* Cannot happen normally.
  481                               It can, e.g., if SYNs crossed.
 482                             */
 483                if (!sock_owned_by_user(sk)) {
 484                        sk->sk_err = err;
 485
 486                        sk->sk_error_report(sk);
 487
 488                        tcp_done(sk);
 489                } else {
 490                        sk->sk_err_soft = err;
 491                }
 492                goto out;
 493        }
 494
 495        /* If we've already connected we will keep trying
 496         * until we time out, or the user gives up.
 497         *
  498         * RFC 1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH
  499         * to be treated as hard errors (well, FRAG_FAILED too,
  500         * but it is obsoleted by PMTU discovery).
  501         *
  502         * Note that in the modern internet, where routing is unreliable
  503         * and broken firewalls sit in every dark corner sending random
  504         * errors ordered by their masters, even these two messages finally
  505         * lose their original sense (even Linux sends invalid PORT_UNREACHs).
 506         *
 507         * Now we are in compliance with RFCs.
 508         *                                                      --ANK (980905)
 509         */
 510
 511        inet = inet_sk(sk);
 512        if (!sock_owned_by_user(sk) && inet->recverr) {
 513                sk->sk_err = err;
 514                sk->sk_error_report(sk);
 515        } else  { /* Only an error on timeout */
 516                sk->sk_err_soft = err;
 517        }
 518
 519out:
 520        bh_unlock_sock(sk);
 521        sock_put(sk);
 522}
 523
 524static void __tcp_v4_send_check(struct sk_buff *skb,
 525                                __be32 saddr, __be32 daddr)
 526{
 527        struct tcphdr *th = tcp_hdr(skb);
 528
 529        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 530                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 531                skb->csum_start = skb_transport_header(skb) - skb->head;
 532                skb->csum_offset = offsetof(struct tcphdr, check);
 533        } else {
 534                th->check = tcp_v4_check(skb->len, saddr, daddr,
 535                                         csum_partial(th,
 536                                                      th->doff << 2,
 537                                                      skb->csum));
 538        }
 539}
 540
 541/* This routine computes an IPv4 TCP checksum. */
 542void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 543{
 544        struct inet_sock *inet = inet_sk(sk);
 545
 546        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 547}
 548EXPORT_SYMBOL(tcp_v4_send_check);
 549
 550int tcp_v4_gso_send_check(struct sk_buff *skb)
 551{
 552        const struct iphdr *iph;
 553        struct tcphdr *th;
 554
 555        if (!pskb_may_pull(skb, sizeof(*th)))
 556                return -EINVAL;
 557
 558        iph = ip_hdr(skb);
 559        th = tcp_hdr(skb);
 560
 561        th->check = 0;
 562        skb->ip_summed = CHECKSUM_PARTIAL;
 563        __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
 564        return 0;
 565}
 566
 567/*
  568 *      This routine will send an RST to the other TCP.
  569 *
  570 *      Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
  571 *                    for the reset?
  572 *      Answer: if a packet caused the RST, it is not for a socket
  573 *              existing in our system; if it is matched to a socket,
  574 *              it is just a duplicate segment or a bug in the other side's TCP.
  575 *              So we build the reply based only on parameters that
  576 *              arrived with the segment.
 577 *      Exception: precedence violation. We do not implement it in any case.
 578 */
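/* Sequence numbers in the RST follow RFC 793: if the offending segment
 * carried an ACK, the reset's SEQ echoes that ACK number and no ACK flag is
 * set; otherwise SEQ stays zero (rep is zeroed) and the reset ACKs
 * everything the peer sent, i.e. ack_seq = seq + SYN + FIN + payload length,
 * exactly as computed in tcp_v4_send_reset() below.
 */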
 579
 580static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 581{
 582        struct tcphdr *th = tcp_hdr(skb);
 583        struct {
 584                struct tcphdr th;
 585#ifdef CONFIG_TCP_MD5SIG
 586                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 587#endif
 588        } rep;
 589        struct ip_reply_arg arg;
 590#ifdef CONFIG_TCP_MD5SIG
 591        struct tcp_md5sig_key *key;
 592#endif
 593        struct net *net;
 594
 595        /* Never send a reset in response to a reset. */
 596        if (th->rst)
 597                return;
 598
 599        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 600                return;
 601
 602        /* Swap the send and the receive. */
 603        memset(&rep, 0, sizeof(rep));
 604        rep.th.dest   = th->source;
 605        rep.th.source = th->dest;
 606        rep.th.doff   = sizeof(struct tcphdr) / 4;
 607        rep.th.rst    = 1;
 608
 609        if (th->ack) {
 610                rep.th.seq = th->ack_seq;
 611        } else {
 612                rep.th.ack = 1;
 613                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 614                                       skb->len - (th->doff << 2));
 615        }
 616
 617        memset(&arg, 0, sizeof(arg));
 618        arg.iov[0].iov_base = (unsigned char *)&rep;
 619        arg.iov[0].iov_len  = sizeof(rep.th);
 620
 621#ifdef CONFIG_TCP_MD5SIG
 622        key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
 623        if (key) {
 624                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 625                                   (TCPOPT_NOP << 16) |
 626                                   (TCPOPT_MD5SIG << 8) |
 627                                   TCPOLEN_MD5SIG);
 628                /* Update length and the length the header thinks exists */
 629                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 630                rep.th.doff = arg.iov[0].iov_len / 4;
 631
 632                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 633                                     key, ip_hdr(skb)->saddr,
 634                                     ip_hdr(skb)->daddr, &rep.th);
 635        }
 636#endif
 637        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 638                                      ip_hdr(skb)->saddr, /* XXX */
 639                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 640        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 641        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 642
 643        net = dev_net(skb_dst(skb)->dev);
 644        ip_send_reply(net->ipv4.tcp_sock, skb,
 645                      &arg, arg.iov[0].iov_len);
 646
 647        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 648        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 649}
 650
  651/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
  652   outside of socket context, is certainly ugly. What can I do?
 653 */
 654
 655static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 656                            u32 win, u32 ts, int oif,
 657                            struct tcp_md5sig_key *key,
 658                            int reply_flags)
 659{
 660        struct tcphdr *th = tcp_hdr(skb);
 661        struct {
 662                struct tcphdr th;
 663                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 664#ifdef CONFIG_TCP_MD5SIG
 665                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 666#endif
 667                        ];
 668        } rep;
 669        struct ip_reply_arg arg;
 670        struct net *net = dev_net(skb_dst(skb)->dev);
 671
 672        memset(&rep.th, 0, sizeof(struct tcphdr));
 673        memset(&arg, 0, sizeof(arg));
 674
 675        arg.iov[0].iov_base = (unsigned char *)&rep;
 676        arg.iov[0].iov_len  = sizeof(rep.th);
 677        if (ts) {
 678                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 679                                   (TCPOPT_TIMESTAMP << 8) |
 680                                   TCPOLEN_TIMESTAMP);
 681                rep.opt[1] = htonl(tcp_time_stamp);
 682                rep.opt[2] = htonl(ts);
 683                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 684        }
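        /* With a timestamp to echo, the reply header just grew by
         * TCPOLEN_TSTAMP_ALIGNED bytes laid out as
         *      NOP, NOP, TIMESTAMP, length, <our tcp_time_stamp>, <echoed ts>
         * which is what rep.opt[0..2] were filled with above.
         */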
 685
 686        /* Swap the send and the receive. */
 687        rep.th.dest    = th->source;
 688        rep.th.source  = th->dest;
 689        rep.th.doff    = arg.iov[0].iov_len / 4;
 690        rep.th.seq     = htonl(seq);
 691        rep.th.ack_seq = htonl(ack);
 692        rep.th.ack     = 1;
 693        rep.th.window  = htons(win);
 694
 695#ifdef CONFIG_TCP_MD5SIG
 696        if (key) {
 697                int offset = (ts) ? 3 : 0;
 698
 699                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 700                                          (TCPOPT_NOP << 16) |
 701                                          (TCPOPT_MD5SIG << 8) |
 702                                          TCPOLEN_MD5SIG);
 703                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 704                rep.th.doff = arg.iov[0].iov_len/4;
 705
 706                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 707                                    key, ip_hdr(skb)->saddr,
 708                                    ip_hdr(skb)->daddr, &rep.th);
 709        }
 710#endif
 711        arg.flags = reply_flags;
 712        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 713                                      ip_hdr(skb)->saddr, /* XXX */
 714                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 715        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 716        if (oif)
 717                arg.bound_dev_if = oif;
 718
 719        ip_send_reply(net->ipv4.tcp_sock, skb,
 720                      &arg, arg.iov[0].iov_len);
 721
 722        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 723}
 724
 725static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 726{
 727        struct inet_timewait_sock *tw = inet_twsk(sk);
 728        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 729
 730        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 731                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 732                        tcptw->tw_ts_recent,
 733                        tw->tw_bound_dev_if,
 734                        tcp_twsk_md5_key(tcptw),
 735                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
 736                        );
 737
 738        inet_twsk_put(tw);
 739}
 740
 741static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 742                                  struct request_sock *req)
 743{
 744        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 745                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 746                        req->ts_recent,
 747                        0,
 748                        tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
 749                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
 750}
 751
 752/*
 753 *      Send a SYN-ACK after having received a SYN.
 754 *      This still operates on a request_sock only, not on a big
 755 *      socket.
 756 */
 757static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 758                              struct request_sock *req,
 759                              struct request_values *rvp)
 760{
 761        const struct inet_request_sock *ireq = inet_rsk(req);
 762        int err = -1;
 763        struct sk_buff * skb;
 764
 765        /* First, grab a route. */
 766        if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
 767                return -1;
 768
 769        skb = tcp_make_synack(sk, dst, req, rvp);
 770
 771        if (skb) {
 772                __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
 773
 774                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 775                                            ireq->rmt_addr,
 776                                            ireq->opt);
 777                err = net_xmit_eval(err);
 778        }
 779
 780        dst_release(dst);
 781        return err;
 782}
 783
 784static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 785                              struct request_values *rvp)
 786{
 787        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 788        return tcp_v4_send_synack(sk, NULL, req, rvp);
 789}
 790
 791/*
 792 *      IPv4 request_sock destructor.
 793 */
 794static void tcp_v4_reqsk_destructor(struct request_sock *req)
 795{
 796        kfree(inet_rsk(req)->opt);
 797}
 798
 799static void syn_flood_warning(const struct sk_buff *skb)
 800{
 801        const char *msg;
 802
 803#ifdef CONFIG_SYN_COOKIES
 804        if (sysctl_tcp_syncookies)
 805                msg = "Sending cookies";
 806        else
 807#endif
 808                msg = "Dropping request";
 809
 810        pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
 811                                ntohs(tcp_hdr(skb)->dest), msg);
 812}
 813
 814/*
 815 * Save and compile IPv4 options into the request_sock if needed.
 816 */
 817static struct ip_options *tcp_v4_save_options(struct sock *sk,
 818                                              struct sk_buff *skb)
 819{
 820        struct ip_options *opt = &(IPCB(skb)->opt);
 821        struct ip_options *dopt = NULL;
 822
 823        if (opt && opt->optlen) {
 824                int opt_size = optlength(opt);
 825                dopt = kmalloc(opt_size, GFP_ATOMIC);
 826                if (dopt) {
 827                        if (ip_options_echo(dopt, skb)) {
 828                                kfree(dopt);
 829                                dopt = NULL;
 830                        }
 831                }
 832        }
 833        return dopt;
 834}
 835
 836#ifdef CONFIG_TCP_MD5SIG
 837/*
 838 * RFC2385 MD5 checksumming requires a mapping of
 839 * IP address->MD5 Key.
 840 * We need to maintain these in the sk structure.
 841 */
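/* Keys live per socket in tp->md5sig_info as a flat array of
 * (peer address, key) pairs searched linearly; userspace installs and
 * removes them through the TCP_MD5SIG socket option, which ends up in
 * tcp_v4_parse_md5_keys() further below.
 */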
 842
 843/* Find the Key structure for an address.  */
 844static struct tcp_md5sig_key *
 845                        tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 846{
 847        struct tcp_sock *tp = tcp_sk(sk);
 848        int i;
 849
 850        if (!tp->md5sig_info || !tp->md5sig_info->entries4)
 851                return NULL;
 852        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 853                if (tp->md5sig_info->keys4[i].addr == addr)
 854                        return &tp->md5sig_info->keys4[i].base;
 855        }
 856        return NULL;
 857}
 858
 859struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 860                                         struct sock *addr_sk)
 861{
 862        return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
 863}
 864EXPORT_SYMBOL(tcp_v4_md5_lookup);
 865
 866static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 867                                                      struct request_sock *req)
 868{
 869        return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
 870}
 871
 872/* This can be called on a newly created socket, from other files */
 873int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 874                      u8 *newkey, u8 newkeylen)
 875{
 876        /* Add Key to the list */
 877        struct tcp_md5sig_key *key;
 878        struct tcp_sock *tp = tcp_sk(sk);
 879        struct tcp4_md5sig_key *keys;
 880
 881        key = tcp_v4_md5_do_lookup(sk, addr);
 882        if (key) {
 883                /* Pre-existing entry - just update that one. */
 884                kfree(key->key);
 885                key->key = newkey;
 886                key->keylen = newkeylen;
 887        } else {
 888                struct tcp_md5sig_info *md5sig;
 889
 890                if (!tp->md5sig_info) {
 891                        tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
 892                                                  GFP_ATOMIC);
 893                        if (!tp->md5sig_info) {
 894                                kfree(newkey);
 895                                return -ENOMEM;
 896                        }
 897                        sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 898                }
 899                if (tcp_alloc_md5sig_pool(sk) == NULL) {
 900                        kfree(newkey);
 901                        return -ENOMEM;
 902                }
 903                md5sig = tp->md5sig_info;
 904
 905                if (md5sig->alloced4 == md5sig->entries4) {
 906                        keys = kmalloc((sizeof(*keys) *
 907                                        (md5sig->entries4 + 1)), GFP_ATOMIC);
 908                        if (!keys) {
 909                                kfree(newkey);
 910                                tcp_free_md5sig_pool();
 911                                return -ENOMEM;
 912                        }
 913
 914                        if (md5sig->entries4)
 915                                memcpy(keys, md5sig->keys4,
 916                                       sizeof(*keys) * md5sig->entries4);
 917
 918                        /* Free old key list, and reference new one */
 919                        kfree(md5sig->keys4);
 920                        md5sig->keys4 = keys;
 921                        md5sig->alloced4++;
 922                }
 923                md5sig->entries4++;
 924                md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
 925                md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
 926                md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
 927        }
 928        return 0;
 929}
 930EXPORT_SYMBOL(tcp_v4_md5_do_add);
 931
 932static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
 933                               u8 *newkey, u8 newkeylen)
 934{
 935        return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
 936                                 newkey, newkeylen);
 937}
 938
 939int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 940{
 941        struct tcp_sock *tp = tcp_sk(sk);
 942        int i;
 943
 944        for (i = 0; i < tp->md5sig_info->entries4; i++) {
 945                if (tp->md5sig_info->keys4[i].addr == addr) {
 946                        /* Free the key */
 947                        kfree(tp->md5sig_info->keys4[i].base.key);
 948                        tp->md5sig_info->entries4--;
 949
 950                        if (tp->md5sig_info->entries4 == 0) {
 951                                kfree(tp->md5sig_info->keys4);
 952                                tp->md5sig_info->keys4 = NULL;
 953                                tp->md5sig_info->alloced4 = 0;
 954                        } else if (tp->md5sig_info->entries4 != i) {
 955                                /* Need to do some manipulation */
 956                                memmove(&tp->md5sig_info->keys4[i],
 957                                        &tp->md5sig_info->keys4[i+1],
 958                                        (tp->md5sig_info->entries4 - i) *
 959                                         sizeof(struct tcp4_md5sig_key));
 960                        }
 961                        tcp_free_md5sig_pool();
 962                        return 0;
 963                }
 964        }
 965        return -ENOENT;
 966}
 967EXPORT_SYMBOL(tcp_v4_md5_do_del);
 968
 969static void tcp_v4_clear_md5_list(struct sock *sk)
 970{
 971        struct tcp_sock *tp = tcp_sk(sk);
 972
  973        /* Free each key, then the set of keys,
 974         * the crypto element, and then decrement our
 975         * hold on the last resort crypto.
 976         */
 977        if (tp->md5sig_info->entries4) {
 978                int i;
 979                for (i = 0; i < tp->md5sig_info->entries4; i++)
 980                        kfree(tp->md5sig_info->keys4[i].base.key);
 981                tp->md5sig_info->entries4 = 0;
 982                tcp_free_md5sig_pool();
 983        }
 984        if (tp->md5sig_info->keys4) {
 985                kfree(tp->md5sig_info->keys4);
 986                tp->md5sig_info->keys4 = NULL;
 987                tp->md5sig_info->alloced4  = 0;
 988        }
 989}
 990
 991static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 992                                 int optlen)
 993{
 994        struct tcp_md5sig cmd;
 995        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
 996        u8 *newkey;
 997
 998        if (optlen < sizeof(cmd))
 999                return -EINVAL;
1000
1001        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1002                return -EFAULT;
1003
1004        if (sin->sin_family != AF_INET)
1005                return -EINVAL;
1006
1007        if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1008                if (!tcp_sk(sk)->md5sig_info)
1009                        return -ENOENT;
1010                return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1011        }
1012
1013        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1014                return -EINVAL;
1015
1016        if (!tcp_sk(sk)->md5sig_info) {
1017                struct tcp_sock *tp = tcp_sk(sk);
1018                struct tcp_md5sig_info *p;
1019
1020                p = kzalloc(sizeof(*p), sk->sk_allocation);
1021                if (!p)
1022                        return -EINVAL;
1023
1024                tp->md5sig_info = p;
1025                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1026        }
1027
1028        newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1029        if (!newkey)
1030                return -ENOMEM;
1031        return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1032                                 newkey, cmd.tcpm_keylen);
1033}
1034
1035static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1036                                        __be32 daddr, __be32 saddr, int nbytes)
1037{
1038        struct tcp4_pseudohdr *bp;
1039        struct scatterlist sg;
1040
1041        bp = &hp->md5_blk.ip4;
1042
1043        /*
1044         * 1. the TCP pseudo-header (in the order: source IP address,
1045         * destination IP address, zero-padded protocol number, and
1046         * segment length)
1047         */
1048        bp->saddr = saddr;
1049        bp->daddr = daddr;
1050        bp->pad = 0;
1051        bp->protocol = IPPROTO_TCP;
1052        bp->len = cpu_to_be16(nbytes);
1053
1054        sg_init_one(&sg, bp, sizeof(*bp));
1055        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1056}
1057
1058static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1059                               __be32 daddr, __be32 saddr, struct tcphdr *th)
1060{
1061        struct tcp_md5sig_pool *hp;
1062        struct hash_desc *desc;
1063
1064        hp = tcp_get_md5sig_pool();
1065        if (!hp)
1066                goto clear_hash_noput;
1067        desc = &hp->md5_desc;
1068
1069        if (crypto_hash_init(desc))
1070                goto clear_hash;
1071        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1072                goto clear_hash;
1073        if (tcp_md5_hash_header(hp, th))
1074                goto clear_hash;
1075        if (tcp_md5_hash_key(hp, key))
1076                goto clear_hash;
1077        if (crypto_hash_final(desc, md5_hash))
1078                goto clear_hash;
1079
1080        tcp_put_md5sig_pool();
1081        return 0;
1082
1083clear_hash:
1084        tcp_put_md5sig_pool();
1085clear_hash_noput:
1086        memset(md5_hash, 0, 16);
1087        return 1;
1088}
1089
1090int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1091                        struct sock *sk, struct request_sock *req,
1092                        struct sk_buff *skb)
1093{
1094        struct tcp_md5sig_pool *hp;
1095        struct hash_desc *desc;
1096        struct tcphdr *th = tcp_hdr(skb);
1097        __be32 saddr, daddr;
1098
1099        if (sk) {
1100                saddr = inet_sk(sk)->inet_saddr;
1101                daddr = inet_sk(sk)->inet_daddr;
1102        } else if (req) {
1103                saddr = inet_rsk(req)->loc_addr;
1104                daddr = inet_rsk(req)->rmt_addr;
1105        } else {
1106                const struct iphdr *iph = ip_hdr(skb);
1107                saddr = iph->saddr;
1108                daddr = iph->daddr;
1109        }
1110
1111        hp = tcp_get_md5sig_pool();
1112        if (!hp)
1113                goto clear_hash_noput;
1114        desc = &hp->md5_desc;
1115
1116        if (crypto_hash_init(desc))
1117                goto clear_hash;
1118
1119        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1120                goto clear_hash;
1121        if (tcp_md5_hash_header(hp, th))
1122                goto clear_hash;
1123        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1124                goto clear_hash;
1125        if (tcp_md5_hash_key(hp, key))
1126                goto clear_hash;
1127        if (crypto_hash_final(desc, md5_hash))
1128                goto clear_hash;
1129
1130        tcp_put_md5sig_pool();
1131        return 0;
1132
1133clear_hash:
1134        tcp_put_md5sig_pool();
1135clear_hash_noput:
1136        memset(md5_hash, 0, 16);
1137        return 1;
1138}
1139EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1140
1141static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1142{
1143        /*
1144         * This gets called for each TCP segment that arrives
1145         * so we want to be efficient.
1146         * We have 3 drop cases:
1147         * o No MD5 hash and one expected.
1148         * o MD5 hash and we're not expecting one.
 1149         * o MD5 hash and it's wrong.
1150         */
1151        __u8 *hash_location = NULL;
1152        struct tcp_md5sig_key *hash_expected;
1153        const struct iphdr *iph = ip_hdr(skb);
1154        struct tcphdr *th = tcp_hdr(skb);
1155        int genhash;
1156        unsigned char newhash[16];
1157
1158        hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1159        hash_location = tcp_parse_md5sig_option(th);
1160
1161        /* We've parsed the options - do we have a hash? */
1162        if (!hash_expected && !hash_location)
1163                return 0;
1164
1165        if (hash_expected && !hash_location) {
1166                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1167                return 1;
1168        }
1169
1170        if (!hash_expected && hash_location) {
1171                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1172                return 1;
1173        }
1174
1175        /* Okay, so this is hash_expected and hash_location -
1176         * so we need to calculate the checksum.
1177         */
1178        genhash = tcp_v4_md5_hash_skb(newhash,
1179                                      hash_expected,
1180                                      NULL, NULL, skb);
1181
1182        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1183                if (net_ratelimit()) {
1184                        printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1185                               &iph->saddr, ntohs(th->source),
1186                               &iph->daddr, ntohs(th->dest),
1187                               genhash ? " tcp_v4_calc_md5_hash failed" : "");
1188                }
1189                return 1;
1190        }
1191        return 0;
1192}
1193
1194#endif
1195
1196struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1197        .family         =       PF_INET,
1198        .obj_size       =       sizeof(struct tcp_request_sock),
1199        .rtx_syn_ack    =       tcp_v4_rtx_synack,
1200        .send_ack       =       tcp_v4_reqsk_send_ack,
1201        .destructor     =       tcp_v4_reqsk_destructor,
1202        .send_reset     =       tcp_v4_send_reset,
1203        .syn_ack_timeout =      tcp_syn_ack_timeout,
1204};
1205
1206#ifdef CONFIG_TCP_MD5SIG
1207static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1208        .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
1209        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1210};
1211#endif
1212
1213int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1214{
1215        struct tcp_extend_values tmp_ext;
1216        struct tcp_options_received tmp_opt;
1217        u8 *hash_location;
1218        struct request_sock *req;
1219        struct inet_request_sock *ireq;
1220        struct tcp_sock *tp = tcp_sk(sk);
1221        struct dst_entry *dst = NULL;
1222        __be32 saddr = ip_hdr(skb)->saddr;
1223        __be32 daddr = ip_hdr(skb)->daddr;
1224        __u32 isn = TCP_SKB_CB(skb)->when;
1225#ifdef CONFIG_SYN_COOKIES
1226        int want_cookie = 0;
1227#else
1228#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1229#endif
1230
 1231        /* Never answer SYNs sent to broadcast or multicast addresses */
1232        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1233                goto drop;
1234
 1235        /* TW buckets are converted to open requests without
 1236         * limitation; they conserve resources and the peer is
 1237         * evidently a real one.
1238         */
1239        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1240                if (net_ratelimit())
1241                        syn_flood_warning(skb);
1242#ifdef CONFIG_SYN_COOKIES
1243                if (sysctl_tcp_syncookies) {
1244                        want_cookie = 1;
1245                } else
1246#endif
1247                goto drop;
1248        }
1249
 1250        /* The accept backlog is full. If we have already queued enough
 1251         * warm entries in the SYN queue, drop the request. That is better
 1252         * than clogging the SYN queue with open requests with exponentially
 1253         * increasing timeouts.
1254         */
1255        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1256                goto drop;
1257
1258        req = inet_reqsk_alloc(&tcp_request_sock_ops);
1259        if (!req)
1260                goto drop;
1261
1262#ifdef CONFIG_TCP_MD5SIG
1263        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1264#endif
1265
1266        tcp_clear_options(&tmp_opt);
1267        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1268        tmp_opt.user_mss  = tp->rx_opt.user_mss;
1269        tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1270
1271        if (tmp_opt.cookie_plus > 0 &&
1272            tmp_opt.saw_tstamp &&
1273            !tp->rx_opt.cookie_out_never &&
1274            (sysctl_tcp_cookie_size > 0 ||
1275             (tp->cookie_values != NULL &&
1276              tp->cookie_values->cookie_desired > 0))) {
1277                u8 *c;
1278                u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1279                int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1280
1281                if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1282                        goto drop_and_release;
1283
1284                /* Secret recipe starts with IP addresses */
1285                *mess++ ^= (__force u32)daddr;
1286                *mess++ ^= (__force u32)saddr;
1287
1288                /* plus variable length Initiator Cookie */
1289                c = (u8 *)mess;
1290                while (l-- > 0)
1291                        *c++ ^= *hash_location++;
1292
1293#ifdef CONFIG_SYN_COOKIES
1294                want_cookie = 0;        /* not our kind of cookie */
1295#endif
1296                tmp_ext.cookie_out_never = 0; /* false */
1297                tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1298        } else if (!tp->rx_opt.cookie_in_always) {
1299                /* redundant indications, but ensure initialization. */
1300                tmp_ext.cookie_out_never = 1; /* true */
1301                tmp_ext.cookie_plus = 0;
1302        } else {
1303                goto drop_and_release;
1304        }
1305        tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1306
1307        if (want_cookie && !tmp_opt.saw_tstamp)
1308                tcp_clear_options(&tmp_opt);
1309
1310        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1311        tcp_openreq_init(req, &tmp_opt, skb);
1312
1313        ireq = inet_rsk(req);
1314        ireq->loc_addr = daddr;
1315        ireq->rmt_addr = saddr;
1316        ireq->no_srccheck = inet_sk(sk)->transparent;
1317        ireq->opt = tcp_v4_save_options(sk, skb);
1318
1319        if (security_inet_conn_request(sk, skb, req))
1320                goto drop_and_free;
1321
1322        if (!want_cookie || tmp_opt.tstamp_ok)
1323                TCP_ECN_create_request(req, tcp_hdr(skb));
1324
1325        if (want_cookie) {
1326                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1327                req->cookie_ts = tmp_opt.tstamp_ok;
1328        } else if (!isn) {
1329                struct inet_peer *peer = NULL;
1330
 1331                /* VJ's idea. We save the last timestamp seen
 1332                 * from the destination in the peer table when entering
 1333                 * TIME-WAIT state, and check against it before
 1334                 * accepting a new connection request.
 1335                 *
 1336                 * If "isn" is not zero, this request hit a live
 1337                 * timewait bucket, so all the necessary checks
 1338                 * were made in the function processing timewait state.
1339                 */
1340                if (tmp_opt.saw_tstamp &&
1341                    tcp_death_row.sysctl_tw_recycle &&
1342                    (dst = inet_csk_route_req(sk, req)) != NULL &&
1343                    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1344                    peer->daddr.a4 == saddr) {
1345                        inet_peer_refcheck(peer);
1346                        if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1347                            (s32)(peer->tcp_ts - req->ts_recent) >
1348                                                        TCP_PAWS_WINDOW) {
1349                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1350                                goto drop_and_release;
1351                        }
1352                }
1353                /* Kill the following clause, if you dislike this way. */
1354                else if (!sysctl_tcp_syncookies &&
1355                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1356                          (sysctl_max_syn_backlog >> 2)) &&
1357                         (!peer || !peer->tcp_ts_stamp) &&
1358                         (!dst || !dst_metric(dst, RTAX_RTT))) {
 1359                        /* Without syncookies the last quarter of
 1360                         * the backlog is reserved for destinations
 1361                         * proven to be alive.
 1362                         * It means that we keep communicating only
 1363                         * with destinations already remembered
 1364                         * at the moment of the synflood.
1365                         */
1366                        LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1367                                       &saddr, ntohs(tcp_hdr(skb)->source));
1368                        goto drop_and_release;
1369                }
1370
1371                isn = tcp_v4_init_sequence(skb);
1372        }
1373        tcp_rsk(req)->snt_isn = isn;
1374
1375        if (tcp_v4_send_synack(sk, dst, req,
1376                               (struct request_values *)&tmp_ext) ||
1377            want_cookie)
1378                goto drop_and_free;
1379
1380        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1381        return 0;
1382
1383drop_and_release:
1384        dst_release(dst);
1385drop_and_free:
1386        reqsk_free(req);
1387drop:
1388        return 0;
1389}
1390EXPORT_SYMBOL(tcp_v4_conn_request);
1391
1392
1393/*
1394 * The three way handshake has completed - we got a valid synack -
1395 * now create the new socket.
1396 */
1397struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1398                                  struct request_sock *req,
1399                                  struct dst_entry *dst)
1400{
1401        struct inet_request_sock *ireq;
1402        struct inet_sock *newinet;
1403        struct tcp_sock *newtp;
1404        struct sock *newsk;
1405#ifdef CONFIG_TCP_MD5SIG
1406        struct tcp_md5sig_key *key;
1407#endif
1408
1409        if (sk_acceptq_is_full(sk))
1410                goto exit_overflow;
1411
1412        if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1413                goto exit;
1414
1415        newsk = tcp_create_openreq_child(sk, req, skb);
1416        if (!newsk)
1417                goto exit_nonewsk;
1418
1419        newsk->sk_gso_type = SKB_GSO_TCPV4;
1420        sk_setup_caps(newsk, dst);
1421
1422        newtp                 = tcp_sk(newsk);
1423        newinet               = inet_sk(newsk);
1424        ireq                  = inet_rsk(req);
1425        newinet->inet_daddr   = ireq->rmt_addr;
1426        newinet->inet_rcv_saddr = ireq->loc_addr;
1427        newinet->inet_saddr           = ireq->loc_addr;
1428        newinet->opt          = ireq->opt;
1429        ireq->opt             = NULL;
1430        newinet->mc_index     = inet_iif(skb);
1431        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1432        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1433        if (newinet->opt)
1434                inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1435        newinet->inet_id = newtp->write_seq ^ jiffies;
1436
1437        tcp_mtup_init(newsk);
1438        tcp_sync_mss(newsk, dst_mtu(dst));
1439        newtp->advmss = dst_metric_advmss(dst);
1440        if (tcp_sk(sk)->rx_opt.user_mss &&
1441            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1442                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1443
1444        tcp_initialize_rcv_mss(newsk);
1445
1446#ifdef CONFIG_TCP_MD5SIG
1447        /* Copy over the MD5 key from the original socket */
1448        key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1449        if (key != NULL) {
1450                /*
1451                 * We're using one, so create a matching key
1452                 * on the newsk structure. If we fail to get
1453                 * memory, then we end up not copying the key
1454                 * across. Shucks.
1455                 */
1456                char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1457                if (newkey != NULL)
1458                        tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1459                                          newkey, key->keylen);
1460                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1461        }
1462#endif
1463
1464        if (__inet_inherit_port(sk, newsk) < 0) {
1465                sock_put(newsk);
1466                goto exit;
1467        }
1468        __inet_hash_nolisten(newsk, NULL);
1469
1470        return newsk;
1471
1472exit_overflow:
1473        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1474exit_nonewsk:
1475        dst_release(dst);
1476exit:
1477        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1478        return NULL;
1479}
1480EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
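/*
 * Editor's note: illustrative sketch, not part of tcp_ipv4.c.  It shows the
 * advertised-MSS clamp applied to the child socket in tcp_v4_syn_recv_sock()
 * above: start from the route's advertised MSS and cap it with the
 * user-configured MSS, if one was set.  The helper name is hypothetical.
 */
static unsigned int clamp_child_advmss(unsigned int dst_advmss,
				       unsigned int user_mss)
{
	/* user_mss == 0 means "not configured" in this sketch */
	if (user_mss && user_mss < dst_advmss)
		return user_mss;
	return dst_advmss;
}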
1481
1482static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1483{
1484        struct tcphdr *th = tcp_hdr(skb);
1485        const struct iphdr *iph = ip_hdr(skb);
1486        struct sock *nsk;
1487        struct request_sock **prev;
1488        /* Find possible connection requests. */
1489        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1490                                                       iph->saddr, iph->daddr);
1491        if (req)
1492                return tcp_check_req(sk, skb, req, prev);
1493
1494        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1495                        th->source, iph->daddr, th->dest, inet_iif(skb));
1496
1497        if (nsk) {
1498                if (nsk->sk_state != TCP_TIME_WAIT) {
1499                        bh_lock_sock(nsk);
1500                        return nsk;
1501                }
1502                inet_twsk_put(inet_twsk(nsk));
1503                return NULL;
1504        }
1505
1506#ifdef CONFIG_SYN_COOKIES
1507        if (!th->syn)
1508                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1509#endif
1510        return sk;
1511}
1512
1513static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1514{
1515        const struct iphdr *iph = ip_hdr(skb);
1516
1517        if (skb->ip_summed == CHECKSUM_COMPLETE) {
1518                if (!tcp_v4_check(skb->len, iph->saddr,
1519                                  iph->daddr, skb->csum)) {
1520                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1521                        return 0;
1522                }
1523        }
1524
1525        skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1526                                       skb->len, IPPROTO_TCP, 0);
1527
1528        if (skb->len <= 76) {
1529                return __skb_checksum_complete(skb);
1530        }
1531        return 0;
1532}
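/*
 * Editor's note: illustrative user-space sketch, not part of tcp_ipv4.c.
 * It spells out what csum_tcpudp_nofold()/tcp_v4_check() compute above: the
 * 16-bit one's-complement Internet checksum over the IPv4 pseudo-header
 * (source, destination, protocol, TCP length) followed by the TCP segment.
 * saddr/daddr are the numeric addresses (ntohl() of the wire bytes), the
 * segment is given as raw wire bytes, and len must fit in 16 bits.
 */
#include <stdint.h>
#include <stddef.h>

static uint16_t tcp_checksum_sketch(uint32_t saddr, uint32_t daddr,
				    const uint8_t *segment, size_t len)
{
	uint64_t sum = 0;
	size_t i;

	/* Pseudo-header, summed as 16-bit big-endian words */
	sum += (saddr >> 16) + (saddr & 0xffff);
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += 6;		/* zero byte + IPPROTO_TCP */
	sum += len;		/* TCP length: header plus payload */

	/* The segment itself; a trailing odd byte is padded with zero */
	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)((segment[i] << 8) | segment[i + 1]);
	if (len & 1)
		sum += (uint32_t)(segment[len - 1] << 8);

	/* Fold the carries back into 16 bits and take the complement */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}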
1533
1534
1535/* The socket must have its spinlock held when we get
1536 * here.
1537 *
1538 * We have a potential double-lock case here, so even when
1539 * doing backlog processing we use the BH locking scheme.
1540 * This is because we cannot sleep with the original spinlock
1541 * held.
1542 */
1543int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1544{
1545        struct sock *rsk;
1546#ifdef CONFIG_TCP_MD5SIG
1547        /*
1548         * We really want to reject the packet as early as possible
1549         * if:
1550         *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1551         *  o There is an MD5 option and we're not expecting one
1552         */
1553        if (tcp_v4_inbound_md5_hash(sk, skb))
1554                goto discard;
1555#endif
1556
1557        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1558                sock_rps_save_rxhash(sk, skb->rxhash);
1559                TCP_CHECK_TIMER(sk);
1560                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1561                        rsk = sk;
1562                        goto reset;
1563                }
1564                TCP_CHECK_TIMER(sk);
1565                return 0;
1566        }
1567
1568        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1569                goto csum_err;
1570
1571        if (sk->sk_state == TCP_LISTEN) {
1572                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1573                if (!nsk)
1574                        goto discard;
1575
1576                if (nsk != sk) {
1577                        if (tcp_child_process(sk, nsk, skb)) {
1578                                rsk = nsk;
1579                                goto reset;
1580                        }
1581                        return 0;
1582                }
1583        } else
1584                sock_rps_save_rxhash(sk, skb->rxhash);
1585
1586
1587        TCP_CHECK_TIMER(sk);
1588        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1589                rsk = sk;
1590                goto reset;
1591        }
1592        TCP_CHECK_TIMER(sk);
1593        return 0;
1594
1595reset:
1596        tcp_v4_send_reset(rsk, skb);
1597discard:
1598        kfree_skb(skb);
1599        /* Be careful here. If this function gets more complicated and
1600         * gcc suffers from register pressure on the x86, sk (in %ebx)
1601         * might be destroyed here. This current version compiles correctly,
1602         * but you have been warned.
1603         */
1604        return 0;
1605
1606csum_err:
1607        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1608        goto discard;
1609}
1610EXPORT_SYMBOL(tcp_v4_do_rcv);
1611
1612/*
1613 *      From tcp_input.c
1614 */
1615
1616int tcp_v4_rcv(struct sk_buff *skb)
1617{
1618        const struct iphdr *iph;
1619        struct tcphdr *th;
1620        struct sock *sk;
1621        int ret;
1622        struct net *net = dev_net(skb->dev);
1623
1624        if (skb->pkt_type != PACKET_HOST)
1625                goto discard_it;
1626
1627        /* Count it even if it's bad */
1628        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1629
1630        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1631                goto discard_it;
1632
1633        th = tcp_hdr(skb);
1634
1635        if (th->doff < sizeof(struct tcphdr) / 4)
1636                goto bad_packet;
1637        if (!pskb_may_pull(skb, th->doff * 4))
1638                goto discard_it;
1639
1640        /* An explanation is required here, I think.
1641         * Packet length and doff are validated by header prediction,
1642         * provided the case of th->doff == 0 has been eliminated.
1643         * So, we defer the checks. */
1644        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1645                goto bad_packet;
1646
1647        th = tcp_hdr(skb);
1648        iph = ip_hdr(skb);
1649        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1650        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1651                                    skb->len - th->doff * 4);
1652        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1653        TCP_SKB_CB(skb)->when    = 0;
1654        TCP_SKB_CB(skb)->flags   = iph->tos;
1655        TCP_SKB_CB(skb)->sacked  = 0;
1656
1657        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1658        if (!sk)
1659                goto no_tcp_socket;
1660
1661process:
1662        if (sk->sk_state == TCP_TIME_WAIT)
1663                goto do_time_wait;
1664
1665        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1666                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1667                goto discard_and_relse;
1668        }
1669
1670        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1671                goto discard_and_relse;
1672        nf_reset(skb);
1673
1674        if (sk_filter(sk, skb))
1675                goto discard_and_relse;
1676
1677        skb->dev = NULL;
1678
1679        bh_lock_sock_nested(sk);
1680        ret = 0;
1681        if (!sock_owned_by_user(sk)) {
1682#ifdef CONFIG_NET_DMA
1683                struct tcp_sock *tp = tcp_sk(sk);
1684                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1685                        tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1686                if (tp->ucopy.dma_chan)
1687                        ret = tcp_v4_do_rcv(sk, skb);
1688                else
1689#endif
1690                {
1691                        if (!tcp_prequeue(sk, skb))
1692                                ret = tcp_v4_do_rcv(sk, skb);
1693                }
1694        } else if (unlikely(sk_add_backlog(sk, skb))) {
1695                bh_unlock_sock(sk);
1696                NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1697                goto discard_and_relse;
1698        }
1699        bh_unlock_sock(sk);
1700
1701        sock_put(sk);
1702
1703        return ret;
1704
1705no_tcp_socket:
1706        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1707                goto discard_it;
1708
1709        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1710bad_packet:
1711                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1712        } else {
1713                tcp_v4_send_reset(NULL, skb);
1714        }
1715
1716discard_it:
1717        /* Discard frame. */
1718        kfree_skb(skb);
1719        return 0;
1720
1721discard_and_relse:
1722        sock_put(sk);
1723        goto discard_it;
1724
1725do_time_wait:
1726        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1727                inet_twsk_put(inet_twsk(sk));
1728                goto discard_it;
1729        }
1730
1731        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1732                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1733                inet_twsk_put(inet_twsk(sk));
1734                goto discard_it;
1735        }
1736        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1737        case TCP_TW_SYN: {
1738                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1739                                                        &tcp_hashinfo,
1740                                                        iph->daddr, th->dest,
1741                                                        inet_iif(skb));
1742                if (sk2) {
1743                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1744                        inet_twsk_put(inet_twsk(sk));
1745                        sk = sk2;
1746                        goto process;
1747                }
1748                /* Fall through to ACK */
1749        }
1750        case TCP_TW_ACK:
1751                tcp_v4_timewait_ack(sk, skb);
1752                break;
1753        case TCP_TW_RST:
1754                goto no_tcp_socket;
1755        case TCP_TW_SUCCESS:;
1756        }
1757        goto discard_it;
1758}
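/*
 * Editor's note: illustrative sketch, not part of tcp_ipv4.c.  It restates
 * how TCP_SKB_CB(skb)->end_seq is derived in tcp_v4_rcv() above: the end
 * sequence number is the start sequence plus the payload length, with SYN
 * and FIN each consuming one unit of sequence space.  Arithmetic wraps
 * modulo 2^32, exactly as on the wire.
 */
#include <stdint.h>

static uint32_t tcp_end_seq_sketch(uint32_t seq, unsigned int syn,
				   unsigned int fin, uint32_t skb_len,
				   uint32_t doff)
{
	uint32_t payload = skb_len - doff * 4;	/* bytes past the TCP header */

	return seq + syn + fin + payload;
}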
1759
1760struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1761{
1762        struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1763        struct inet_sock *inet = inet_sk(sk);
1764        struct inet_peer *peer;
1765
1766        if (!rt || rt->rt_dst != inet->inet_daddr) {
1767                peer = inet_getpeer_v4(inet->inet_daddr, 1);
1768                *release_it = true;
1769        } else {
1770                if (!rt->peer)
1771                        rt_bind_peer(rt, 1);
1772                peer = rt->peer;
1773                *release_it = false;
1774        }
1775
1776        return peer;
1777}
1778EXPORT_SYMBOL(tcp_v4_get_peer);
1779
1780void *tcp_v4_tw_get_peer(struct sock *sk)
1781{
1782        struct inet_timewait_sock *tw = inet_twsk(sk);
1783
1784        return inet_getpeer_v4(tw->tw_daddr, 1);
1785}
1786EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1787
1788static struct timewait_sock_ops tcp_timewait_sock_ops = {
1789        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1790        .twsk_unique    = tcp_twsk_unique,
1791        .twsk_destructor= tcp_twsk_destructor,
1792        .twsk_getpeer   = tcp_v4_tw_get_peer,
1793};
1794
1795const struct inet_connection_sock_af_ops ipv4_specific = {
1796        .queue_xmit        = ip_queue_xmit,
1797        .send_check        = tcp_v4_send_check,
1798        .rebuild_header    = inet_sk_rebuild_header,
1799        .conn_request      = tcp_v4_conn_request,
1800        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1801        .get_peer          = tcp_v4_get_peer,
1802        .net_header_len    = sizeof(struct iphdr),
1803        .setsockopt        = ip_setsockopt,
1804        .getsockopt        = ip_getsockopt,
1805        .addr2sockaddr     = inet_csk_addr2sockaddr,
1806        .sockaddr_len      = sizeof(struct sockaddr_in),
1807        .bind_conflict     = inet_csk_bind_conflict,
1808#ifdef CONFIG_COMPAT
1809        .compat_setsockopt = compat_ip_setsockopt,
1810        .compat_getsockopt = compat_ip_getsockopt,
1811#endif
1812};
1813EXPORT_SYMBOL(ipv4_specific);
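/*
 * Editor's note: illustrative sketch, not part of tcp_ipv4.c.  ipv4_specific
 * above is an instance of the kernel's "ops table" pattern: per-address-family
 * behaviour is selected once by pointing the socket at a const struct of
 * function pointers, and every call site dispatches through that table
 * instead of branching on the family.  A minimal stand-alone version of the
 * idea, with hypothetical names:
 */
#include <stdio.h>

struct af_ops_sketch {
	int (*queue_xmit)(const char *what);
	int net_header_len;
};

static int v4_queue_xmit_sketch(const char *what)
{
	return printf("IPv4 xmit: %s\n", what);
}

static const struct af_ops_sketch ipv4_ops_sketch = {
	.queue_xmit	= v4_queue_xmit_sketch,
	.net_header_len	= 20,	/* sizeof(struct iphdr) without options */
};

static int xmit_via(const struct af_ops_sketch *ops, const char *what)
{
	return ops->queue_xmit(what);	/* family-specific behaviour, one call site */
}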
1814
1815#ifdef CONFIG_TCP_MD5SIG
1816static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1817        .md5_lookup             = tcp_v4_md5_lookup,
1818        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1819        .md5_add                = tcp_v4_md5_add_func,
1820        .md5_parse              = tcp_v4_parse_md5_keys,
1821};
1822#endif
1823
1824/* NOTE: A lot of fields are set to zero explicitly by the call to
1825 *       sk_alloc(), so they need not be initialized here.
1826 */
1827static int tcp_v4_init_sock(struct sock *sk)
1828{
1829        struct inet_connection_sock *icsk = inet_csk(sk);
1830        struct tcp_sock *tp = tcp_sk(sk);
1831
1832        skb_queue_head_init(&tp->out_of_order_queue);
1833        tcp_init_xmit_timers(sk);
1834        tcp_prequeue_init(tp);
1835
1836        icsk->icsk_rto = TCP_TIMEOUT_INIT;
1837        tp->mdev = TCP_TIMEOUT_INIT;
1838
1839        /* So many TCP implementations out there (incorrectly) count the
1840         * initial SYN frame in their delayed-ACK and congestion control
1841         * algorithms that we must have the following bandaid to talk
1842         * efficiently to them.  -DaveM
1843         */
1844        tp->snd_cwnd = 2;
1845
1846        /* See draft-stevens-tcpca-spec-01 for discussion of the
1847         * initialization of these values.
1848         */
1849        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1850        tp->snd_cwnd_clamp = ~0;
1851        tp->mss_cache = TCP_MSS_DEFAULT;
1852
1853        tp->reordering = sysctl_tcp_reordering;
1854        icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1855
1856        sk->sk_state = TCP_CLOSE;
1857
1858        sk->sk_write_space = sk_stream_write_space;
1859        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1860
1861        icsk->icsk_af_ops = &ipv4_specific;
1862        icsk->icsk_sync_mss = tcp_sync_mss;
1863#ifdef CONFIG_TCP_MD5SIG
1864        tp->af_specific = &tcp_sock_ipv4_specific;
1865#endif
1866
1867        /* TCP Cookie Transactions */
1868        if (sysctl_tcp_cookie_size > 0) {
1869                /* Default, cookies without s_data_payload. */
1870                tp->cookie_values =
1871                        kzalloc(sizeof(*tp->cookie_values),
1872                                sk->sk_allocation);
1873                if (tp->cookie_values != NULL)
1874                        kref_init(&tp->cookie_values->kref);
1875        }
1876        /* Presumed zeroed, in order of appearance:
1877         *      cookie_in_always, cookie_out_never,
1878         *      s_data_constant, s_data_in, s_data_out
1879         */
1880        sk->sk_sndbuf = sysctl_tcp_wmem[1];
1881        sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1882
1883        local_bh_disable();
1884        percpu_counter_inc(&tcp_sockets_allocated);
1885        local_bh_enable();
1886
1887        return 0;
1888}
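/*
 * Editor's note: illustrative user-space sketch, not part of tcp_ipv4.c.
 * The defaults assigned to sk_sndbuf/sk_rcvbuf in tcp_v4_init_sock() above
 * are the middle ("default") entries of the tcp_wmem/tcp_rmem sysctl
 * triplets (min, default, max).  This reads the current default send buffer
 * size back from procfs on a running system.
 */
#include <stdio.h>

static long tcp_default_sndbuf(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_wmem", "r");
	long min = 0, def = -1, max = 0;

	if (!f)
		return -1;
	if (fscanf(f, "%ld %ld %ld", &min, &def, &max) != 3)
		def = -1;
	fclose(f);
	return def;	/* corresponds to sysctl_tcp_wmem[1] */
}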
1889
1890void tcp_v4_destroy_sock(struct sock *sk)
1891{
1892        struct tcp_sock *tp = tcp_sk(sk);
1893
1894        tcp_clear_xmit_timers(sk);
1895
1896        tcp_cleanup_congestion_control(sk);
1897
1898        /* Clean up the write buffer. */
1899        tcp_write_queue_purge(sk);
1900
1901        /* Cleans up our, hopefully empty, out_of_order_queue. */
1902        __skb_queue_purge(&tp->out_of_order_queue);
1903
1904#ifdef CONFIG_TCP_MD5SIG
1905        /* Clean up the MD5 key list, if any */
1906        if (tp->md5sig_info) {
1907                tcp_v4_clear_md5_list(sk);
1908                kfree(tp->md5sig_info);
1909                tp->md5sig_info = NULL;
1910        }
1911#endif
1912
1913#ifdef CONFIG_NET_DMA
1914        /* Cleans up our sk_async_wait_queue */
1915        __skb_queue_purge(&sk->sk_async_wait_queue);
1916#endif
1917
1918        /* Clean up the prequeue; it really should be empty. */
1919        __skb_queue_purge(&tp->ucopy.prequeue);
1920
1921        /* Clean up a referenced TCP bind bucket. */
1922        if (inet_csk(sk)->icsk_bind_hash)
1923                inet_put_port(sk);
1924
1925        /*
1926         * If sendmsg cached page exists, toss it.
1927         */
1928        if (sk->sk_sndmsg_page) {
1929                __free_page(sk->sk_sndmsg_page);
1930                sk->sk_sndmsg_page = NULL;
1931        }
1932
1933        /* TCP Cookie Transactions */
1934        if (tp->cookie_values != NULL) {
1935                kref_put(&tp->cookie_values->kref,
1936                         tcp_cookie_values_release);
1937                tp->cookie_values = NULL;
1938        }
1939
1940        percpu_counter_dec(&tcp_sockets_allocated);
1941}
1942EXPORT_SYMBOL(tcp_v4_destroy_sock);
1943
1944#ifdef CONFIG_PROC_FS
1945/* Proc filesystem TCP sock list dumping. */
1946
1947static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1948{
1949        return hlist_nulls_empty(head) ? NULL :
1950                list_entry(head->first, struct inet_timewait_sock, tw_node);
1951}
1952
1953static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1954{
1955        return !is_a_nulls(tw->tw_node.next) ?
1956                hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1957}
1958
1959/*
1960 * Get the next listener socket following cur.  If cur is NULL, get the first
1961 * socket starting from the bucket given in st->bucket; when st->bucket is zero the
1962 * very first socket in the hash table is returned.
1963 */
1964static void *listening_get_next(struct seq_file *seq, void *cur)
1965{
1966        struct inet_connection_sock *icsk;
1967        struct hlist_nulls_node *node;
1968        struct sock *sk = cur;
1969        struct inet_listen_hashbucket *ilb;
1970        struct tcp_iter_state *st = seq->private;
1971        struct net *net = seq_file_net(seq);
1972
1973        if (!sk) {
1974                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1975                spin_lock_bh(&ilb->lock);
1976                sk = sk_nulls_head(&ilb->head);
1977                st->offset = 0;
1978                goto get_sk;
1979        }
1980        ilb = &tcp_hashinfo.listening_hash[st->bucket];
1981        ++st->num;
1982        ++st->offset;
1983
1984        if (st->state == TCP_SEQ_STATE_OPENREQ) {
1985                struct request_sock *req = cur;
1986
1987                icsk = inet_csk(st->syn_wait_sk);
1988                req = req->dl_next;
1989                while (1) {
1990                        while (req) {
1991                                if (req->rsk_ops->family == st->family) {
1992                                        cur = req;
1993                                        goto out;
1994                                }
1995                                req = req->dl_next;
1996                        }
1997                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1998                                break;
1999get_req:
2000                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2001                }
2002                sk        = sk_nulls_next(st->syn_wait_sk);
2003                st->state = TCP_SEQ_STATE_LISTENING;
2004                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2005        } else {
2006                icsk = inet_csk(sk);
2007                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2008                if (reqsk_queue_len(&icsk->icsk_accept_queue))
2009                        goto start_req;
2010                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2011                sk = sk_nulls_next(sk);
2012        }
2013get_sk:
2014        sk_nulls_for_each_from(sk, node) {
2015                if (!net_eq(sock_net(sk), net))
2016                        continue;
2017                if (sk->sk_family == st->family) {
2018                        cur = sk;
2019                        goto out;
2020                }
2021                icsk = inet_csk(sk);
2022                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2023                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2024start_req:
2025                        st->uid         = sock_i_uid(sk);
2026                        st->syn_wait_sk = sk;
2027                        st->state       = TCP_SEQ_STATE_OPENREQ;
2028                        st->sbucket     = 0;
2029                        goto get_req;
2030                }
2031                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2032        }
2033        spin_unlock_bh(&ilb->lock);
2034        st->offset = 0;
2035        if (++st->bucket < INET_LHTABLE_SIZE) {
2036                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2037                spin_lock_bh(&ilb->lock);
2038                sk = sk_nulls_head(&ilb->head);
2039                goto get_sk;
2040        }
2041        cur = NULL;
2042out:
2043        return cur;
2044}
2045
2046static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2047{
2048        struct tcp_iter_state *st = seq->private;
2049        void *rc;
2050
2051        st->bucket = 0;
2052        st->offset = 0;
2053        rc = listening_get_next(seq, NULL);
2054
2055        while (rc && *pos) {
2056                rc = listening_get_next(seq, rc);
2057                --*pos;
2058        }
2059        return rc;
2060}
2061
2062static inline int empty_bucket(struct tcp_iter_state *st)
2063{
2064        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2065                hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2066}
2067
2068/*
2069 * Get the first established socket, starting from the bucket given in st->bucket.
2070 * If st->bucket is zero, the very first socket in the hash is returned.
2071 */
2072static void *established_get_first(struct seq_file *seq)
2073{
2074        struct tcp_iter_state *st = seq->private;
2075        struct net *net = seq_file_net(seq);
2076        void *rc = NULL;
2077
2078        st->offset = 0;
2079        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2080                struct sock *sk;
2081                struct hlist_nulls_node *node;
2082                struct inet_timewait_sock *tw;
2083                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2084
2085                /* Lockless fast path for the common case of empty buckets */
2086                if (empty_bucket(st))
2087                        continue;
2088
2089                spin_lock_bh(lock);
2090                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2091                        if (sk->sk_family != st->family ||
2092                            !net_eq(sock_net(sk), net)) {
2093                                continue;
2094                        }
2095                        rc = sk;
2096                        goto out;
2097                }
2098                st->state = TCP_SEQ_STATE_TIME_WAIT;
2099                inet_twsk_for_each(tw, node,
2100                                   &tcp_hashinfo.ehash[st->bucket].twchain) {
2101                        if (tw->tw_family != st->family ||
2102                            !net_eq(twsk_net(tw), net)) {
2103                                continue;
2104                        }
2105                        rc = tw;
2106                        goto out;
2107                }
2108                spin_unlock_bh(lock);
2109                st->state = TCP_SEQ_STATE_ESTABLISHED;
2110        }
2111out:
2112        return rc;
2113}
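/*
 * Editor's note: illustrative sketch, not part of tcp_ipv4.c.  It shows the
 * locking pattern used by established_get_first() above: peek at a hash
 * bucket without its lock (empty buckets are the common case, and the racy
 * check is safe), then take the per-bucket lock only for buckets that look
 * non-empty.  The types here are simplified stand-ins; the kernel uses
 * nulls-terminated RCU lists and per-bucket spinlocks.
 */
#include <pthread.h>
#include <stddef.h>

struct bucket_sketch {
	pthread_mutex_t lock;
	void *head;			/* NULL means the bucket is empty */
};

static void *first_entry_sketch(struct bucket_sketch *tbl, size_t nbuckets,
				size_t *locked_bucket)
{
	size_t i;

	for (i = 0; i < nbuckets; i++) {
		if (!tbl[i].head)	/* lockless fast path for empty buckets */
			continue;
		pthread_mutex_lock(&tbl[i].lock);
		if (tbl[i].head) {	/* re-check now that we hold the lock */
			*locked_bucket = i;	/* caller unlocks this bucket when done */
			return tbl[i].head;
		}
		pthread_mutex_unlock(&tbl[i].lock);
	}
	return NULL;
}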
2114
2115static void *established_get_next(struct seq_file *seq, void *cur)
2116{
2117        struct sock *sk = cur;
2118        struct inet_timewait_sock *tw;
2119        struct hlist_nulls_node *node;
2120        struct tcp_iter_state *st = seq->private;
2121        struct net *net = seq_file_net(seq);
2122
2123        ++st->num;
2124        ++st->offset;
2125
2126        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2127                tw = cur;
2128                tw = tw_next(tw);
2129get_tw:
2130                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2131                        tw = tw_next(tw);
2132                }
2133                if (tw) {
2134                        cur = tw;
2135                        goto out;
2136                }
2137                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2138                st->state = TCP_SEQ_STATE_ESTABLISHED;
2139
2140                /* Look for the next non-empty bucket */
2141                st->offset = 0;
2142                while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2143                                empty_bucket(st))
2144                        ;
2145                if (st->bucket > tcp_hashinfo.ehash_mask)
2146                        return NULL;
2147
2148                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2149                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2150        } else
2151                sk = sk_nulls_next(sk);
2152
2153        sk_nulls_for_each_from(sk, node) {
2154                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2155                        goto found;
2156        }
2157
2158        st->state = TCP_SEQ_STATE_TIME_WAIT;
2159        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2160        goto get_tw;
2161found:
2162        cur = sk;
2163out:
2164        return cur;
2165}
2166
2167static void *established_get_idx(struct seq_file *seq, loff_t pos)
2168{
2169        struct tcp_iter_state *st = seq->private;
2170        void *rc;
2171
2172        st->bucket = 0;
2173        rc = established_get_first(seq);
2174
2175        while (rc && pos) {
2176                rc = established_get_next(seq, rc);
2177                --pos;
2178        }
2179        return rc;
2180}
2181
2182static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2183{
2184        void *rc;
2185        struct tcp_iter_state *st = seq->private;
2186
2187        st->state = TCP_SEQ_STATE_LISTENING;
2188        rc        = listening_get_idx(seq, &pos);
2189
2190        if (!rc) {
2191                st->state = TCP_SEQ_STATE_ESTABLISHED;
2192                rc        = established_get_idx(seq, pos);
2193        }
2194
2195        return rc;
2196}
2197
2198static void *tcp_seek_last_pos(struct seq_file *seq)
2199{
2200        struct tcp_iter_state *st = seq->private;
2201        int offset = st->offset;
2202        int orig_num = st->num;
2203        void *rc = NULL;
2204
2205        switch (st->state) {
2206        case TCP_SEQ_STATE_OPENREQ:
2207        case TCP_SEQ_STATE_LISTENING:
2208                if (st->bucket >= INET_LHTABLE_SIZE)
2209                        break;
2210                st->state = TCP_SEQ_STATE_LISTENING;
2211                rc = listening_get_next(seq, NULL);
2212                while (offset-- && rc)
2213                        rc = listening_get_next(seq, rc);
2214                if (rc)
2215                        break;
2216                st->bucket = 0;
2217                /* Fallthrough */
2218        case TCP_SEQ_STATE_ESTABLISHED:
2219        case TCP_SEQ_STATE_TIME_WAIT:
2220                st->state = TCP_SEQ_STATE_ESTABLISHED;
2221                if (st->bucket > tcp_hashinfo.ehash_mask)
2222                        break;
2223                rc = established_get_first(seq);
2224                while (offset-- && rc)
2225                        rc = established_get_next(seq, rc);
2226        }
2227
2228        st->num = orig_num;
2229
2230        return rc;
2231}
2232
2233static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2234{
2235        struct tcp_iter_state *st = seq->private;
2236        void *rc;
2237
2238        if (*pos && *pos == st->last_pos) {
2239                rc = tcp_seek_last_pos(seq);
2240                if (rc)
2241                        goto out;
2242        }
2243
2244        st->state = TCP_SEQ_STATE_LISTENING;
2245        st->num = 0;
2246        st->bucket = 0;
2247        st->offset = 0;
2248        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2249
2250out:
2251        st->last_pos = *pos;
2252        return rc;
2253}
2254
2255static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2256{
2257        struct tcp_iter_state *st = seq->private;
2258        void *rc = NULL;
2259
2260        if (v == SEQ_START_TOKEN) {
2261                rc = tcp_get_idx(seq, 0);
2262                goto out;
2263        }
2264
2265        switch (st->state) {
2266        case TCP_SEQ_STATE_OPENREQ:
2267        case TCP_SEQ_STATE_LISTENING:
2268                rc = listening_get_next(seq, v);
2269                if (!rc) {
2270                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2271                        st->bucket = 0;
2272                        st->offset = 0;
2273                        rc        = established_get_first(seq);
2274                }
2275                break;
2276        case TCP_SEQ_STATE_ESTABLISHED:
2277        case TCP_SEQ_STATE_TIME_WAIT:
2278                rc = established_get_next(seq, v);
2279                break;
2280        }
2281out:
2282        ++*pos;
2283        st->last_pos = *pos;
2284        return rc;
2285}
2286
2287static void tcp_seq_stop(struct seq_file *seq, void *v)
2288{
2289        struct tcp_iter_state *st = seq->private;
2290
2291        switch (st->state) {
2292        case TCP_SEQ_STATE_OPENREQ:
2293                if (v) {
2294                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2295                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2296                }
2297        case TCP_SEQ_STATE_LISTENING:
2298                if (v != SEQ_START_TOKEN)
2299                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2300                break;
2301        case TCP_SEQ_STATE_TIME_WAIT:
2302        case TCP_SEQ_STATE_ESTABLISHED:
2303                if (v)
2304                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2305                break;
2306        }
2307}
2308
2309static int tcp_seq_open(struct inode *inode, struct file *file)
2310{
2311        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2312        struct tcp_iter_state *s;
2313        int err;
2314
2315        err = seq_open_net(inode, file, &afinfo->seq_ops,
2316                          sizeof(struct tcp_iter_state));
2317        if (err < 0)
2318                return err;
2319
2320        s = ((struct seq_file *)file->private_data)->private;
2321        s->family               = afinfo->family;
2322        s->last_pos             = 0;
2323        return 0;
2324}
2325
2326int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2327{
2328        int rc = 0;
2329        struct proc_dir_entry *p;
2330
2331        afinfo->seq_fops.open           = tcp_seq_open;
2332        afinfo->seq_fops.read           = seq_read;
2333        afinfo->seq_fops.llseek         = seq_lseek;
2334        afinfo->seq_fops.release        = seq_release_net;
2335
2336        afinfo->seq_ops.start           = tcp_seq_start;
2337        afinfo->seq_ops.next            = tcp_seq_next;
2338        afinfo->seq_ops.stop            = tcp_seq_stop;
2339
2340        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2341                             &afinfo->seq_fops, afinfo);
2342        if (!p)
2343                rc = -ENOMEM;
2344        return rc;
2345}
2346EXPORT_SYMBOL(tcp_proc_register);
2347
2348void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2349{
2350        proc_net_remove(net, afinfo->name);
2351}
2352EXPORT_SYMBOL(tcp_proc_unregister);
2353
2354static void get_openreq4(struct sock *sk, struct request_sock *req,
2355                         struct seq_file *f, int i, int uid, int *len)
2356{
2357        const struct inet_request_sock *ireq = inet_rsk(req);
2358        int ttd = req->expires - jiffies;
2359
2360        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2361                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2362                i,
2363                ireq->loc_addr,
2364                ntohs(inet_sk(sk)->inet_sport),
2365                ireq->rmt_addr,
2366                ntohs(ireq->rmt_port),
2367                TCP_SYN_RECV,
2368                0, 0, /* could print option size, but that is af dependent. */
2369                1,    /* timers active (only the expire timer) */
2370                jiffies_to_clock_t(ttd),
2371                req->retrans,
2372                uid,
2373                0,  /* non standard timer */
2374                0, /* open_requests have no inode */
2375                atomic_read(&sk->sk_refcnt),
2376                req,
2377                len);
2378}
2379
2380static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2381{
2382        int timer_active;
2383        unsigned long timer_expires;
2384        struct tcp_sock *tp = tcp_sk(sk);
2385        const struct inet_connection_sock *icsk = inet_csk(sk);
2386        struct inet_sock *inet = inet_sk(sk);
2387        __be32 dest = inet->inet_daddr;
2388        __be32 src = inet->inet_rcv_saddr;
2389        __u16 destp = ntohs(inet->inet_dport);
2390        __u16 srcp = ntohs(inet->inet_sport);
2391        int rx_queue;
2392
2393        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2394                timer_active    = 1;
2395                timer_expires   = icsk->icsk_timeout;
2396        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2397                timer_active    = 4;
2398                timer_expires   = icsk->icsk_timeout;
2399        } else if (timer_pending(&sk->sk_timer)) {
2400                timer_active    = 2;
2401                timer_expires   = sk->sk_timer.expires;
2402        } else {
2403                timer_active    = 0;
2404                timer_expires = jiffies;
2405        }
2406
2407        if (sk->sk_state == TCP_LISTEN)
2408                rx_queue = sk->sk_ack_backlog;
2409        else
2410                /*
2411                 * Because we don't lock the socket, we might find a transient negative value.
2412                 */
2413                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2414
2415        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2416                        "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2417                i, src, srcp, dest, destp, sk->sk_state,
2418                tp->write_seq - tp->snd_una,
2419                rx_queue,
2420                timer_active,
2421                jiffies_to_clock_t(timer_expires - jiffies),
2422                icsk->icsk_retransmits,
2423                sock_i_uid(sk),
2424                icsk->icsk_probes_out,
2425                sock_i_ino(sk),
2426                atomic_read(&sk->sk_refcnt), sk,
2427                jiffies_to_clock_t(icsk->icsk_rto),
2428                jiffies_to_clock_t(icsk->icsk_ack.ato),
2429                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2430                tp->snd_cwnd,
2431                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2432                len);
2433}
2434
2435static void get_timewait4_sock(struct inet_timewait_sock *tw,
2436                               struct seq_file *f, int i, int *len)
2437{
2438        __be32 dest, src;
2439        __u16 destp, srcp;
2440        int ttd = tw->tw_ttd - jiffies;
2441
2442        if (ttd < 0)
2443                ttd = 0;
2444
2445        dest  = tw->tw_daddr;
2446        src   = tw->tw_rcv_saddr;
2447        destp = ntohs(tw->tw_dport);
2448        srcp  = ntohs(tw->tw_sport);
2449
2450        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2451                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2452                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2453                3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2454                atomic_read(&tw->tw_refcnt), tw, len);
2455}
2456
2457#define TMPSZ 150
2458
2459static int tcp4_seq_show(struct seq_file *seq, void *v)
2460{
2461        struct tcp_iter_state *st;
2462        int len;
2463
2464        if (v == SEQ_START_TOKEN) {
2465                seq_printf(seq, "%-*s\n", TMPSZ - 1,
2466                           "  sl  local_address rem_address   st tx_queue "
2467                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2468                           "inode");
2469                goto out;
2470        }
2471        st = seq->private;
2472
2473        switch (st->state) {
2474        case TCP_SEQ_STATE_LISTENING:
2475        case TCP_SEQ_STATE_ESTABLISHED:
2476                get_tcp4_sock(v, seq, st->num, &len);
2477                break;
2478        case TCP_SEQ_STATE_OPENREQ:
2479                get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2480                break;
2481        case TCP_SEQ_STATE_TIME_WAIT:
2482                get_timewait4_sock(v, seq, st->num, &len);
2483                break;
2484        }
2485        seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2486out:
2487        return 0;
2488}
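/*
 * Editor's note: illustrative user-space sketch, not part of tcp_ipv4.c.  It
 * shows how to read back one address:port field emitted by get_tcp4_sock()
 * above (e.g. "0100007F:0016" in /proc/net/tcp): the address is the raw
 * __be32 printed with %08X, so scanning it back and storing it unchanged in
 * an in_addr on the same machine restores the wire byte order; the port is
 * printed with ntohs() and is already a plain number.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static int parse_proc_tcp_addr(const char *field, char *ip_out,
			       socklen_t ip_len, unsigned int *port_out)
{
	unsigned int addr_raw, port;
	struct in_addr in;

	if (sscanf(field, "%8x:%4x", &addr_raw, &port) != 2)
		return -1;

	in.s_addr = addr_raw;	/* same-machine round trip: no byte swap needed */
	if (!inet_ntop(AF_INET, &in, ip_out, ip_len))
		return -1;

	*port_out = port;
	return 0;
}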
2489
2490static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2491        .name           = "tcp",
2492        .family         = AF_INET,
2493        .seq_fops       = {
2494                .owner          = THIS_MODULE,
2495        },
2496        .seq_ops        = {
2497                .show           = tcp4_seq_show,
2498        },
2499};
2500
2501static int __net_init tcp4_proc_init_net(struct net *net)
2502{
2503        return tcp_proc_register(net, &tcp4_seq_afinfo);
2504}
2505
2506static void __net_exit tcp4_proc_exit_net(struct net *net)
2507{
2508        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2509}
2510
2511static struct pernet_operations tcp4_net_ops = {
2512        .init = tcp4_proc_init_net,
2513        .exit = tcp4_proc_exit_net,
2514};
2515
2516int __init tcp4_proc_init(void)
2517{
2518        return register_pernet_subsys(&tcp4_net_ops);
2519}
2520
2521void tcp4_proc_exit(void)
2522{
2523        unregister_pernet_subsys(&tcp4_net_ops);
2524}
2525#endif /* CONFIG_PROC_FS */
2526
2527struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2528{
2529        struct iphdr *iph = skb_gro_network_header(skb);
2530
2531        switch (skb->ip_summed) {
2532        case CHECKSUM_COMPLETE:
2533                if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2534                                  skb->csum)) {
2535                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2536                        break;
2537                }
2538
2539                /* fall through */
2540        case CHECKSUM_NONE:
2541                NAPI_GRO_CB(skb)->flush = 1;
2542                return NULL;
2543        }
2544
2545        return tcp_gro_receive(head, skb);
2546}
2547
2548int tcp4_gro_complete(struct sk_buff *skb)
2549{
2550        struct iphdr *iph = ip_hdr(skb);
2551        struct tcphdr *th = tcp_hdr(skb);
2552
2553        th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2554                                  iph->saddr, iph->daddr, 0);
2555        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2556
2557        return tcp_gro_complete(skb);
2558}
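/*
 * Editor's note: illustrative sketch, not part of tcp_ipv4.c.  tcp_v4_check()
 * returns the folded complement of the pseudo-header sum, so the leading '~'
 * in tcp4_gro_complete() above leaves th->check holding the plain
 * pseudo-header sum.  A later stage (checksum offload in hardware or
 * software) then only has to add the sum over the TCP header and payload,
 * fold, and complement.  A sketch of that finishing step, assuming seg_sum
 * is the 32-bit one's-complement accumulation over the segment with the
 * check field treated as zero:
 */
#include <stdint.h>

static uint16_t csum_finish_sketch(uint16_t seeded_check, uint32_t seg_sum)
{
	/* seeded_check holds the pseudo-header sum; add the segment sum,
	 * fold the carries back into 16 bits, and complement. */
	uint32_t sum = seeded_check + seg_sum;

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}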
2559
2560struct proto tcp_prot = {
2561        .name                   = "TCP",
2562        .owner                  = THIS_MODULE,
2563        .close                  = tcp_close,
2564        .connect                = tcp_v4_connect,
2565        .disconnect             = tcp_disconnect,
2566        .accept                 = inet_csk_accept,
2567        .ioctl                  = tcp_ioctl,
2568        .init                   = tcp_v4_init_sock,
2569        .destroy                = tcp_v4_destroy_sock,
2570        .shutdown               = tcp_shutdown,
2571        .setsockopt             = tcp_setsockopt,
2572        .getsockopt             = tcp_getsockopt,
2573        .recvmsg                = tcp_recvmsg,
2574        .sendmsg                = tcp_sendmsg,
2575        .sendpage               = tcp_sendpage,
2576        .backlog_rcv            = tcp_v4_do_rcv,
2577        .hash                   = inet_hash,
2578        .unhash                 = inet_unhash,
2579        .get_port               = inet_csk_get_port,
2580        .enter_memory_pressure  = tcp_enter_memory_pressure,
2581        .sockets_allocated      = &tcp_sockets_allocated,
2582        .orphan_count           = &tcp_orphan_count,
2583        .memory_allocated       = &tcp_memory_allocated,
2584        .memory_pressure        = &tcp_memory_pressure,
2585        .sysctl_mem             = sysctl_tcp_mem,
2586        .sysctl_wmem            = sysctl_tcp_wmem,
2587        .sysctl_rmem            = sysctl_tcp_rmem,
2588        .max_header             = MAX_TCP_HEADER,
2589        .obj_size               = sizeof(struct tcp_sock),
2590        .slab_flags             = SLAB_DESTROY_BY_RCU,
2591        .twsk_prot              = &tcp_timewait_sock_ops,
2592        .rsk_prot               = &tcp_request_sock_ops,
2593        .h.hashinfo             = &tcp_hashinfo,
2594        .no_autobind            = true,
2595#ifdef CONFIG_COMPAT
2596        .compat_setsockopt      = compat_tcp_setsockopt,
2597        .compat_getsockopt      = compat_tcp_getsockopt,
2598#endif
2599};
2600EXPORT_SYMBOL(tcp_prot);
2601
2602
2603static int __net_init tcp_sk_init(struct net *net)
2604{
2605        return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2606                                    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2607}
2608
2609static void __net_exit tcp_sk_exit(struct net *net)
2610{
2611        inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2612}
2613
2614static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2615{
2616        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2617}
2618
2619static struct pernet_operations __net_initdata tcp_sk_ops = {
2620       .init       = tcp_sk_init,
2621       .exit       = tcp_sk_exit,
2622       .exit_batch = tcp_sk_exit_batch,
2623};
2624
2625void __init tcp_v4_init(void)
2626{
2627        inet_hashinfo_init(&tcp_hashinfo);
2628        if (register_pernet_subsys(&tcp_sk_ops))
2629                panic("Failed to create the TCP control socket.\n");
2630}
2631