linux/net/ipv4/tcp_ipv4.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Implementation of the Transmission Control Protocol(TCP).
 *
 *              IPv4 specific functions
 *
 *
 *              code split from:
 *              linux/ipv4/tcp.c
 *              linux/ipv4/tcp_input.c
 *              linux/ipv4/tcp_output.c
 *
 *              See tcp.c for author information
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *              David S. Miller :       New socket lookup architecture.
 *                                      This code is dedicated to John Dyson.
 *              David S. Miller :       Change semantics of established hash,
 *                                      half is devoted to TIME_WAIT sockets
 *                                      and the rest go in the other half.
 *              Andi Kleen :            Add support for syncookies and fixed
 *                                      some bugs: ip options weren't passed to
 *                                      the TCP layer, missed a check for an
 *                                      ACK bit.
 *              Andi Kleen :            Implemented fast path mtu discovery.
 *                                      Fixed many serious bugs in the
 *                                      request_sock handling and moved
 *                                      most of it into the af independent code.
 *                                      Added tail drop and some other bugfixes.
 *                                      Added new listen semantics.
 *              Mike McLagan    :       Routing by source
 *      Juan Jose Ciarlante:            ip_dynaddr bits
 *              Andi Kleen:             various fixes.
 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
 *                                      coma.
 *      Andi Kleen              :       Fix new listen.
 *      Andi Kleen              :       Fix accept error reporting.
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 */


#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
                                                   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
                               __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
        return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
                                          ip_hdr(skb)->saddr,
                                          tcp_hdr(skb)->dest,
                                          tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
        struct tcp_sock *tp = tcp_sk(sk);

        /* With PAWS, it is safe from the viewpoint
           of data integrity. Even without PAWS it is safe provided sequence
           spaces do not overlap, i.e. at data rates <= 80 Mbit/sec.

           Actually, the idea is close to VJ's: the timestamp cache is held
           not per host, but per port pair, and the TW bucket is used as the
           state holder.

           If the TW bucket has already been destroyed we fall back to VJ's
           scheme and use the initial timestamp retrieved from the peer table.
         */
        if (tcptw->tw_ts_recent_stamp &&
            (twp == NULL || (sysctl_tcp_tw_reuse &&
                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                if (tp->write_seq == 0)
                        tp->write_seq = 1;
                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                sock_hold(sktw);
                return 1;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
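
/*
 * Editorial note (a hedged sketch, not part of the original source): the
 * condition above means a TIME-WAIT socket may be reused for a new
 * outgoing connection when net.ipv4.tcp_tw_reuse is set and the last
 * timestamp seen on the old connection is more than one second old:
 *
 *      # hypothetical admin usage
 *      sysctl -w net.ipv4.tcp_tw_reuse=1
 *
 * The new incarnation then starts its sequence space at
 * tw_snd_nxt + 65535 + 2, past the old receive window, so the two
 * incarnations' sequence numbers do not collide.
 */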

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        __be16 orig_sport, orig_dport;
        __be32 daddr, nexthop;
        struct flowi4 *fl4;
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;

        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;

        if (usin->sin_family != AF_INET)
                return -EAFNOSUPPORT;

        nexthop = daddr = usin->sin_addr.s_addr;
        inet_opt = rcu_dereference_protected(inet->inet_opt,
                                             sock_owned_by_user(sk));
        if (inet_opt && inet_opt->opt.srr) {
                if (!daddr)
                        return -EINVAL;
                nexthop = inet_opt->opt.faddr;
        }

        orig_sport = inet->inet_sport;
        orig_dport = usin->sin_port;
        fl4 = &inet->cork.fl.u.ip4;
        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                              IPPROTO_TCP,
                              orig_sport, orig_dport, sk, true);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                if (err == -ENETUNREACH)
                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
                return err;
        }

        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
                ip_rt_put(rt);
                return -ENETUNREACH;
        }

        if (!inet_opt || !inet_opt->opt.srr)
                daddr = fl4->daddr;

        if (!inet->inet_saddr)
                inet->inet_saddr = fl4->saddr;
        inet->inet_rcv_saddr = inet->inet_saddr;

        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
                /* Reset inherited state */
                tp->rx_opt.ts_recent       = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                tp->write_seq              = 0;
        }

        if (tcp_death_row.sysctl_tw_recycle &&
            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
                struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
                /*
                 * VJ's idea. We save the last timestamp seen from
                 * the destination in the peer table, when entering state
                 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
                 * when trying a new connection.
                 */
                if (peer) {
                        inet_peer_refcheck(peer);
                        if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
                                tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
                                tp->rx_opt.ts_recent = peer->tcp_ts;
                        }
                }
        }

        inet->inet_dport = usin->sin_port;
        inet->inet_daddr = daddr;

        inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

        /* Socket identity is still unknown (sport may be zero).
         * However we set state to SYN-SENT and, without releasing the socket
         * lock, select a source port, enter ourselves into the hash tables
         * and complete initialization after this.
         */
        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet_hash_connect(&tcp_death_row, sk);
        if (err)
                goto failure;

        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
                               inet->inet_sport, inet->inet_dport, sk);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                rt = NULL;
                goto failure;
        }
        /* OK, now commit destination to socket.  */
        sk->sk_gso_type = SKB_GSO_TCPV4;
        sk_setup_caps(sk, &rt->dst);

        if (!tp->write_seq)
                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
                                                           inet->inet_daddr,
                                                           inet->inet_sport,
                                                           usin->sin_port);

        inet->inet_id = tp->write_seq ^ jiffies;

        err = tcp_connect(sk);
        rt = NULL;
        if (err)
                goto failure;

        return 0;

failure:
        /*
         * This unhashes the socket and releases the local port,
         * if necessary.
         */
        tcp_set_state(sk, TCP_CLOSE);
        ip_rt_put(rt);
        sk->sk_route_caps = 0;
        inet->inet_dport = 0;
        return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
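
/*
 * Editorial example (a hedged sketch, not kernel code): tcp_v4_connect()
 * is reached through the ordinary socket API. A minimal userspace caller
 * that exercises the path above might look like this:
 *
 *      int fd = socket(AF_INET, SOCK_STREAM, 0);
 *      struct sockaddr_in peer = {
 *              .sin_family = AF_INET,
 *              .sin_port   = htons(80),
 *      };
 *      inet_pton(AF_INET, "192.0.2.1", &peer.sin_addr);
 *      connect(fd, (struct sockaddr *)&peer, sizeof(peer));
 *
 * connect(2) ends up here via inet_stream_connect(), which is what takes
 * the socket lock that the "SYN-SENT" comment above relies on.
 */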

/*
 * This routine does path mtu discovery as defined in RFC 1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
        struct dst_entry *dst;
        struct inet_sock *inet = inet_sk(sk);

        /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
         * sent out by Linux are always < 576 bytes, so they should go through
         * unfragmented).
         */
        if (sk->sk_state == TCP_LISTEN)
                return;

        /* We don't check in the dst entry whether pmtu discovery is forbidden
         * on this route. We just assume that no packet-too-big replies are
         * sent back when pmtu discovery is not active.
         * There is a small race when the user changes this flag in the
         * route, but I think that's acceptable.
         */
        if ((dst = __sk_dst_check(sk, 0)) == NULL)
                return;

        dst->ops->update_pmtu(dst, mtu);

        /* Something is about to go wrong... Remember the soft error
         * for the case where this connection is not able to recover.
         */
        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
                sk->sk_err_soft = EMSGSIZE;

        mtu = dst_mtu(dst);

        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
                tcp_sync_mss(sk, mtu);

                /* Resend the TCP packet because it's
                 * clear that the old packet has been
                 * dropped. This is the new "fast" path mtu
                 * discovery.
                 */
                tcp_simple_retransmit(sk);
        } /* else let the usual retransmit timer handle it */
}
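
/*
 * Worked example (editorial note, assuming a plain IPv4/TCP header with
 * no options): if the ICMP FRAG_NEEDED message reports an MTU of 1400,
 * tcp_sync_mss() clamps the MSS to roughly
 *
 *      1400 - 20 (IP header) - 20 (TCP header) = 1360 bytes,
 *
 * further reduced by icsk_ext_hdr_len when IP options or IPsec headers
 * are in use, and tcp_simple_retransmit() immediately resends the
 * outstanding data at the new size instead of waiting for the RTO.
 */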

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment,
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
        struct inet_connection_sock *icsk;
        struct tcp_sock *tp;
        struct inet_sock *inet;
        const int type = icmp_hdr(icmp_skb)->type;
        const int code = icmp_hdr(icmp_skb)->code;
        struct sock *sk;
        struct sk_buff *skb;
        __u32 seq;
        __u32 remaining;
        int err;
        struct net *net = dev_net(icmp_skb->dev);

        if (icmp_skb->len < (iph->ihl << 2) + 8) {
                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
                return;
        }

        sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
                        iph->saddr, th->source, inet_iif(icmp_skb));
        if (!sk) {
                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
                return;
        }
        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return;
        }

        bh_lock_sock(sk);
        /* If too many ICMPs get dropped on busy
         * servers this needs to be solved differently.
         */
        if (sock_owned_by_user(sk))
                NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        icsk = inet_csk(sk);
        tp = tcp_sk(sk);
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        switch (type) {
        case ICMP_SOURCE_QUENCH:
                /* Just silently ignore these. */
                goto out;
        case ICMP_PARAMETERPROB:
                err = EPROTO;
                break;
        case ICMP_DEST_UNREACH:
                if (code > NR_ICMP_UNREACH)
                        goto out;

                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC 1191) */
                        if (!sock_owned_by_user(sk))
                                do_pmtu_discovery(sk, iph, info);
                        goto out;
                }

                err = icmp_err_convert[code].errno;
                /* Check whether icmp_skb allows reverting the backoff
                 * (see draft-zimmermann-tcp-lcd). */
                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
                        break;
                if (seq != tp->snd_una || !icsk->icsk_retransmits ||
                    !icsk->icsk_backoff)
                        break;

                if (sock_owned_by_user(sk))
                        break;

                icsk->icsk_backoff--;
                inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
                                         icsk->icsk_backoff;
                tcp_bound_rto(sk);

                skb = tcp_write_queue_head(sk);
                BUG_ON(!skb);

                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
                                tcp_time_stamp - TCP_SKB_CB(skb)->when);

                if (remaining) {
                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                                  remaining, TCP_RTO_MAX);
                } else {
                        /* RTO revert clocked out retransmission.
                         * Will retransmit now. */
                        tcp_retransmit_timer(sk);
                }

                break;
        case ICMP_TIME_EXCEEDED:
                err = EHOSTUNREACH;
                break;
        default:
                goto out;
        }

        switch (sk->sk_state) {
                struct request_sock *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = inet_csk_search_req(sk, &prev, th->dest,
                                          iph->daddr, iph->saddr);
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                 * an established socket here.
                 */
                WARN_ON(req->sk);

                if (seq != tcp_rsk(req)->snt_isn) {
                        NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                /*
                 * Still in SYN_RECV, just remove it silently.
                 * There is no good way to pass the error to the newly
                 * created socket, and POSIX does not want network
                 * errors returned from accept().
                 */
                inet_csk_reqsk_queue_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can, e.g., if SYNs crossed.
                             */
                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;

                        sk->sk_error_report(sk);

                        tcp_done(sk);
                } else {
                        sk->sk_err_soft = err;
                }
                goto out;
        }

        /* If we've already connected we will keep trying
         * until we time out, or the user gives up.
         *
         * RFC 1122 4.2.3.9 allows us to consider as hard errors
         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
         * but it is obsoleted by pmtu discovery).
         *
         * Note that in the modern internet, where routing is unreliable
         * and broken firewalls sit in each dark corner sending random
         * errors ordered by their masters, even these two messages have
         * finally lost their original sense (even Linux sends invalid
         * PORT_UNREACHs).
         *
         * Now we are in compliance with the RFCs.
         *                                                      --ANK (980905)
         */

        inet = inet_sk(sk);
        if (!sock_owned_by_user(sk) && inet->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else { /* Only an error on timeout */
                sk->sk_err_soft = err;
        }

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}

static void __tcp_v4_send_check(struct sk_buff *skb,
                                __be32 saddr, __be32 daddr)
{
        struct tcphdr *th = tcp_hdr(skb);

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
                skb->csum_start = skb_transport_header(skb) - skb->head;
                skb->csum_offset = offsetof(struct tcphdr, check);
        } else {
                th->check = tcp_v4_check(skb->len, saddr, daddr,
                                         csum_partial(th,
                                                      th->doff << 2,
                                                      skb->csum));
        }
}
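
/*
 * Editorial note (a sketch of the offload split, not new kernel logic):
 * for CHECKSUM_PARTIAL the kernel seeds th->check with only the
 * pseudo-header sum and records where the hardware should finish the job:
 *
 *      th->check        = ~csum(pseudo-header)   (seed)
 *      skb->csum_start  = offset of the TCP header within the skb
 *      skb->csum_offset = offsetof(struct tcphdr, check)
 *
 * The NIC then one's-complement-sums everything from csum_start onward
 * and stores the result at csum_start + csum_offset. In the software
 * fallback branch the full checksum is computed immediately instead.
 */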

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
        struct inet_sock *inet = inet_sk(sk);

        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
        const struct iphdr *iph;
        struct tcphdr *th;

        if (!pskb_may_pull(skb, sizeof(*th)))
                return -EINVAL;

        iph = ip_hdr(skb);
        th = tcp_hdr(skb);

        th->check = 0;
        skb->ip_summed = CHECKSUM_PARTIAL;
        __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
        return 0;
}

/*
 *      This routine will send an RST to the other tcp.
 *
 *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *                    for reset.
 *      Answer: if a packet caused the RST, it is not for a socket
 *              existing in our system; if it is matched to a socket,
 *              it is just a duplicate segment or a bug in the other
 *              side's TCP. So we build the reply based only on the
 *              parameters that arrived with the segment.
 *      Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
        struct tcphdr *th = tcp_hdr(skb);
        struct {
                struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
        } rep;
        struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
#endif
        struct net *net;

        /* Never send a reset in response to a reset. */
        if (th->rst)
                return;

        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
                return;

        /* Swap the send and the receive. */
        memset(&rep, 0, sizeof(rep));
        rep.th.dest   = th->source;
        rep.th.source = th->dest;
        rep.th.doff   = sizeof(struct tcphdr) / 4;
        rep.th.rst    = 1;

        if (th->ack) {
                rep.th.seq = th->ack_seq;
        } else {
                rep.th.ack = 1;
                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
                                       skb->len - (th->doff << 2));
        }

        memset(&arg, 0, sizeof(arg));
        arg.iov[0].iov_base = (unsigned char *)&rep;
        arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
        key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
        if (key) {
                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
                                   (TCPOPT_NOP << 16) |
                                   (TCPOPT_MD5SIG << 8) |
                                   TCPOLEN_MD5SIG);
                /* Update length and the length the header thinks exists */
                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
                rep.th.doff = arg.iov[0].iov_len / 4;

                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
                                     key, ip_hdr(skb)->saddr,
                                     ip_hdr(skb)->daddr, &rep.th);
        }
#endif
        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
                                      ip_hdr(skb)->saddr, /* XXX */
                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

        net = dev_net(skb_dst(skb)->dev);
        ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
                      &arg, arg.iov[0].iov_len);

        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}
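
/*
 * Worked example (editorial note): for a stray data-less SYN with
 * seq = 1000 arriving for a non-existent socket, the branch above takes
 * the no-ACK path and emits
 *
 *      RST|ACK, seq = 0, ack_seq = 1000 + 1 (the SYN counts as one byte),
 *
 * whereas a stray ACK with ack_seq = 5000 elicits a bare RST with
 * seq = 5000, exactly as RFC 793 prescribes for the two cases.
 */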

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside the socket context, is certainly ugly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 ts, int oif,
                            struct tcp_md5sig_key *key,
                            int reply_flags)
{
        struct tcphdr *th = tcp_hdr(skb);
        struct {
                struct tcphdr th;
                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
                        ];
        } rep;
        struct ip_reply_arg arg;
        struct net *net = dev_net(skb_dst(skb)->dev);

        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));

        arg.iov[0].iov_base = (unsigned char *)&rep;
        arg.iov[0].iov_len  = sizeof(rep.th);
        if (ts) {
                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                   (TCPOPT_TIMESTAMP << 8) |
                                   TCPOLEN_TIMESTAMP);
                rep.opt[1] = htonl(tcp_time_stamp);
                rep.opt[2] = htonl(ts);
                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
        }

        /* Swap the send and the receive. */
        rep.th.dest    = th->source;
        rep.th.source  = th->dest;
        rep.th.doff    = arg.iov[0].iov_len / 4;
        rep.th.seq     = htonl(seq);
        rep.th.ack_seq = htonl(ack);
        rep.th.ack     = 1;
        rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                int offset = (ts) ? 3 : 0;

                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
                                          (TCPOPT_NOP << 16) |
                                          (TCPOPT_MD5SIG << 8) |
                                          TCPOLEN_MD5SIG);
                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
                rep.th.doff = arg.iov[0].iov_len / 4;

                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
                                    key, ip_hdr(skb)->saddr,
                                    ip_hdr(skb)->daddr, &rep.th);
        }
#endif
        arg.flags = reply_flags;
        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
                                      ip_hdr(skb)->saddr, /* XXX */
                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
        if (oif)
                arg.bound_dev_if = oif;

        ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
                      &arg, arg.iov[0].iov_len);

        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
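
/*
 * Worked example (editorial note): the option block above is laid out in
 * aligned 4-byte words, so the resulting header length is easy to check:
 *
 *      bare ACK:                 20 / 4             = 5 words (doff = 5)
 *      + timestamp (12 bytes):   (20 + 12) / 4      = 8 words
 *      + MD5 (20 bytes):         (20 + 12 + 20) / 4 = 13 words
 *
 * which is also why the MD5 option starts at rep.opt[3] when a timestamp
 * is present and at rep.opt[0] otherwise.
 */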

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcptw->tw_ts_recent,
                        tw->tw_bound_dev_if,
                        tcp_twsk_md5_key(tcptw),
                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
                        );

        inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
                        tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
                        req->ts_recent,
                        0,
                        tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}

/*
 *      Send a SYN-ACK after having received a SYN.
 *      This still operates on a request_sock only, not on a big
 *      socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
                              struct request_sock *req,
                              struct request_values *rvp)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct flowi4 fl4;
        int err = -1;
        struct sk_buff *skb;

        /* First, grab a route. */
        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
                return -1;

        skb = tcp_make_synack(sk, dst, req, rvp);

        if (skb) {
                __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
                                            ireq->rmt_addr,
                                            ireq->opt);
                err = net_xmit_eval(err);
        }

        dst_release(dst);
        return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
                             struct request_values *rvp)
{
        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
        return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *      IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->opt);
}

static void syn_flood_warning(const struct sk_buff *skb)
{
        const char *msg;

#ifdef CONFIG_SYN_COOKIES
        if (sysctl_tcp_syncookies)
                msg = "Sending cookies";
        else
#endif
                msg = "Dropping request";

        pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
                ntohs(tcp_hdr(skb)->dest), msg);
}

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
                                                  struct sk_buff *skb)
{
        const struct ip_options *opt = &(IPCB(skb)->opt);
        struct ip_options_rcu *dopt = NULL;

        if (opt && opt->optlen) {
                int opt_size = sizeof(*dopt) + opt->optlen;

                dopt = kmalloc(opt_size, GFP_ATOMIC);
                if (dopt) {
                        if (ip_options_echo(&dopt->opt, skb)) {
                                kfree(dopt);
                                dopt = NULL;
                        }
                }
        }
        return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC 2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *
                        tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int i;

        if (!tp->md5sig_info || !tp->md5sig_info->entries4)
                return NULL;
        for (i = 0; i < tp->md5sig_info->entries4; i++) {
                if (tp->md5sig_info->keys4[i].addr == addr)
                        return &tp->md5sig_info->keys4[i].base;
        }
        return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
                                         struct sock *addr_sk)
{
        return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
                                                      struct request_sock *req)
{
        return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
                      u8 *newkey, u8 newkeylen)
{
        /* Add Key to the list */
        struct tcp_md5sig_key *key;
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp4_md5sig_key *keys;

        key = tcp_v4_md5_do_lookup(sk, addr);
        if (key) {
                /* Pre-existing entry - just update that one. */
                kfree(key->key);
                key->key = newkey;
                key->keylen = newkeylen;
        } else {
                struct tcp_md5sig_info *md5sig;

                if (!tp->md5sig_info) {
                        tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
                                                  GFP_ATOMIC);
                        if (!tp->md5sig_info) {
                                kfree(newkey);
                                return -ENOMEM;
                        }
                        sk_nocaps_add(sk, NETIF_F_GSO_MASK);
                }
                if (tcp_alloc_md5sig_pool(sk) == NULL) {
                        kfree(newkey);
                        return -ENOMEM;
                }
                md5sig = tp->md5sig_info;

                if (md5sig->alloced4 == md5sig->entries4) {
                        keys = kmalloc((sizeof(*keys) *
                                        (md5sig->entries4 + 1)), GFP_ATOMIC);
                        if (!keys) {
                                kfree(newkey);
                                tcp_free_md5sig_pool();
                                return -ENOMEM;
                        }

                        if (md5sig->entries4)
                                memcpy(keys, md5sig->keys4,
                                       sizeof(*keys) * md5sig->entries4);

                        /* Free old key list, and reference new one */
                        kfree(md5sig->keys4);
                        md5sig->keys4 = keys;
                        md5sig->alloced4++;
                }
                md5sig->entries4++;
                md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
                md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
                md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
        }
        return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
                               u8 *newkey, u8 newkeylen)
{
        return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
                                 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
        struct tcp_sock *tp = tcp_sk(sk);
        int i;

        for (i = 0; i < tp->md5sig_info->entries4; i++) {
                if (tp->md5sig_info->keys4[i].addr == addr) {
                        /* Free the key */
                        kfree(tp->md5sig_info->keys4[i].base.key);
                        tp->md5sig_info->entries4--;

                        if (tp->md5sig_info->entries4 == 0) {
                                kfree(tp->md5sig_info->keys4);
                                tp->md5sig_info->keys4 = NULL;
                                tp->md5sig_info->alloced4 = 0;
                        } else if (tp->md5sig_info->entries4 != i) {
                                /* Need to do some manipulation */
                                memmove(&tp->md5sig_info->keys4[i],
                                        &tp->md5sig_info->keys4[i + 1],
                                        (tp->md5sig_info->entries4 - i) *
                                         sizeof(struct tcp4_md5sig_key));
                        }
                        tcp_free_md5sig_pool();
                        return 0;
                }
        }
        return -ENOENT;
}
EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        /* Free each key, then the set of keys, the crypto element,
         * and then decrement our hold on the last resort crypto.
         */
        if (tp->md5sig_info->entries4) {
                int i;
                for (i = 0; i < tp->md5sig_info->entries4; i++)
                        kfree(tp->md5sig_info->keys4[i].base.key);
                tp->md5sig_info->entries4 = 0;
                tcp_free_md5sig_pool();
        }
        if (tp->md5sig_info->keys4) {
                kfree(tp->md5sig_info->keys4);
                tp->md5sig_info->keys4 = NULL;
                tp->md5sig_info->alloced4 = 0;
        }
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
                                 int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
        u8 *newkey;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_user(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin->sin_family != AF_INET)
                return -EINVAL;

        if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
                if (!tcp_sk(sk)->md5sig_info)
                        return -ENOENT;
                return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (!tcp_sk(sk)->md5sig_info) {
                struct tcp_sock *tp = tcp_sk(sk);
                struct tcp_md5sig_info *p;

                p = kzalloc(sizeof(*p), sk->sk_allocation);
                if (!p)
                        return -EINVAL;

                tp->md5sig_info = p;
                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
        }

        newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
        if (!newkey)
                return -ENOMEM;
        return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
                                 newkey, cmd.tcpm_keylen);
}
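
/*
 * Illustrative userspace usage (an editorial sketch, not part of this
 * file): installing a key for peer 10.0.0.1 with the TCP_MD5SIG socket
 * option lands in tcp_v4_parse_md5_keys() above:
 *
 *      struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *      struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *      sin->sin_family = AF_INET;
 *      inet_pton(AF_INET, "10.0.0.1", &sin->sin_addr);
 *      memcpy(md5.tcpm_key, "secret", 6);
 *      setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing tcpm_keylen == 0 for the same address deletes the key via
 * tcp_v4_md5_do_del() instead.
 */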

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
                                        __be32 daddr, __be32 saddr, int nbytes)
{
        struct tcp4_pseudohdr *bp;
        struct scatterlist sg;

        bp = &hp->md5_blk.ip4;

        /*
         * 1. the TCP pseudo-header (in the order: source IP address,
         * destination IP address, zero-padded protocol number, and
         * segment length)
         */
        bp->saddr = saddr;
        bp->daddr = daddr;
        bp->pad = 0;
        bp->protocol = IPPROTO_TCP;
        bp->len = cpu_to_be16(nbytes);

        sg_init_one(&sg, bp, sizeof(*bp));
        return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
                               __be32 daddr, __be32 saddr, struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct hash_desc *desc;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        desc = &hp->md5_desc;

        if (crypto_hash_init(desc))
                goto clear_hash;
        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_header(hp, th))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        if (crypto_hash_final(desc, md5_hash))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
                        struct sock *sk, struct request_sock *req,
                        struct sk_buff *skb)
{
        struct tcp_md5sig_pool *hp;
        struct hash_desc *desc;
        struct tcphdr *th = tcp_hdr(skb);
        __be32 saddr, daddr;

        if (sk) {
                saddr = inet_sk(sk)->inet_saddr;
                daddr = inet_sk(sk)->inet_daddr;
        } else if (req) {
                saddr = inet_rsk(req)->loc_addr;
                daddr = inet_rsk(req)->rmt_addr;
        } else {
                const struct iphdr *iph = ip_hdr(skb);
                saddr = iph->saddr;
                daddr = iph->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        desc = &hp->md5_desc;

        if (crypto_hash_init(desc))
                goto clear_hash;

        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_header(hp, th))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        if (crypto_hash_final(desc, md5_hash))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
        /*
         * This gets called for each TCP segment that arrives,
         * so we want to be efficient.
         * We have 3 drop cases:
         * o No MD5 hash and one expected.
         * o MD5 hash and we're not expecting one.
         * o MD5 hash and it's wrong.
         */
        __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct iphdr *iph = ip_hdr(skb);
        struct tcphdr *th = tcp_hdr(skb);
        int genhash;
        unsigned char newhash[16];

        hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return 0;

        if (hash_expected && !hash_location) {
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return 1;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return 1;
        }

        /* Okay, so this is hash_expected and hash_location -
         * so we need to calculate the checksum.
         */
        genhash = tcp_v4_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                if (net_ratelimit()) {
                        printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
                               &iph->saddr, ntohs(th->source),
                               &iph->daddr, ntohs(th->dest),
                               genhash ? " tcp_v4_calc_md5_hash failed" : "");
                }
                return 1;
        }
        return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
        .family         =       PF_INET,
        .obj_size       =       sizeof(struct tcp_request_sock),
        .rtx_syn_ack    =       tcp_v4_rtx_synack,
        .send_ack       =       tcp_v4_reqsk_send_ack,
        .destructor     =       tcp_v4_reqsk_destructor,
        .send_reset     =       tcp_v4_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
        .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_extend_values tmp_ext;
        struct tcp_options_received tmp_opt;
        u8 *hash_location;
        struct request_sock *req;
        struct inet_request_sock *ireq;
        struct tcp_sock *tp = tcp_sk(sk);
        struct dst_entry *dst = NULL;
        __be32 saddr = ip_hdr(skb)->saddr;
        __be32 daddr = ip_hdr(skb)->daddr;
        __u32 isn = TCP_SKB_CB(skb)->when;
#ifdef CONFIG_SYN_COOKIES
        int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

        /* Never answer SYNs sent to broadcast or multicast addresses. */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        /* TW buckets are converted to open requests without
         * limitation; they conserve resources and the peer is
         * evidently a real one.
         */
        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
                if (net_ratelimit())
                        syn_flood_warning(skb);
#ifdef CONFIG_SYN_COOKIES
                if (sysctl_tcp_syncookies) {
                        want_cookie = 1;
                } else
#endif
                goto drop;
        }

        /* The accept backlog is full. If we have already queued enough
         * warm entries in the syn queue, drop the request. That is better
         * than clogging the syn queue with openreqs whose timeouts grow
         * exponentially.
         */
        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
                goto drop;

        req = inet_reqsk_alloc(&tcp_request_sock_ops);
        if (!req)
                goto drop;

#ifdef CONFIG_TCP_MD5SIG
        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

        tcp_clear_options(&tmp_opt);
        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
        tmp_opt.user_mss  = tp->rx_opt.user_mss;
        tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

        if (tmp_opt.cookie_plus > 0 &&
            tmp_opt.saw_tstamp &&
            !tp->rx_opt.cookie_out_never &&
            (sysctl_tcp_cookie_size > 0 ||
             (tp->cookie_values != NULL &&
              tp->cookie_values->cookie_desired > 0))) {
                u8 *c;
                u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
                int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

                if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
                        goto drop_and_release;

                /* Secret recipe starts with IP addresses */
                *mess++ ^= (__force u32)daddr;
                *mess++ ^= (__force u32)saddr;

                /* plus variable length Initiator Cookie */
                c = (u8 *)mess;
                while (l-- > 0)
                        *c++ ^= *hash_location++;

#ifdef CONFIG_SYN_COOKIES
                want_cookie = 0;        /* not our kind of cookie */
#endif
                tmp_ext.cookie_out_never = 0; /* false */
                tmp_ext.cookie_plus = tmp_opt.cookie_plus;
        } else if (!tp->rx_opt.cookie_in_always) {
                /* redundant indications, but ensure initialization. */
                tmp_ext.cookie_out_never = 1; /* true */
                tmp_ext.cookie_plus = 0;
        } else {
                goto drop_and_release;
        }
        tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

        if (want_cookie && !tmp_opt.saw_tstamp)
                tcp_clear_options(&tmp_opt);

        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
        tcp_openreq_init(req, &tmp_opt, skb);

        ireq = inet_rsk(req);
        ireq->loc_addr = daddr;
        ireq->rmt_addr = saddr;
        ireq->no_srccheck = inet_sk(sk)->transparent;
        ireq->opt = tcp_v4_save_options(sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                goto drop_and_free;

        if (!want_cookie || tmp_opt.tstamp_ok)
                TCP_ECN_create_request(req, tcp_hdr(skb));

        if (want_cookie) {
                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
                req->cookie_ts = tmp_opt.tstamp_ok;
        } else if (!isn) {
                struct inet_peer *peer = NULL;
                struct flowi4 fl4;

                /* VJ's idea. We save the last timestamp seen
                 * from the destination in the peer table, when entering
                 * state TIME-WAIT, and check against it before
                 * accepting a new connection request.
                 *
                 * If "isn" is not zero, this request hit an alive
                 * timewait bucket, so all the necessary checks
                 * are made in the function processing the timewait state.
                 */
                if (tmp_opt.saw_tstamp &&
                    tcp_death_row.sysctl_tw_recycle &&
                    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
                    fl4.daddr == saddr &&
                    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
                        inet_peer_refcheck(peer);
                        if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
                            (s32)(peer->tcp_ts - req->ts_recent) >
                                                        TCP_PAWS_WINDOW) {
                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
                                goto drop_and_release;
                        }
                }
                /* Kill the following clause, if you dislike this way. */
                else if (!sysctl_tcp_syncookies &&
                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                          (sysctl_max_syn_backlog >> 2)) &&
                         (!peer || !peer->tcp_ts_stamp) &&
                         (!dst || !dst_metric(dst, RTAX_RTT))) {
                        /* Without syncookies the last quarter of the
                         * backlog is reserved for destinations proven
                         * to be alive: it means that we keep talking
                         * to destinations we already remembered before
                         * the moment of the synflood.
                         */
                        LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
                                       &saddr, ntohs(tcp_hdr(skb)->source));
                        goto drop_and_release;
                }

                isn = tcp_v4_init_sequence(skb);
        }
        tcp_rsk(req)->snt_isn = isn;

        if (tcp_v4_send_synack(sk, dst, req,
                               (struct request_values *)&tmp_ext) ||
            want_cookie)
                goto drop_and_free;

        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
        return 0;

drop_and_release:
        dst_release(dst);
drop_and_free:
        reqsk_free(req);
drop:
        return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
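
/*
 * Worked example (editorial note, assuming a hypothetical setting of
 * sysctl_max_syn_backlog = 1024; the real default scales with memory):
 * the "last quarter" clause above starts dropping SYNs from unremembered
 * peers once fewer than
 *
 *      1024 >> 2 = 256
 *
 * request slots remain free, unless syncookies are enabled, in which
 * case cookie_v4_init_sequence() encodes the whole request into the ISN
 * and nothing needs to be queued at all.
 */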
1404
1405
1406/*
1407 * The three way handshake has completed - we got a valid synack -
1408 * now create the new socket.
1409 */
1410struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1411                                  struct request_sock *req,
1412                                  struct dst_entry *dst)
1413{
1414        struct inet_request_sock *ireq;
1415        struct inet_sock *newinet;
1416        struct tcp_sock *newtp;
1417        struct sock *newsk;
1418#ifdef CONFIG_TCP_MD5SIG
1419        struct tcp_md5sig_key *key;
1420#endif
1421        struct ip_options_rcu *inet_opt;
1422
1423        if (sk_acceptq_is_full(sk))
1424                goto exit_overflow;
1425
1426        newsk = tcp_create_openreq_child(sk, req, skb);
1427        if (!newsk)
1428                goto exit_nonewsk;
1429
1430        newsk->sk_gso_type = SKB_GSO_TCPV4;
1431
1432        newtp                 = tcp_sk(newsk);
1433        newinet               = inet_sk(newsk);
1434        ireq                  = inet_rsk(req);
1435        newinet->inet_daddr   = ireq->rmt_addr;
1436        newinet->inet_rcv_saddr = ireq->loc_addr;
1437        newinet->inet_saddr   = ireq->loc_addr;
1438        inet_opt              = ireq->opt;
1439        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1440        ireq->opt             = NULL;
1441        newinet->mc_index     = inet_iif(skb);
1442        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1443        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1444        if (inet_opt)
1445                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1446        newinet->inet_id = newtp->write_seq ^ jiffies;
1447
1448        if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1449                goto put_and_exit;
1450
1451        sk_setup_caps(newsk, dst);
1452
1453        tcp_mtup_init(newsk);
1454        tcp_sync_mss(newsk, dst_mtu(dst));
1455        newtp->advmss = dst_metric_advmss(dst);
1456        if (tcp_sk(sk)->rx_opt.user_mss &&
1457            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1458                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1459
1460        tcp_initialize_rcv_mss(newsk);
1461
1462#ifdef CONFIG_TCP_MD5SIG
1463        /* Copy over the MD5 key from the original socket */
1464        key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1465        if (key != NULL) {
1466                /*
1467                 * We're using one, so create a matching key
1468                 * on the newsk structure. If we fail to get
1469                 * memory, then we end up not copying the key
1470                 * across. Shucks.
1471                 */
1472                char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1473                if (newkey != NULL)
1474                        tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1475                                          newkey, key->keylen);
1476                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1477        }
1478#endif
1479
1480        if (__inet_inherit_port(sk, newsk) < 0)
1481                goto put_and_exit;
1482        __inet_hash_nolisten(newsk, NULL);
1483
1484        return newsk;
1485
1486exit_overflow:
1487        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1488exit_nonewsk:
1489        dst_release(dst);
1490exit:
1491        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1492        return NULL;
1493put_and_exit:
1494        sock_put(newsk);
1495        goto exit;
1496}
1497EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
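/*
 * tcp_v4_syn_recv_sock() is reached through ipv4_specific (below) from the
 * af-independent request_sock code rather than called directly.  A minimal
 * sketch of that indirection, assuming a listener sk and a matched req:
 *
 *      struct sock *child;
 *
 *      child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
 *      if (child == NULL)
 *              goto drop;
 *
 * A NULL return means the accept queue was full or an allocation failed;
 * the caller drops the segment.  Passing dst == NULL lets the function
 * route the child itself via inet_csk_route_child_sock().
 */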
1498
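/*
 * Demultiplex a segment that arrived on a listening socket: a pending
 * request_sock is handed to tcp_check_req(), an established lookup catches
 * packets racing with a just-created child, and, with CONFIG_SYN_COOKIES,
 * a bare ACK is offered to cookie_v4_check() so the connection can be
 * rebuilt without any stored request state.
 */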
1499static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1500{
1501        struct tcphdr *th = tcp_hdr(skb);
1502        const struct iphdr *iph = ip_hdr(skb);
1503        struct sock *nsk;
1504        struct request_sock **prev;
1505        /* Find possible connection requests. */
1506        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1507                                                       iph->saddr, iph->daddr);
1508        if (req)
1509                return tcp_check_req(sk, skb, req, prev);
1510
1511        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1512                        th->source, iph->daddr, th->dest, inet_iif(skb));
1513
1514        if (nsk) {
1515                if (nsk->sk_state != TCP_TIME_WAIT) {
1516                        bh_lock_sock(nsk);
1517                        return nsk;
1518                }
1519                inet_twsk_put(inet_twsk(nsk));
1520                return NULL;
1521        }
1522
1523#ifdef CONFIG_SYN_COOKIES
1524        if (!th->syn)
1525                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1526#endif
1527        return sk;
1528}
1529
1530static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1531{
1532        const struct iphdr *iph = ip_hdr(skb);
1533
1534        if (skb->ip_summed == CHECKSUM_COMPLETE) {
1535                if (!tcp_v4_check(skb->len, iph->saddr,
1536                                  iph->daddr, skb->csum)) {
1537                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1538                        return 0;
1539                }
1540        }
1541
1542        skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1543                                       skb->len, IPPROTO_TCP, 0);
1544
1545        if (skb->len <= 76) {
1546                return __skb_checksum_complete(skb);
1547        }
1548        return 0;
1549}
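/*
 * The seed computed above is the standard RFC 793 pseudo-header sum; a
 * segment verifies when folding the TCP header and payload into it yields
 * zero, which is exactly what tcp_v4_check() tests in the
 * CHECKSUM_COMPLETE branch.  Short packets (<= 76 bytes) are verified
 * immediately via __skb_checksum_complete(); for longer ones the work is
 * deferred so it can be folded into a later pass over the data.
 */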
1550
1551
1552/* The socket must have its spinlock held when we get
1553 * here.
1554 *
1555 * We have a potential double-lock case here, so even when
1556 * doing backlog processing we use the BH locking scheme.
1557 * This is because we cannot sleep with the original spinlock
1558 * held.
1559 */
1560int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1561{
1562        struct sock *rsk;
1563#ifdef CONFIG_TCP_MD5SIG
1564        /*
1565         * We really want to reject the packet as early as possible
1566         * if:
1567         *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1568         *  o There is an MD5 option and we're not expecting one
1569         */
1570        if (tcp_v4_inbound_md5_hash(sk, skb))
1571                goto discard;
1572#endif
1573
1574        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1575                sock_rps_save_rxhash(sk, skb->rxhash);
1576                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1577                        rsk = sk;
1578                        goto reset;
1579                }
1580                return 0;
1581        }
1582
1583        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1584                goto csum_err;
1585
1586        if (sk->sk_state == TCP_LISTEN) {
1587                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1588                if (!nsk)
1589                        goto discard;
1590
1591                if (nsk != sk) {
1592                        sock_rps_save_rxhash(nsk, skb->rxhash);
1593                        if (tcp_child_process(sk, nsk, skb)) {
1594                                rsk = nsk;
1595                                goto reset;
1596                        }
1597                        return 0;
1598                }
1599        } else
1600                sock_rps_save_rxhash(sk, skb->rxhash);
1601
1602        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1603                rsk = sk;
1604                goto reset;
1605        }
1606        return 0;
1607
1608reset:
1609        tcp_v4_send_reset(rsk, skb);
1610discard:
1611        kfree_skb(skb);
1612        /* Be careful here. If this function gets more complicated and
1613         * gcc suffers from register pressure on the x86, sk (in %ebx)
1614         * might be destroyed here. This current version compiles correctly,
1615         * but you have been warned.
1616         */
1617        return 0;
1618
1619csum_err:
1620        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1621        goto discard;
1622}
1623EXPORT_SYMBOL(tcp_v4_do_rcv);
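/*
 * Besides the direct call from tcp_v4_rcv(), this function also serves as
 * tcp_prot.backlog_rcv (see below), so segments parked with
 * sk_add_backlog() while a user context owned the socket are replayed
 * through this very same path once the lock is released.
 */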
1624
1625/*
1626 *      From tcp_input.c
1627 */
1628
1629int tcp_v4_rcv(struct sk_buff *skb)
1630{
1631        const struct iphdr *iph;
1632        struct tcphdr *th;
1633        struct sock *sk;
1634        int ret;
1635        struct net *net = dev_net(skb->dev);
1636
1637        if (skb->pkt_type != PACKET_HOST)
1638                goto discard_it;
1639
1640        /* Count it even if it's bad */
1641        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1642
1643        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1644                goto discard_it;
1645
1646        th = tcp_hdr(skb);
1647
1648        if (th->doff < sizeof(struct tcphdr) / 4)
1649                goto bad_packet;
1650        if (!pskb_may_pull(skb, th->doff * 4))
1651                goto discard_it;
1652
1653        /* An explanation is required here, I think.
1654         * Packet length and doff are validated by header prediction,
1655         * provided the case of th->doff == 0 is eliminated.
1656         * So, we defer the checks. */
1657        if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1658                goto bad_packet;
1659
1660        th = tcp_hdr(skb);
1661        iph = ip_hdr(skb);
1662        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1663        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1664                                    skb->len - th->doff * 4);
1665        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1666        TCP_SKB_CB(skb)->when    = 0;
1667        TCP_SKB_CB(skb)->flags   = iph->tos;
1668        TCP_SKB_CB(skb)->sacked  = 0;
1669
1670        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1671        if (!sk)
1672                goto no_tcp_socket;
1673
1674process:
1675        if (sk->sk_state == TCP_TIME_WAIT)
1676                goto do_time_wait;
1677
1678        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1679                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1680                goto discard_and_relse;
1681        }
1682
1683        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1684                goto discard_and_relse;
1685        nf_reset(skb);
1686
1687        if (sk_filter(sk, skb))
1688                goto discard_and_relse;
1689
1690        skb->dev = NULL;
1691
1692        bh_lock_sock_nested(sk);
1693        ret = 0;
1694        if (!sock_owned_by_user(sk)) {
1695#ifdef CONFIG_NET_DMA
1696                struct tcp_sock *tp = tcp_sk(sk);
1697                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1698                        tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1699                if (tp->ucopy.dma_chan)
1700                        ret = tcp_v4_do_rcv(sk, skb);
1701                else
1702#endif
1703                {
1704                        if (!tcp_prequeue(sk, skb))
1705                                ret = tcp_v4_do_rcv(sk, skb);
1706                }
1707        } else if (unlikely(sk_add_backlog(sk, skb))) {
1708                bh_unlock_sock(sk);
1709                NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1710                goto discard_and_relse;
1711        }
1712        bh_unlock_sock(sk);
1713
1714        sock_put(sk);
1715
1716        return ret;
1717
1718no_tcp_socket:
1719        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1720                goto discard_it;
1721
1722        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1723bad_packet:
1724                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1725        } else {
1726                tcp_v4_send_reset(NULL, skb);
1727        }
1728
1729discard_it:
1730        /* Discard frame. */
1731        kfree_skb(skb);
1732        return 0;
1733
1734discard_and_relse:
1735        sock_put(sk);
1736        goto discard_it;
1737
1738do_time_wait:
1739        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1740                inet_twsk_put(inet_twsk(sk));
1741                goto discard_it;
1742        }
1743
1744        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1745                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1746                inet_twsk_put(inet_twsk(sk));
1747                goto discard_it;
1748        }
1749        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1750        case TCP_TW_SYN: {
1751                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1752                                                        &tcp_hashinfo,
1753                                                        iph->daddr, th->dest,
1754                                                        inet_iif(skb));
1755                if (sk2) {
1756                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1757                        inet_twsk_put(inet_twsk(sk));
1758                        sk = sk2;
1759                        goto process;
1760                }
1761                /* Fall through to ACK */
1762        }
1763        case TCP_TW_ACK:
1764                tcp_v4_timewait_ack(sk, skb);
1765                break;
1766        case TCP_TW_RST:
1767                goto no_tcp_socket;
1768        case TCP_TW_SUCCESS:;
1769        }
1770        goto discard_it;
1771}
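/*
 * To summarize the hand-off above: with the socket unowned, a segment is
 * consumed by the NET_DMA path, prequeued for a sleeping reader, or
 * processed immediately; with the socket owned by a user context it goes
 * to the backlog.  A rough sketch, locking elided and with drop() standing
 * in for the statistics-and-free path:
 *
 *      if (!sock_owned_by_user(sk)) {
 *              if (!tcp_prequeue(sk, skb))
 *                      ret = tcp_v4_do_rcv(sk, skb);
 *      } else if (sk_add_backlog(sk, skb)) {
 *              drop();
 *      }
 */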
1772
1773struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1774{
1775        struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1776        struct inet_sock *inet = inet_sk(sk);
1777        struct inet_peer *peer;
1778
1779        if (!rt ||
1780            inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1781                peer = inet_getpeer_v4(inet->inet_daddr, 1);
1782                *release_it = true;
1783        } else {
1784                if (!rt->peer)
1785                        rt_bind_peer(rt, inet->inet_daddr, 1);
1786                peer = rt->peer;
1787                *release_it = false;
1788        }
1789
1790        return peer;
1791}
1792EXPORT_SYMBOL(tcp_v4_get_peer);
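/*
 * The release_it convention: a peer obtained by a fresh inet_getpeer_v4()
 * lookup carries a reference the caller must drop, hence *release_it =
 * true; a peer borrowed from the cached route is owned by the route and
 * needs no put.
 */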
1793
1794void *tcp_v4_tw_get_peer(struct sock *sk)
1795{
1796        struct inet_timewait_sock *tw = inet_twsk(sk);
1797
1798        return inet_getpeer_v4(tw->tw_daddr, 1);
1799}
1800EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1801
1802static struct timewait_sock_ops tcp_timewait_sock_ops = {
1803        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1804        .twsk_unique    = tcp_twsk_unique,
1805        .twsk_destructor= tcp_twsk_destructor,
1806        .twsk_getpeer   = tcp_v4_tw_get_peer,
1807};
1808
1809const struct inet_connection_sock_af_ops ipv4_specific = {
1810        .queue_xmit        = ip_queue_xmit,
1811        .send_check        = tcp_v4_send_check,
1812        .rebuild_header    = inet_sk_rebuild_header,
1813        .conn_request      = tcp_v4_conn_request,
1814        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1815        .get_peer          = tcp_v4_get_peer,
1816        .net_header_len    = sizeof(struct iphdr),
1817        .setsockopt        = ip_setsockopt,
1818        .getsockopt        = ip_getsockopt,
1819        .addr2sockaddr     = inet_csk_addr2sockaddr,
1820        .sockaddr_len      = sizeof(struct sockaddr_in),
1821        .bind_conflict     = inet_csk_bind_conflict,
1822#ifdef CONFIG_COMPAT
1823        .compat_setsockopt = compat_ip_setsockopt,
1824        .compat_getsockopt = compat_ip_getsockopt,
1825#endif
1826};
1827EXPORT_SYMBOL(ipv4_specific);
1828
1829#ifdef CONFIG_TCP_MD5SIG
1830static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1831        .md5_lookup             = tcp_v4_md5_lookup,
1832        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1833        .md5_add                = tcp_v4_md5_add_func,
1834        .md5_parse              = tcp_v4_parse_md5_keys,
1835};
1836#endif
1837
1838/* NOTE: A lot of things are set to zero explicitly by the call to
1839 *       sk_alloc(), so they need not be done here.
1840 */
1841static int tcp_v4_init_sock(struct sock *sk)
1842{
1843        struct inet_connection_sock *icsk = inet_csk(sk);
1844        struct tcp_sock *tp = tcp_sk(sk);
1845
1846        skb_queue_head_init(&tp->out_of_order_queue);
1847        tcp_init_xmit_timers(sk);
1848        tcp_prequeue_init(tp);
1849
1850        icsk->icsk_rto = TCP_TIMEOUT_INIT;
1851        tp->mdev = TCP_TIMEOUT_INIT;
1852
1853        /* So many TCP implementations out there (incorrectly) count the
1854         * initial SYN frame in their delayed-ACK and congestion control
1855         * algorithms that we must have the following bandaid to talk
1856         * efficiently to them.  -DaveM
1857         */
1858        tp->snd_cwnd = 2;
1859
1860        /* See draft-stevens-tcpca-spec-01 for discussion of the
1861         * initialization of these values.
1862         */
1863        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1864        tp->snd_cwnd_clamp = ~0;
1865        tp->mss_cache = TCP_MSS_DEFAULT;
1866
1867        tp->reordering = sysctl_tcp_reordering;
1868        icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1869
1870        sk->sk_state = TCP_CLOSE;
1871
1872        sk->sk_write_space = sk_stream_write_space;
1873        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1874
1875        icsk->icsk_af_ops = &ipv4_specific;
1876        icsk->icsk_sync_mss = tcp_sync_mss;
1877#ifdef CONFIG_TCP_MD5SIG
1878        tp->af_specific = &tcp_sock_ipv4_specific;
1879#endif
1880
1881        /* TCP Cookie Transactions */
1882        if (sysctl_tcp_cookie_size > 0) {
1883                /* Default, cookies without s_data_payload. */
1884                tp->cookie_values =
1885                        kzalloc(sizeof(*tp->cookie_values),
1886                                sk->sk_allocation);
1887                if (tp->cookie_values != NULL)
1888                        kref_init(&tp->cookie_values->kref);
1889        }
1890        /* Presumed zeroed, in order of appearance:
1891         *      cookie_in_always, cookie_out_never,
1892         *      s_data_constant, s_data_in, s_data_out
1893         */
1894        sk->sk_sndbuf = sysctl_tcp_wmem[1];
1895        sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1896
1897        local_bh_disable();
1898        percpu_counter_inc(&tcp_sockets_allocated);
1899        local_bh_enable();
1900
1901        return 0;
1902}
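/*
 * tcp_v4_init_sock() runs once per socket via tcp_prot.init (see below),
 * i.e. whenever user space does something like this hypothetical sketch:
 *
 *      int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 * so every fresh TCP socket starts out in TCP_CLOSE with snd_cwnd = 2,
 * an infinite ssthresh and icsk_rto = TCP_TIMEOUT_INIT, as set above.
 */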
1903
1904void tcp_v4_destroy_sock(struct sock *sk)
1905{
1906        struct tcp_sock *tp = tcp_sk(sk);
1907
1908        tcp_clear_xmit_timers(sk);
1909
1910        tcp_cleanup_congestion_control(sk);
1911
1912        /* Clean up the write buffer. */
1913        tcp_write_queue_purge(sk);
1914
1915        /* Cleans up our, hopefully empty, out_of_order_queue. */
1916        __skb_queue_purge(&tp->out_of_order_queue);
1917
1918#ifdef CONFIG_TCP_MD5SIG
1919        /* Clean up the MD5 key list, if any */
1920        if (tp->md5sig_info) {
1921                tcp_v4_clear_md5_list(sk);
1922                kfree(tp->md5sig_info);
1923                tp->md5sig_info = NULL;
1924        }
1925#endif
1926
1927#ifdef CONFIG_NET_DMA
1928        /* Cleans up our sk_async_wait_queue */
1929        __skb_queue_purge(&sk->sk_async_wait_queue);
1930#endif
1931
1932        /* Clean up the prequeue; it really should be empty. */
1933        __skb_queue_purge(&tp->ucopy.prequeue);
1934
1935        /* Clean up a referenced TCP bind bucket. */
1936        if (inet_csk(sk)->icsk_bind_hash)
1937                inet_put_port(sk);
1938
1939        /*
1940         * If a sendmsg cached page exists, toss it.
1941         */
1942        if (sk->sk_sndmsg_page) {
1943                __free_page(sk->sk_sndmsg_page);
1944                sk->sk_sndmsg_page = NULL;
1945        }
1946
1947        /* TCP Cookie Transactions */
1948        if (tp->cookie_values != NULL) {
1949                kref_put(&tp->cookie_values->kref,
1950                         tcp_cookie_values_release);
1951                tp->cookie_values = NULL;
1952        }
1953
1954        percpu_counter_dec(&tcp_sockets_allocated);
1955}
1956EXPORT_SYMBOL(tcp_v4_destroy_sock);
1957
1958#ifdef CONFIG_PROC_FS
1959/* Proc filesystem TCP sock list dumping. */
1960
1961static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1962{
1963        return hlist_nulls_empty(head) ? NULL :
1964                list_entry(head->first, struct inet_timewait_sock, tw_node);
1965}
1966
1967static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1968{
1969        return !is_a_nulls(tw->tw_node.next) ?
1970                hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1971}
1972
1973/*
1974 * Get the next listener socket following cur.  If cur is NULL, get the
1975 * first socket starting from the bucket given in st->bucket; when
1976 * st->bucket is zero the very first socket in the hash table is returned.
1977 */
1978static void *listening_get_next(struct seq_file *seq, void *cur)
1979{
1980        struct inet_connection_sock *icsk;
1981        struct hlist_nulls_node *node;
1982        struct sock *sk = cur;
1983        struct inet_listen_hashbucket *ilb;
1984        struct tcp_iter_state *st = seq->private;
1985        struct net *net = seq_file_net(seq);
1986
1987        if (!sk) {
1988                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1989                spin_lock_bh(&ilb->lock);
1990                sk = sk_nulls_head(&ilb->head);
1991                st->offset = 0;
1992                goto get_sk;
1993        }
1994        ilb = &tcp_hashinfo.listening_hash[st->bucket];
1995        ++st->num;
1996        ++st->offset;
1997
1998        if (st->state == TCP_SEQ_STATE_OPENREQ) {
1999                struct request_sock *req = cur;
2000
2001                icsk = inet_csk(st->syn_wait_sk);
2002                req = req->dl_next;
2003                while (1) {
2004                        while (req) {
2005                                if (req->rsk_ops->family == st->family) {
2006                                        cur = req;
2007                                        goto out;
2008                                }
2009                                req = req->dl_next;
2010                        }
2011                        if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2012                                break;
2013get_req:
2014                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2015                }
2016                sk        = sk_nulls_next(st->syn_wait_sk);
2017                st->state = TCP_SEQ_STATE_LISTENING;
2018                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2019        } else {
2020                icsk = inet_csk(sk);
2021                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2022                if (reqsk_queue_len(&icsk->icsk_accept_queue))
2023                        goto start_req;
2024                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2025                sk = sk_nulls_next(sk);
2026        }
2027get_sk:
2028        sk_nulls_for_each_from(sk, node) {
2029                if (!net_eq(sock_net(sk), net))
2030                        continue;
2031                if (sk->sk_family == st->family) {
2032                        cur = sk;
2033                        goto out;
2034                }
2035                icsk = inet_csk(sk);
2036                read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2037                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2038start_req:
2039                        st->uid         = sock_i_uid(sk);
2040                        st->syn_wait_sk = sk;
2041                        st->state       = TCP_SEQ_STATE_OPENREQ;
2042                        st->sbucket     = 0;
2043                        goto get_req;
2044                }
2045                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2046        }
2047        spin_unlock_bh(&ilb->lock);
2048        st->offset = 0;
2049        if (++st->bucket < INET_LHTABLE_SIZE) {
2050                ilb = &tcp_hashinfo.listening_hash[st->bucket];
2051                spin_lock_bh(&ilb->lock);
2052                sk = sk_nulls_head(&ilb->head);
2053                goto get_sk;
2054        }
2055        cur = NULL;
2056out:
2057        return cur;
2058}
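/*
 * The iterator above interleaves two walks: the listening hash buckets
 * (TCP_SEQ_STATE_LISTENING) and, for each listener with pending requests,
 * a nested walk of its SYN table (TCP_SEQ_STATE_OPENREQ) under
 * syn_wait_lock.  st->bucket, st->sbucket, st->num and st->offset together
 * record the resume position so a later read() can continue where the
 * previous one stopped.
 */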
2059
2060static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2061{
2062        struct tcp_iter_state *st = seq->private;
2063        void *rc;
2064
2065        st->bucket = 0;
2066        st->offset = 0;
2067        rc = listening_get_next(seq, NULL);
2068
2069        while (rc && *pos) {
2070                rc = listening_get_next(seq, rc);
2071                --*pos;
2072        }
2073        return rc;
2074}
2075
2076static inline int empty_bucket(struct tcp_iter_state *st)
2077{
2078        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2079                hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2080}
2081
2082/*
2083 * Get the first established socket, starting from the bucket given in
2084 * st->bucket; if st->bucket is zero, the very first socket in the hash is returned.
2085 */
2086static void *established_get_first(struct seq_file *seq)
2087{
2088        struct tcp_iter_state *st = seq->private;
2089        struct net *net = seq_file_net(seq);
2090        void *rc = NULL;
2091
2092        st->offset = 0;
2093        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2094                struct sock *sk;
2095                struct hlist_nulls_node *node;
2096                struct inet_timewait_sock *tw;
2097                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2098
2099                /* Lockless fast path for the common case of empty buckets */
2100                if (empty_bucket(st))
2101                        continue;
2102
2103                spin_lock_bh(lock);
2104                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2105                        if (sk->sk_family != st->family ||
2106                            !net_eq(sock_net(sk), net)) {
2107                                continue;
2108                        }
2109                        rc = sk;
2110                        goto out;
2111                }
2112                st->state = TCP_SEQ_STATE_TIME_WAIT;
2113                inet_twsk_for_each(tw, node,
2114                                   &tcp_hashinfo.ehash[st->bucket].twchain) {
2115                        if (tw->tw_family != st->family ||
2116                            !net_eq(twsk_net(tw), net)) {
2117                                continue;
2118                        }
2119                        rc = tw;
2120                        goto out;
2121                }
2122                spin_unlock_bh(lock);
2123                st->state = TCP_SEQ_STATE_ESTABLISHED;
2124        }
2125out:
2126        return rc;
2127}
2128
2129static void *established_get_next(struct seq_file *seq, void *cur)
2130{
2131        struct sock *sk = cur;
2132        struct inet_timewait_sock *tw;
2133        struct hlist_nulls_node *node;
2134        struct tcp_iter_state *st = seq->private;
2135        struct net *net = seq_file_net(seq);
2136
2137        ++st->num;
2138        ++st->offset;
2139
2140        if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2141                tw = cur;
2142                tw = tw_next(tw);
2143get_tw:
2144                while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2145                        tw = tw_next(tw);
2146                }
2147                if (tw) {
2148                        cur = tw;
2149                        goto out;
2150                }
2151                spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2152                st->state = TCP_SEQ_STATE_ESTABLISHED;
2153
2154                /* Look for the next non-empty bucket */
2155                st->offset = 0;
2156                while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2157                                empty_bucket(st))
2158                        ;
2159                if (st->bucket > tcp_hashinfo.ehash_mask)
2160                        return NULL;
2161
2162                spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2163                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2164        } else
2165                sk = sk_nulls_next(sk);
2166
2167        sk_nulls_for_each_from(sk, node) {
2168                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2169                        goto found;
2170        }
2171
2172        st->state = TCP_SEQ_STATE_TIME_WAIT;
2173        tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2174        goto get_tw;
2175found:
2176        cur = sk;
2177out:
2178        return cur;
2179}
2180
2181static void *established_get_idx(struct seq_file *seq, loff_t pos)
2182{
2183        struct tcp_iter_state *st = seq->private;
2184        void *rc;
2185
2186        st->bucket = 0;
2187        rc = established_get_first(seq);
2188
2189        while (rc && pos) {
2190                rc = established_get_next(seq, rc);
2191                --pos;
2192        }
2193        return rc;
2194}
2195
2196static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2197{
2198        void *rc;
2199        struct tcp_iter_state *st = seq->private;
2200
2201        st->state = TCP_SEQ_STATE_LISTENING;
2202        rc        = listening_get_idx(seq, &pos);
2203
2204        if (!rc) {
2205                st->state = TCP_SEQ_STATE_ESTABLISHED;
2206                rc        = established_get_idx(seq, pos);
2207        }
2208
2209        return rc;
2210}
2211
2212static void *tcp_seek_last_pos(struct seq_file *seq)
2213{
2214        struct tcp_iter_state *st = seq->private;
2215        int offset = st->offset;
2216        int orig_num = st->num;
2217        void *rc = NULL;
2218
2219        switch (st->state) {
2220        case TCP_SEQ_STATE_OPENREQ:
2221        case TCP_SEQ_STATE_LISTENING:
2222                if (st->bucket >= INET_LHTABLE_SIZE)
2223                        break;
2224                st->state = TCP_SEQ_STATE_LISTENING;
2225                rc = listening_get_next(seq, NULL);
2226                while (offset-- && rc)
2227                        rc = listening_get_next(seq, rc);
2228                if (rc)
2229                        break;
2230                st->bucket = 0;
2231                /* Fallthrough */
2232        case TCP_SEQ_STATE_ESTABLISHED:
2233        case TCP_SEQ_STATE_TIME_WAIT:
2234                st->state = TCP_SEQ_STATE_ESTABLISHED;
2235                if (st->bucket > tcp_hashinfo.ehash_mask)
2236                        break;
2237                rc = established_get_first(seq);
2238                while (offset-- && rc)
2239                        rc = established_get_next(seq, rc);
2240        }
2241
2242        st->num = orig_num;
2243
2244        return rc;
2245}
2246
2247static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2248{
2249        struct tcp_iter_state *st = seq->private;
2250        void *rc;
2251
2252        if (*pos && *pos == st->last_pos) {
2253                rc = tcp_seek_last_pos(seq);
2254                if (rc)
2255                        goto out;
2256        }
2257
2258        st->state = TCP_SEQ_STATE_LISTENING;
2259        st->num = 0;
2260        st->bucket = 0;
2261        st->offset = 0;
2262        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2263
2264out:
2265        st->last_pos = *pos;
2266        return rc;
2267}
2268
2269static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2270{
2271        struct tcp_iter_state *st = seq->private;
2272        void *rc = NULL;
2273
2274        if (v == SEQ_START_TOKEN) {
2275                rc = tcp_get_idx(seq, 0);
2276                goto out;
2277        }
2278
2279        switch (st->state) {
2280        case TCP_SEQ_STATE_OPENREQ:
2281        case TCP_SEQ_STATE_LISTENING:
2282                rc = listening_get_next(seq, v);
2283                if (!rc) {
2284                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2285                        st->bucket = 0;
2286                        st->offset = 0;
2287                        rc        = established_get_first(seq);
2288                }
2289                break;
2290        case TCP_SEQ_STATE_ESTABLISHED:
2291        case TCP_SEQ_STATE_TIME_WAIT:
2292                rc = established_get_next(seq, v);
2293                break;
2294        }
2295out:
2296        ++*pos;
2297        st->last_pos = *pos;
2298        return rc;
2299}
2300
2301static void tcp_seq_stop(struct seq_file *seq, void *v)
2302{
2303        struct tcp_iter_state *st = seq->private;
2304
2305        switch (st->state) {
2306        case TCP_SEQ_STATE_OPENREQ:
2307                if (v) {
2308                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2309                        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2310                }
2311        case TCP_SEQ_STATE_LISTENING:
2312                if (v != SEQ_START_TOKEN)
2313                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2314                break;
2315        case TCP_SEQ_STATE_TIME_WAIT:
2316        case TCP_SEQ_STATE_ESTABLISHED:
2317                if (v)
2318                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2319                break;
2320        }
2321}
2322
2323static int tcp_seq_open(struct inode *inode, struct file *file)
2324{
2325        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2326        struct tcp_iter_state *s;
2327        int err;
2328
2329        err = seq_open_net(inode, file, &afinfo->seq_ops,
2330                          sizeof(struct tcp_iter_state));
2331        if (err < 0)
2332                return err;
2333
2334        s = ((struct seq_file *)file->private_data)->private;
2335        s->family               = afinfo->family;
2336        s->last_pos             = 0;
2337        return 0;
2338}
2339
2340int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2341{
2342        int rc = 0;
2343        struct proc_dir_entry *p;
2344
2345        afinfo->seq_fops.open           = tcp_seq_open;
2346        afinfo->seq_fops.read           = seq_read;
2347        afinfo->seq_fops.llseek         = seq_lseek;
2348        afinfo->seq_fops.release        = seq_release_net;
2349
2350        afinfo->seq_ops.start           = tcp_seq_start;
2351        afinfo->seq_ops.next            = tcp_seq_next;
2352        afinfo->seq_ops.stop            = tcp_seq_stop;
2353
2354        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2355                             &afinfo->seq_fops, afinfo);
2356        if (!p)
2357                rc = -ENOMEM;
2358        return rc;
2359}
2360EXPORT_SYMBOL(tcp_proc_register);
2361
2362void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2363{
2364        proc_net_remove(net, afinfo->name);
2365}
2366EXPORT_SYMBOL(tcp_proc_unregister);
2367
2368static void get_openreq4(struct sock *sk, struct request_sock *req,
2369                         struct seq_file *f, int i, int uid, int *len)
2370{
2371        const struct inet_request_sock *ireq = inet_rsk(req);
2372        int ttd = req->expires - jiffies;
2373
2374        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2375                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2376                i,
2377                ireq->loc_addr,
2378                ntohs(inet_sk(sk)->inet_sport),
2379                ireq->rmt_addr,
2380                ntohs(ireq->rmt_port),
2381                TCP_SYN_RECV,
2382                0, 0, /* could print option size, but that is af dependent. */
2383                1,    /* timers active (only the expire timer) */
2384                jiffies_to_clock_t(ttd),
2385                req->retrans,
2386                uid,
2387                0,  /* non standard timer */
2388                0, /* open_requests have no inode */
2389                atomic_read(&sk->sk_refcnt),
2390                req,
2391                len);
2392}
2393
2394static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2395{
2396        int timer_active;
2397        unsigned long timer_expires;
2398        struct tcp_sock *tp = tcp_sk(sk);
2399        const struct inet_connection_sock *icsk = inet_csk(sk);
2400        struct inet_sock *inet = inet_sk(sk);
2401        __be32 dest = inet->inet_daddr;
2402        __be32 src = inet->inet_rcv_saddr;
2403        __u16 destp = ntohs(inet->inet_dport);
2404        __u16 srcp = ntohs(inet->inet_sport);
2405        int rx_queue;
2406
2407        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2408                timer_active    = 1;
2409                timer_expires   = icsk->icsk_timeout;
2410        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2411                timer_active    = 4;
2412                timer_expires   = icsk->icsk_timeout;
2413        } else if (timer_pending(&sk->sk_timer)) {
2414                timer_active    = 2;
2415                timer_expires   = sk->sk_timer.expires;
2416        } else {
2417                timer_active    = 0;
2418                timer_expires = jiffies;
2419        }
2420
2421        if (sk->sk_state == TCP_LISTEN)
2422                rx_queue = sk->sk_ack_backlog;
2423        else
2424                /*
2425                 * Because we don't lock the socket, we might see a transient negative value.
2426                 */
2427                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2428
2429        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2430                        "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2431                i, src, srcp, dest, destp, sk->sk_state,
2432                tp->write_seq - tp->snd_una,
2433                rx_queue,
2434                timer_active,
2435                jiffies_to_clock_t(timer_expires - jiffies),
2436                icsk->icsk_retransmits,
2437                sock_i_uid(sk),
2438                icsk->icsk_probes_out,
2439                sock_i_ino(sk),
2440                atomic_read(&sk->sk_refcnt), sk,
2441                jiffies_to_clock_t(icsk->icsk_rto),
2442                jiffies_to_clock_t(icsk->icsk_ack.ato),
2443                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2444                tp->snd_cwnd,
2445                tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2446                len);
2447}
2448
2449static void get_timewait4_sock(struct inet_timewait_sock *tw,
2450                               struct seq_file *f, int i, int *len)
2451{
2452        __be32 dest, src;
2453        __u16 destp, srcp;
2454        int ttd = tw->tw_ttd - jiffies;
2455
2456        if (ttd < 0)
2457                ttd = 0;
2458
2459        dest  = tw->tw_daddr;
2460        src   = tw->tw_rcv_saddr;
2461        destp = ntohs(tw->tw_dport);
2462        srcp  = ntohs(tw->tw_sport);
2463
2464        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2465                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2466                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2467                3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2468                atomic_read(&tw->tw_refcnt), tw, len);
2469}
2470
2471#define TMPSZ 150
2472
2473static int tcp4_seq_show(struct seq_file *seq, void *v)
2474{
2475        struct tcp_iter_state *st;
2476        int len;
2477
2478        if (v == SEQ_START_TOKEN) {
2479                seq_printf(seq, "%-*s\n", TMPSZ - 1,
2480                           "  sl  local_address rem_address   st tx_queue "
2481                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2482                           "inode");
2483                goto out;
2484        }
2485        st = seq->private;
2486
2487        switch (st->state) {
2488        case TCP_SEQ_STATE_LISTENING:
2489        case TCP_SEQ_STATE_ESTABLISHED:
2490                get_tcp4_sock(v, seq, st->num, &len);
2491                break;
2492        case TCP_SEQ_STATE_OPENREQ:
2493                get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2494                break;
2495        case TCP_SEQ_STATE_TIME_WAIT:
2496                get_timewait4_sock(v, seq, st->num, &len);
2497                break;
2498        }
2499        seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2500out:
2501        return 0;
2502}
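/*
 * For illustration, a plausible /proc/net/tcp line emitted by
 * get_tcp4_sock() for a listener on 127.0.0.1:22; addresses and ports are
 * hex in stored byte order, and the inode and socket pointer are made up:
 *
 *      0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000
 *         00000000     0        0 12345 1 ffff880012345678 ...
 *
 * "0A" is TCP_LISTEN, the %08X:%08X pair is tx_queue:rx_queue, and
 * tm->when holds the pending timer's expiry in clock ticks.
 */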
2503
2504static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2505        .name           = "tcp",
2506        .family         = AF_INET,
2507        .seq_fops       = {
2508                .owner          = THIS_MODULE,
2509        },
2510        .seq_ops        = {
2511                .show           = tcp4_seq_show,
2512        },
2513};
2514
2515static int __net_init tcp4_proc_init_net(struct net *net)
2516{
2517        return tcp_proc_register(net, &tcp4_seq_afinfo);
2518}
2519
2520static void __net_exit tcp4_proc_exit_net(struct net *net)
2521{
2522        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2523}
2524
2525static struct pernet_operations tcp4_net_ops = {
2526        .init = tcp4_proc_init_net,
2527        .exit = tcp4_proc_exit_net,
2528};
2529
2530int __init tcp4_proc_init(void)
2531{
2532        return register_pernet_subsys(&tcp4_net_ops);
2533}
2534
2535void tcp4_proc_exit(void)
2536{
2537        unregister_pernet_subsys(&tcp4_net_ops);
2538}
2539#endif /* CONFIG_PROC_FS */
2540
2541struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2542{
2543        const struct iphdr *iph = skb_gro_network_header(skb);
2544
2545        switch (skb->ip_summed) {
2546        case CHECKSUM_COMPLETE:
2547                if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2548                                  skb->csum)) {
2549                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2550                        break;
2551                }
2552
2553                /* fall through */
2554        case CHECKSUM_NONE:
2555                NAPI_GRO_CB(skb)->flush = 1;
2556                return NULL;
2557        }
2558
2559        return tcp_gro_receive(head, skb);
2560}
2561
2562int tcp4_gro_complete(struct sk_buff *skb)
2563{
2564        const struct iphdr *iph = ip_hdr(skb);
2565        struct tcphdr *th = tcp_hdr(skb);
2566
2567        th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2568                                  iph->saddr, iph->daddr, 0);
2569        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2570
2571        return tcp_gro_complete(skb);
2572}
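/*
 * After tcp4_gro_complete(), th->check holds the complement of the
 * pseudo-header sum, the same convention as CHECKSUM_PARTIAL transmit
 * offload: whoever finalizes the checksum only needs to fold the TCP
 * header and payload on top of the seeded value.
 */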
2573
2574struct proto tcp_prot = {
2575        .name                   = "TCP",
2576        .owner                  = THIS_MODULE,
2577        .close                  = tcp_close,
2578        .connect                = tcp_v4_connect,
2579        .disconnect             = tcp_disconnect,
2580        .accept                 = inet_csk_accept,
2581        .ioctl                  = tcp_ioctl,
2582        .init                   = tcp_v4_init_sock,
2583        .destroy                = tcp_v4_destroy_sock,
2584        .shutdown               = tcp_shutdown,
2585        .setsockopt             = tcp_setsockopt,
2586        .getsockopt             = tcp_getsockopt,
2587        .recvmsg                = tcp_recvmsg,
2588        .sendmsg                = tcp_sendmsg,
2589        .sendpage               = tcp_sendpage,
2590        .backlog_rcv            = tcp_v4_do_rcv,
2591        .hash                   = inet_hash,
2592        .unhash                 = inet_unhash,
2593        .get_port               = inet_csk_get_port,
2594        .enter_memory_pressure  = tcp_enter_memory_pressure,
2595        .sockets_allocated      = &tcp_sockets_allocated,
2596        .orphan_count           = &tcp_orphan_count,
2597        .memory_allocated       = &tcp_memory_allocated,
2598        .memory_pressure        = &tcp_memory_pressure,
2599        .sysctl_mem             = sysctl_tcp_mem,
2600        .sysctl_wmem            = sysctl_tcp_wmem,
2601        .sysctl_rmem            = sysctl_tcp_rmem,
2602        .max_header             = MAX_TCP_HEADER,
2603        .obj_size               = sizeof(struct tcp_sock),
2604        .slab_flags             = SLAB_DESTROY_BY_RCU,
2605        .twsk_prot              = &tcp_timewait_sock_ops,
2606        .rsk_prot               = &tcp_request_sock_ops,
2607        .h.hashinfo             = &tcp_hashinfo,
2608        .no_autobind            = true,
2609#ifdef CONFIG_COMPAT
2610        .compat_setsockopt      = compat_tcp_setsockopt,
2611        .compat_getsockopt      = compat_tcp_getsockopt,
2612#endif
2613};
2614EXPORT_SYMBOL(tcp_prot);
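/*
 * tcp_prot is registered from the af_inet core, not from this file; a
 * minimal sketch of the boot-time wiring (see inet_init() in
 * net/ipv4/af_inet.c):
 *
 *      rc = proto_register(&tcp_prot, 1);
 *
 * where the second argument requests slab caches, after which inetsw
 * directs SOCK_STREAM/IPPROTO_TCP sockets at these operations.
 */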
2615
2616
2617static int __net_init tcp_sk_init(struct net *net)
2618{
2619        return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2620                                    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2621}
2622
2623static void __net_exit tcp_sk_exit(struct net *net)
2624{
2625        inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2626}
2627
2628static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2629{
2630        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2631}
2632
2633static struct pernet_operations __net_initdata tcp_sk_ops = {
2634       .init       = tcp_sk_init,
2635       .exit       = tcp_sk_exit,
2636       .exit_batch = tcp_sk_exit_batch,
2637};
2638
2639void __init tcp_v4_init(void)
2640{
2641        inet_hashinfo_init(&tcp_hashinfo);
2642        if (register_pernet_subsys(&tcp_sk_ops))
2643                panic("Failed to create the TCP control socket.\n");
2644}
2645