linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53#define pr_fmt(fmt) "TCP: " fmt
  54
  55#include <linux/bottom_half.h>
  56#include <linux/types.h>
  57#include <linux/fcntl.h>
  58#include <linux/module.h>
  59#include <linux/random.h>
  60#include <linux/cache.h>
  61#include <linux/jhash.h>
  62#include <linux/init.h>
  63#include <linux/times.h>
  64#include <linux/slab.h>
  65
  66#include <net/net_namespace.h>
  67#include <net/icmp.h>
  68#include <net/inet_hashtables.h>
  69#include <net/tcp.h>
  70#include <net/transp_v6.h>
  71#include <net/ipv6.h>
  72#include <net/inet_common.h>
  73#include <net/timewait_sock.h>
  74#include <net/xfrm.h>
  75#include <net/secure_seq.h>
  76#include <net/busy_poll.h>
  77
  78#include <linux/inet.h>
  79#include <linux/ipv6.h>
  80#include <linux/stddef.h>
  81#include <linux/proc_fs.h>
  82#include <linux/seq_file.h>
  83
  84#include <crypto/hash.h>
  85#include <linux/scatterlist.h>
  86
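     /* Sysctl knobs: tcp_tw_reuse allows reuse of TIME-WAIT sockets for new
      * outgoing connections; tcp_low_latency makes tcp_prequeue() decline to
      * queue segments for process-context handling.
      */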
  87int sysctl_tcp_tw_reuse __read_mostly;
  88int sysctl_tcp_low_latency __read_mostly;
  89EXPORT_SYMBOL(sysctl_tcp_low_latency);
  90
  91#ifdef CONFIG_TCP_MD5SIG
  92static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
  93                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
  94#endif
  95
  96struct inet_hashinfo tcp_hashinfo;
  97EXPORT_SYMBOL(tcp_hashinfo);
  98
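     /* Derive a secure initial sequence number from the address/port four-tuple
      * found in the incoming segment's IP and TCP headers.
      */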
  99static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 100{
 101        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 102                                          ip_hdr(skb)->saddr,
 103                                          tcp_hdr(skb)->dest,
 104                                          tcp_hdr(skb)->source);
 105}
 106
 107int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 108{
 109        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 110        struct tcp_sock *tp = tcp_sk(sk);
 111
  112        /* With PAWS, it is safe from the viewpoint
  113           of data integrity. Even without PAWS it is safe provided sequence
  114           spaces do not overlap, i.e. at data rates <= 80 Mbit/sec.
  115
  116           Actually, the idea is close to VJ's, except that the timestamp cache
  117           is held not per host but per port pair, and the TW bucket is used as
  118           the state holder.
  119
  120           If the TW bucket has already been destroyed we fall back to VJ's
  121           scheme and use the initial timestamp retrieved from the peer table.
  122         */
 123        if (tcptw->tw_ts_recent_stamp &&
 124            (!twp || (sysctl_tcp_tw_reuse &&
 125                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 126                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 127                if (tp->write_seq == 0)
 128                        tp->write_seq = 1;
 129                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 130                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 131                sock_hold(sktw);
 132                return 1;
 133        }
 134
 135        return 0;
 136}
 137EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 138
 139/* This will initiate an outgoing connection. */
 140int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 141{
 142        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 143        struct inet_sock *inet = inet_sk(sk);
 144        struct tcp_sock *tp = tcp_sk(sk);
 145        __be16 orig_sport, orig_dport;
 146        __be32 daddr, nexthop;
 147        struct flowi4 *fl4;
 148        struct rtable *rt;
 149        int err;
 150        struct ip_options_rcu *inet_opt;
 151
 152        if (addr_len < sizeof(struct sockaddr_in))
 153                return -EINVAL;
 154
 155        if (usin->sin_family != AF_INET)
 156                return -EAFNOSUPPORT;
 157
 158        nexthop = daddr = usin->sin_addr.s_addr;
 159        inet_opt = rcu_dereference_protected(inet->inet_opt,
 160                                             lockdep_sock_is_held(sk));
 161        if (inet_opt && inet_opt->opt.srr) {
 162                if (!daddr)
 163                        return -EINVAL;
 164                nexthop = inet_opt->opt.faddr;
 165        }
 166
 167        orig_sport = inet->inet_sport;
 168        orig_dport = usin->sin_port;
 169        fl4 = &inet->cork.fl.u.ip4;
 170        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 171                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 172                              IPPROTO_TCP,
 173                              orig_sport, orig_dport, sk);
 174        if (IS_ERR(rt)) {
 175                err = PTR_ERR(rt);
 176                if (err == -ENETUNREACH)
 177                        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 178                return err;
 179        }
 180
 181        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 182                ip_rt_put(rt);
 183                return -ENETUNREACH;
 184        }
 185
 186        if (!inet_opt || !inet_opt->opt.srr)
 187                daddr = fl4->daddr;
 188
 189        if (!inet->inet_saddr)
 190                inet->inet_saddr = fl4->saddr;
 191        sk_rcv_saddr_set(sk, inet->inet_saddr);
 192
 193        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 194                /* Reset inherited state */
 195                tp->rx_opt.ts_recent       = 0;
 196                tp->rx_opt.ts_recent_stamp = 0;
 197                if (likely(!tp->repair))
 198                        tp->write_seq      = 0;
 199        }
 200
 201        if (tcp_death_row.sysctl_tw_recycle &&
 202            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
 203                tcp_fetch_timewait_stamp(sk, &rt->dst);
 204
 205        inet->inet_dport = usin->sin_port;
 206        sk_daddr_set(sk, daddr);
 207
 208        inet_csk(sk)->icsk_ext_hdr_len = 0;
 209        if (inet_opt)
 210                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 211
 212        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 213
  214        /* Socket identity is still unknown (sport may be zero).
  215         * However, we set the state to SYN-SENT and, without releasing the
  216         * socket lock, select a source port, enter ourselves into the hash
  217         * tables and complete initialization afterwards.
  218         */
 219        tcp_set_state(sk, TCP_SYN_SENT);
 220        err = inet_hash_connect(&tcp_death_row, sk);
 221        if (err)
 222                goto failure;
 223
 224        sk_set_txhash(sk);
 225
 226        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 227                               inet->inet_sport, inet->inet_dport, sk);
 228        if (IS_ERR(rt)) {
 229                err = PTR_ERR(rt);
 230                rt = NULL;
 231                goto failure;
 232        }
 233        /* OK, now commit destination to socket.  */
 234        sk->sk_gso_type = SKB_GSO_TCPV4;
 235        sk_setup_caps(sk, &rt->dst);
 236
 237        if (!tp->write_seq && likely(!tp->repair))
 238                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 239                                                           inet->inet_daddr,
 240                                                           inet->inet_sport,
 241                                                           usin->sin_port);
 242
 243        inet->inet_id = tp->write_seq ^ jiffies;
 244
 245        err = tcp_connect(sk);
 246
 247        rt = NULL;
 248        if (err)
 249                goto failure;
 250
 251        return 0;
 252
 253failure:
 254        /*
 255         * This unhashes the socket and releases the local port,
 256         * if necessary.
 257         */
 258        tcp_set_state(sk, TCP_CLOSE);
 259        ip_rt_put(rt);
 260        sk->sk_route_caps = 0;
 261        inet->inet_dport = 0;
 262        return err;
 263}
 264EXPORT_SYMBOL(tcp_v4_connect);
 265
  266/*
  267 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC 1191.
  268 * It can be called through tcp_release_cb() if the socket was owned by the user
  269 * at the time tcp_v4_err() was called to handle the ICMP message.
  270 */
 271void tcp_v4_mtu_reduced(struct sock *sk)
 272{
 273        struct dst_entry *dst;
 274        struct inet_sock *inet = inet_sk(sk);
 275        u32 mtu = tcp_sk(sk)->mtu_info;
 276
 277        dst = inet_csk_update_pmtu(sk, mtu);
 278        if (!dst)
 279                return;
 280
  281        /* Something is about to go wrong... Remember the soft error
  282         * in case this connection is not able to recover.
  283         */
 284        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 285                sk->sk_err_soft = EMSGSIZE;
 286
 287        mtu = dst_mtu(dst);
 288
 289        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 290            ip_sk_accept_pmtu(sk) &&
 291            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 292                tcp_sync_mss(sk, mtu);
 293
 294                /* Resend the TCP packet because it's
 295                 * clear that the old packet has been
 296                 * dropped. This is the new "fast" path mtu
 297                 * discovery.
 298                 */
 299                tcp_simple_retransmit(sk);
 300        } /* else let the usual retransmit timer handle it */
 301}
 302EXPORT_SYMBOL(tcp_v4_mtu_reduced);
 303
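     /* Propagate an ICMP redirect to the socket's cached route, if one is still
      * attached to the socket.
      */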
 304static void do_redirect(struct sk_buff *skb, struct sock *sk)
 305{
 306        struct dst_entry *dst = __sk_dst_check(sk, 0);
 307
 308        if (dst)
 309                dst->ops->redirect(dst, sk, skb);
 310}
 311
 312
 313/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
 314void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 315{
 316        struct request_sock *req = inet_reqsk(sk);
 317        struct net *net = sock_net(sk);
 318
 319        /* ICMPs are not backlogged, hence we cannot get
 320         * an established socket here.
 321         */
 322        if (seq != tcp_rsk(req)->snt_isn) {
 323                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 324        } else if (abort) {
 325                /*
 326                 * Still in SYN_RECV, just remove it silently.
 327                 * There is no good way to pass the error to the newly
 328                 * created socket, and POSIX does not want network
 329                 * errors returned from accept().
 330                 */
 331                inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 332                tcp_listendrop(req->rsk_listener);
 333        }
 334        reqsk_put(req);
 335}
 336EXPORT_SYMBOL(tcp_req_err);
 337
  338/*
  339 * This routine is called by the ICMP module when it gets some
  340 * sort of error condition.  If err < 0 then the socket should
  341 * be closed and the error returned to the user.  If err > 0
  342 * it's just the icmp type << 8 | icmp code.  After adjustment,
  343 * header points to the first 8 bytes of the TCP header.  We need
  344 * to find the appropriate port.
  345 *
  346 * The locking strategy used here is very "optimistic".  When
  347 * someone else accesses the socket the ICMP is just dropped,
  348 * and for some paths there is no check at all.
  349 * A more general error queue to queue errors for later handling
  350 * would probably be better.
  351 *
  352 */
 353
 354void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 355{
 356        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 357        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 358        struct inet_connection_sock *icsk;
 359        struct tcp_sock *tp;
 360        struct inet_sock *inet;
 361        const int type = icmp_hdr(icmp_skb)->type;
 362        const int code = icmp_hdr(icmp_skb)->code;
 363        struct sock *sk;
 364        struct sk_buff *skb;
 365        struct request_sock *fastopen;
 366        __u32 seq, snd_una;
 367        __u32 remaining;
 368        int err;
 369        struct net *net = dev_net(icmp_skb->dev);
 370
 371        sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
 372                                       th->dest, iph->saddr, ntohs(th->source),
 373                                       inet_iif(icmp_skb));
 374        if (!sk) {
 375                __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 376                return;
 377        }
 378        if (sk->sk_state == TCP_TIME_WAIT) {
 379                inet_twsk_put(inet_twsk(sk));
 380                return;
 381        }
 382        seq = ntohl(th->seq);
 383        if (sk->sk_state == TCP_NEW_SYN_RECV)
 384                return tcp_req_err(sk, seq,
 385                                  type == ICMP_PARAMETERPROB ||
 386                                  type == ICMP_TIME_EXCEEDED ||
 387                                  (type == ICMP_DEST_UNREACH &&
 388                                   (code == ICMP_NET_UNREACH ||
 389                                    code == ICMP_HOST_UNREACH)));
 390
 391        bh_lock_sock(sk);
 392        /* If too many ICMPs get dropped on busy
 393         * servers this needs to be solved differently.
 394         * We do take care of PMTU discovery (RFC1191) special case :
 395         * we can receive locally generated ICMP messages while socket is held.
 396         */
 397        if (sock_owned_by_user(sk)) {
 398                if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
 399                        __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
 400        }
 401        if (sk->sk_state == TCP_CLOSE)
 402                goto out;
 403
 404        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 405                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
 406                goto out;
 407        }
 408
 409        icsk = inet_csk(sk);
 410        tp = tcp_sk(sk);
  411        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
 412        fastopen = tp->fastopen_rsk;
 413        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 414        if (sk->sk_state != TCP_LISTEN &&
 415            !between(seq, snd_una, tp->snd_nxt)) {
 416                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 417                goto out;
 418        }
 419
 420        switch (type) {
 421        case ICMP_REDIRECT:
 422                do_redirect(icmp_skb, sk);
 423                goto out;
 424        case ICMP_SOURCE_QUENCH:
 425                /* Just silently ignore these. */
 426                goto out;
 427        case ICMP_PARAMETERPROB:
 428                err = EPROTO;
 429                break;
 430        case ICMP_DEST_UNREACH:
 431                if (code > NR_ICMP_UNREACH)
 432                        goto out;
 433
 434                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
  435                        /* We are not interested in TCP_LISTEN and open_requests
  436                         * (SYN-ACKs sent out by Linux are always < 576 bytes, so
  437                         * they should go through unfragmented).
  438                         */
 439                        if (sk->sk_state == TCP_LISTEN)
 440                                goto out;
 441
 442                        tp->mtu_info = info;
 443                        if (!sock_owned_by_user(sk)) {
 444                                tcp_v4_mtu_reduced(sk);
 445                        } else {
 446                                if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
 447                                        sock_hold(sk);
 448                        }
 449                        goto out;
 450                }
 451
 452                err = icmp_err_convert[code].errno;
  453                /* Check whether icmp_skb allows reverting the RTO backoff
  454                 * (see draft-zimmermann-tcp-lcd) */
 455                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 456                        break;
 457                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 458                    !icsk->icsk_backoff || fastopen)
 459                        break;
 460
 461                if (sock_owned_by_user(sk))
 462                        break;
 463
 464                icsk->icsk_backoff--;
 465                icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
 466                                               TCP_TIMEOUT_INIT;
 467                icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 468
 469                skb = tcp_write_queue_head(sk);
 470                BUG_ON(!skb);
 471
 472                remaining = icsk->icsk_rto -
 473                            min(icsk->icsk_rto,
 474                                tcp_time_stamp - tcp_skb_timestamp(skb));
 475
 476                if (remaining) {
 477                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 478                                                  remaining, TCP_RTO_MAX);
 479                } else {
  480                        /* The RTO revert clocked out the retransmission.
  481                         * Retransmit now. */
 482                        tcp_retransmit_timer(sk);
 483                }
 484
 485                break;
 486        case ICMP_TIME_EXCEEDED:
 487                err = EHOSTUNREACH;
 488                break;
 489        default:
 490                goto out;
 491        }
 492
 493        switch (sk->sk_state) {
 494        case TCP_SYN_SENT:
 495        case TCP_SYN_RECV:
  496                /* Only in fast or simultaneous open. If a fast open socket is
  497                 * already accepted, it is treated as a connected one below.
  498                 */
 499                if (fastopen && !fastopen->sk)
 500                        break;
 501
 502                if (!sock_owned_by_user(sk)) {
 503                        sk->sk_err = err;
 504
 505                        sk->sk_error_report(sk);
 506
 507                        tcp_done(sk);
 508                } else {
 509                        sk->sk_err_soft = err;
 510                }
 511                goto out;
 512        }
 513
  514        /* If we've already connected we will keep trying
  515         * until we time out, or the user gives up.
  516         *
  517         * RFC 1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH to be
  518         * considered hard errors (well, FRAG_FAILED too,
  519         * but it is obsoleted by PMTU discovery).
  520         *
  521         * Note that on the modern internet, where routing is unreliable
  522         * and broken firewalls sit in every dark corner sending random
  523         * errors on their masters' orders, even these two messages have finally
  524         * lost their original sense (even Linux sends invalid PORT_UNREACHs).
  525         *
  526         * Now we are in compliance with RFCs.
  527         *                                                      --ANK (980905)
  528         */
 529
 530        inet = inet_sk(sk);
 531        if (!sock_owned_by_user(sk) && inet->recverr) {
 532                sk->sk_err = err;
 533                sk->sk_error_report(sk);
 534        } else  { /* Only an error on timeout */
 535                sk->sk_err_soft = err;
 536        }
 537
 538out:
 539        bh_unlock_sock(sk);
 540        sock_put(sk);
 541}
 542
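     /* Compute the TCP checksum of an outgoing IPv4 segment, either leaving a
      * partial checksum for the hardware to finish or folding it in software.
      */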
 543void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 544{
 545        struct tcphdr *th = tcp_hdr(skb);
 546
 547        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 548                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 549                skb->csum_start = skb_transport_header(skb) - skb->head;
 550                skb->csum_offset = offsetof(struct tcphdr, check);
 551        } else {
 552                th->check = tcp_v4_check(skb->len, saddr, daddr,
 553                                         csum_partial(th,
 554                                                      th->doff << 2,
 555                                                      skb->csum));
 556        }
 557}
 558
 559/* This routine computes an IPv4 TCP checksum. */
 560void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 561{
 562        const struct inet_sock *inet = inet_sk(sk);
 563
 564        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 565}
 566EXPORT_SYMBOL(tcp_v4_send_check);
 567
  568/*
  569 *      This routine will send an RST to the other tcp.
  570 *
  571 *      Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
  572 *                    for the reset?
  573 *      Answer: if a packet caused the RST, it is not for a socket
  574 *              existing in our system; if it is matched to a socket,
  575 *              it is just a duplicate segment or a bug in the other side's TCP.
  576 *              So we build the reply based only on the parameters that
  577 *              arrived with the segment.
  578 *      Exception: precedence violation. We do not implement it in any case.
  579 */
 580
 581static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 582{
 583        const struct tcphdr *th = tcp_hdr(skb);
 584        struct {
 585                struct tcphdr th;
 586#ifdef CONFIG_TCP_MD5SIG
 587                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 588#endif
 589        } rep;
 590        struct ip_reply_arg arg;
 591#ifdef CONFIG_TCP_MD5SIG
 592        struct tcp_md5sig_key *key = NULL;
 593        const __u8 *hash_location = NULL;
 594        unsigned char newhash[16];
 595        int genhash;
 596        struct sock *sk1 = NULL;
 597#endif
 598        struct net *net;
 599
 600        /* Never send a reset in response to a reset. */
 601        if (th->rst)
 602                return;
 603
  604        /* If sk is not NULL, it means we did a successful lookup and the incoming
  605         * route had to be correct. prequeue might have dropped our dst.
  606         */
 607        if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
 608                return;
 609
 610        /* Swap the send and the receive. */
 611        memset(&rep, 0, sizeof(rep));
 612        rep.th.dest   = th->source;
 613        rep.th.source = th->dest;
 614        rep.th.doff   = sizeof(struct tcphdr) / 4;
 615        rep.th.rst    = 1;
 616
 617        if (th->ack) {
 618                rep.th.seq = th->ack_seq;
 619        } else {
 620                rep.th.ack = 1;
 621                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 622                                       skb->len - (th->doff << 2));
 623        }
 624
 625        memset(&arg, 0, sizeof(arg));
 626        arg.iov[0].iov_base = (unsigned char *)&rep;
 627        arg.iov[0].iov_len  = sizeof(rep.th);
 628
 629        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 630#ifdef CONFIG_TCP_MD5SIG
 631        rcu_read_lock();
 632        hash_location = tcp_parse_md5sig_option(th);
 633        if (sk && sk_fullsock(sk)) {
 634                key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
 635                                        &ip_hdr(skb)->saddr, AF_INET);
 636        } else if (hash_location) {
  637                /*
  638                 * The active side is gone. Try to find the listening socket
  639                 * through the source port, and then find the md5 key through
  640                 * the listening socket. We do not lose any security here:
  641                 * the incoming packet is checked against the md5 hash of the
  642                 * key we find, and no RST is generated if the hash doesn't match.
  643                 */
 644                sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
 645                                             ip_hdr(skb)->saddr,
 646                                             th->source, ip_hdr(skb)->daddr,
 647                                             ntohs(th->source), inet_iif(skb));
  648                /* don't send an RST if we can't find a key */
 649                if (!sk1)
 650                        goto out;
 651
 652                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 653                                        &ip_hdr(skb)->saddr, AF_INET);
 654                if (!key)
 655                        goto out;
 656
 657
 658                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
 659                if (genhash || memcmp(hash_location, newhash, 16) != 0)
 660                        goto out;
 661
 662        }
 663
 664        if (key) {
 665                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 666                                   (TCPOPT_NOP << 16) |
 667                                   (TCPOPT_MD5SIG << 8) |
 668                                   TCPOLEN_MD5SIG);
  669                /* Update the iov length and the length the TCP header claims (doff) */
 670                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 671                rep.th.doff = arg.iov[0].iov_len / 4;
 672
 673                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 674                                     key, ip_hdr(skb)->saddr,
 675                                     ip_hdr(skb)->daddr, &rep.th);
 676        }
 677#endif
 678        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 679                                      ip_hdr(skb)->saddr, /* XXX */
 680                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 681        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 682        arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 683
  684        /* When the socket is gone, all binding information is lost, and
  685         * routing might fail in this case. No choice here: if we chose to force
  686         * the input interface, we would misroute in the case of an asymmetric route.
  687         */
 688        if (sk)
 689                arg.bound_dev_if = sk->sk_bound_dev_if;
 690
 691        BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 692                     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 693
 694        arg.tos = ip_hdr(skb)->tos;
 695        local_bh_disable();
 696        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 697                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 698                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 699                              &arg, arg.iov[0].iov_len);
 700
 701        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 702        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 703        local_bh_enable();
 704
 705#ifdef CONFIG_TCP_MD5SIG
 706out:
 707        rcu_read_unlock();
 708#endif
 709}
 710
  711/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
  712   outside of socket context, is certainly ugly. What can I do?
  713 */
 714
 715static void tcp_v4_send_ack(struct net *net,
 716                            struct sk_buff *skb, u32 seq, u32 ack,
 717                            u32 win, u32 tsval, u32 tsecr, int oif,
 718                            struct tcp_md5sig_key *key,
 719                            int reply_flags, u8 tos)
 720{
 721        const struct tcphdr *th = tcp_hdr(skb);
 722        struct {
 723                struct tcphdr th;
 724                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 725#ifdef CONFIG_TCP_MD5SIG
 726                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 727#endif
 728                        ];
 729        } rep;
 730        struct ip_reply_arg arg;
 731
 732        memset(&rep.th, 0, sizeof(struct tcphdr));
 733        memset(&arg, 0, sizeof(arg));
 734
 735        arg.iov[0].iov_base = (unsigned char *)&rep;
 736        arg.iov[0].iov_len  = sizeof(rep.th);
 737        if (tsecr) {
 738                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 739                                   (TCPOPT_TIMESTAMP << 8) |
 740                                   TCPOLEN_TIMESTAMP);
 741                rep.opt[1] = htonl(tsval);
 742                rep.opt[2] = htonl(tsecr);
 743                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 744        }
 745
 746        /* Swap the send and the receive. */
 747        rep.th.dest    = th->source;
 748        rep.th.source  = th->dest;
 749        rep.th.doff    = arg.iov[0].iov_len / 4;
 750        rep.th.seq     = htonl(seq);
 751        rep.th.ack_seq = htonl(ack);
 752        rep.th.ack     = 1;
 753        rep.th.window  = htons(win);
 754
 755#ifdef CONFIG_TCP_MD5SIG
 756        if (key) {
 757                int offset = (tsecr) ? 3 : 0;
 758
 759                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 760                                          (TCPOPT_NOP << 16) |
 761                                          (TCPOPT_MD5SIG << 8) |
 762                                          TCPOLEN_MD5SIG);
 763                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 764                rep.th.doff = arg.iov[0].iov_len/4;
 765
 766                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 767                                    key, ip_hdr(skb)->saddr,
 768                                    ip_hdr(skb)->daddr, &rep.th);
 769        }
 770#endif
 771        arg.flags = reply_flags;
 772        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 773                                      ip_hdr(skb)->saddr, /* XXX */
 774                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 775        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 776        if (oif)
 777                arg.bound_dev_if = oif;
 778        arg.tos = tos;
 779        local_bh_disable();
 780        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 781                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 782                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 783                              &arg, arg.iov[0].iov_len);
 784
 785        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 786        local_bh_enable();
 787}
 788
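     /* Answer a segment received on a TIME-WAIT socket with the ACK it owes,
      * echoing the sequence, window and timestamp state recorded in the
      * timewait bucket.
      */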
 789static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 790{
 791        struct inet_timewait_sock *tw = inet_twsk(sk);
 792        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 793
 794        tcp_v4_send_ack(sock_net(sk), skb,
 795                        tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 796                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 797                        tcp_time_stamp + tcptw->tw_ts_offset,
 798                        tcptw->tw_ts_recent,
 799                        tw->tw_bound_dev_if,
 800                        tcp_twsk_md5_key(tcptw),
 801                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 802                        tw->tw_tos
 803                        );
 804
 805        inet_twsk_put(tw);
 806}
 807
 808static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 809                                  struct request_sock *req)
 810{
 811        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 812         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 813         */
 814        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
 815                                             tcp_sk(sk)->snd_nxt;
 816
 817        /* RFC 7323 2.3
 818         * The window field (SEG.WND) of every outgoing segment, with the
 819         * exception of <SYN> segments, MUST be right-shifted by
 820         * Rcv.Wind.Shift bits:
 821         */
 822        tcp_v4_send_ack(sock_net(sk), skb, seq,
 823                        tcp_rsk(req)->rcv_nxt,
 824                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 825                        tcp_time_stamp,
 826                        req->ts_recent,
 827                        0,
 828                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
 829                                          AF_INET),
 830                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 831                        ip_hdr(skb)->tos);
 832}
 833
 834/*
 835 *      Send a SYN-ACK after having received a SYN.
 836 *      This still operates on a request_sock only, not on a big
 837 *      socket.
 838 */
 839static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 840                              struct flowi *fl,
 841                              struct request_sock *req,
 842                              struct tcp_fastopen_cookie *foc,
 843                              enum tcp_synack_type synack_type)
 844{
 845        const struct inet_request_sock *ireq = inet_rsk(req);
 846        struct flowi4 fl4;
 847        int err = -1;
 848        struct sk_buff *skb;
 849
 850        /* First, grab a route. */
 851        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 852                return -1;
 853
 854        skb = tcp_make_synack(sk, dst, req, foc, synack_type);
 855
 856        if (skb) {
 857                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 858
 859                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 860                                            ireq->ir_rmt_addr,
 861                                            ireq->opt);
 862                err = net_xmit_eval(err);
 863        }
 864
 865        return err;
 866}
 867
 868/*
 869 *      IPv4 request_sock destructor.
 870 */
 871static void tcp_v4_reqsk_destructor(struct request_sock *req)
 872{
 873        kfree(inet_rsk(req)->opt);
 874}
 875
 876#ifdef CONFIG_TCP_MD5SIG
 877/*
 878 * RFC2385 MD5 checksumming requires a mapping of
 879 * IP address->MD5 Key.
 880 * We need to maintain these in the sk structure.
 881 */
 882
 883/* Find the Key structure for an address.  */
 884struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
 885                                         const union tcp_md5_addr *addr,
 886                                         int family)
 887{
 888        const struct tcp_sock *tp = tcp_sk(sk);
 889        struct tcp_md5sig_key *key;
 890        unsigned int size = sizeof(struct in_addr);
 891        const struct tcp_md5sig_info *md5sig;
 892
 893        /* caller either holds rcu_read_lock() or socket lock */
 894        md5sig = rcu_dereference_check(tp->md5sig_info,
 895                                       lockdep_sock_is_held(sk));
 896        if (!md5sig)
 897                return NULL;
 898#if IS_ENABLED(CONFIG_IPV6)
 899        if (family == AF_INET6)
 900                size = sizeof(struct in6_addr);
 901#endif
 902        hlist_for_each_entry_rcu(key, &md5sig->head, node) {
 903                if (key->family != family)
 904                        continue;
 905                if (!memcmp(&key->addr, addr, size))
 906                        return key;
 907        }
 908        return NULL;
 909}
 910EXPORT_SYMBOL(tcp_md5_do_lookup);
 911
 912struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
 913                                         const struct sock *addr_sk)
 914{
 915        const union tcp_md5_addr *addr;
 916
 917        addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
 918        return tcp_md5_do_lookup(sk, addr, AF_INET);
 919}
 920EXPORT_SYMBOL(tcp_v4_md5_lookup);
 921
 922/* This can be called on a newly created socket, from other files */
 923int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 924                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 925{
 926        /* Add Key to the list */
 927        struct tcp_md5sig_key *key;
 928        struct tcp_sock *tp = tcp_sk(sk);
 929        struct tcp_md5sig_info *md5sig;
 930
 931        key = tcp_md5_do_lookup(sk, addr, family);
 932        if (key) {
 933                /* Pre-existing entry - just update that one. */
 934                memcpy(key->key, newkey, newkeylen);
 935                key->keylen = newkeylen;
 936                return 0;
 937        }
 938
 939        md5sig = rcu_dereference_protected(tp->md5sig_info,
 940                                           lockdep_sock_is_held(sk));
 941        if (!md5sig) {
 942                md5sig = kmalloc(sizeof(*md5sig), gfp);
 943                if (!md5sig)
 944                        return -ENOMEM;
 945
 946                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 947                INIT_HLIST_HEAD(&md5sig->head);
 948                rcu_assign_pointer(tp->md5sig_info, md5sig);
 949        }
 950
 951        key = sock_kmalloc(sk, sizeof(*key), gfp);
 952        if (!key)
 953                return -ENOMEM;
 954        if (!tcp_alloc_md5sig_pool()) {
 955                sock_kfree_s(sk, key, sizeof(*key));
 956                return -ENOMEM;
 957        }
 958
 959        memcpy(key->key, newkey, newkeylen);
 960        key->keylen = newkeylen;
 961        key->family = family;
 962        memcpy(&key->addr, addr,
 963               (family == AF_INET6) ? sizeof(struct in6_addr) :
 964                                      sizeof(struct in_addr));
 965        hlist_add_head_rcu(&key->node, &md5sig->head);
 966        return 0;
 967}
 968EXPORT_SYMBOL(tcp_md5_do_add);
 969
 970int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 971{
 972        struct tcp_md5sig_key *key;
 973
 974        key = tcp_md5_do_lookup(sk, addr, family);
 975        if (!key)
 976                return -ENOENT;
 977        hlist_del_rcu(&key->node);
 978        atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 979        kfree_rcu(key, rcu);
 980        return 0;
 981}
 982EXPORT_SYMBOL(tcp_md5_do_del);
 983
 984static void tcp_clear_md5_list(struct sock *sk)
 985{
 986        struct tcp_sock *tp = tcp_sk(sk);
 987        struct tcp_md5sig_key *key;
 988        struct hlist_node *n;
 989        struct tcp_md5sig_info *md5sig;
 990
 991        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
 992
 993        hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
 994                hlist_del_rcu(&key->node);
 995                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 996                kfree_rcu(key, rcu);
 997        }
 998}
 999
1000static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1001                                 int optlen)
1002{
1003        struct tcp_md5sig cmd;
1004        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1005
1006        if (optlen < sizeof(cmd))
1007                return -EINVAL;
1008
1009        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1010                return -EFAULT;
1011
1012        if (sin->sin_family != AF_INET)
1013                return -EINVAL;
1014
1015        if (!cmd.tcpm_keylen)
1016                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1017                                      AF_INET);
1018
1019        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1020                return -EINVAL;
1021
1022        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1023                              AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1024                              GFP_KERNEL);
1025}
1026
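     /* Feed the IPv4 pseudo-header plus a copy of the TCP header (with its
      * checksum field zeroed) into the pending MD5 hash request.
      */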
1027static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1028                                   __be32 daddr, __be32 saddr,
1029                                   const struct tcphdr *th, int nbytes)
1030{
1031        struct tcp4_pseudohdr *bp;
1032        struct scatterlist sg;
1033        struct tcphdr *_th;
1034
1035        bp = hp->scratch;
1036        bp->saddr = saddr;
1037        bp->daddr = daddr;
1038        bp->pad = 0;
1039        bp->protocol = IPPROTO_TCP;
1040        bp->len = cpu_to_be16(nbytes);
1041
1042        _th = (struct tcphdr *)(bp + 1);
1043        memcpy(_th, th, sizeof(*th));
1044        _th->check = 0;
1045
1046        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1047        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1048                                sizeof(*bp) + sizeof(*th));
1049        return crypto_ahash_update(hp->md5_req);
1050}
1051
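     /* Sign a reply that is built without a full skb (RST or ACK): hash only
      * the pseudo-header, the TCP header and the key.
      */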
1052static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1053                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
1054{
1055        struct tcp_md5sig_pool *hp;
1056        struct ahash_request *req;
1057
1058        hp = tcp_get_md5sig_pool();
1059        if (!hp)
1060                goto clear_hash_noput;
1061        req = hp->md5_req;
1062
1063        if (crypto_ahash_init(req))
1064                goto clear_hash;
1065        if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
1066                goto clear_hash;
1067        if (tcp_md5_hash_key(hp, key))
1068                goto clear_hash;
1069        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1070        if (crypto_ahash_final(req))
1071                goto clear_hash;
1072
1073        tcp_put_md5sig_pool();
1074        return 0;
1075
1076clear_hash:
1077        tcp_put_md5sig_pool();
1078clear_hash_noput:
1079        memset(md5_hash, 0, 16);
1080        return 1;
1081}
1082
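     /* Compute the RFC 2385 MD5 signature of a segment over the pseudo-header,
      * TCP header, payload and key; returns 0 on success, or 1 with the hash
      * zeroed on failure.
      */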
1083int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1084                        const struct sock *sk,
1085                        const struct sk_buff *skb)
1086{
1087        struct tcp_md5sig_pool *hp;
1088        struct ahash_request *req;
1089        const struct tcphdr *th = tcp_hdr(skb);
1090        __be32 saddr, daddr;
1091
1092        if (sk) { /* valid for establish/request sockets */
1093                saddr = sk->sk_rcv_saddr;
1094                daddr = sk->sk_daddr;
1095        } else {
1096                const struct iphdr *iph = ip_hdr(skb);
1097                saddr = iph->saddr;
1098                daddr = iph->daddr;
1099        }
1100
1101        hp = tcp_get_md5sig_pool();
1102        if (!hp)
1103                goto clear_hash_noput;
1104        req = hp->md5_req;
1105
1106        if (crypto_ahash_init(req))
1107                goto clear_hash;
1108
1109        if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
1110                goto clear_hash;
1111        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1112                goto clear_hash;
1113        if (tcp_md5_hash_key(hp, key))
1114                goto clear_hash;
1115        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1116        if (crypto_ahash_final(req))
1117                goto clear_hash;
1118
1119        tcp_put_md5sig_pool();
1120        return 0;
1121
1122clear_hash:
1123        tcp_put_md5sig_pool();
1124clear_hash_noput:
1125        memset(md5_hash, 0, 16);
1126        return 1;
1127}
1128EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1129
1130#endif
1131
1132/* Called with rcu_read_lock() */
1133static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1134                                    const struct sk_buff *skb)
1135{
1136#ifdef CONFIG_TCP_MD5SIG
 1137        /*
 1138         * This gets called for each TCP segment that arrives,
 1139         * so we want to be efficient.
 1140         * We have 3 drop cases:
 1141         * o No MD5 hash and one expected.
 1142         * o MD5 hash and we're not expecting one.
 1143         * o MD5 hash and it's wrong.
 1144         */
1145        const __u8 *hash_location = NULL;
1146        struct tcp_md5sig_key *hash_expected;
1147        const struct iphdr *iph = ip_hdr(skb);
1148        const struct tcphdr *th = tcp_hdr(skb);
1149        int genhash;
1150        unsigned char newhash[16];
1151
1152        hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1153                                          AF_INET);
1154        hash_location = tcp_parse_md5sig_option(th);
1155
1156        /* We've parsed the options - do we have a hash? */
1157        if (!hash_expected && !hash_location)
1158                return false;
1159
1160        if (hash_expected && !hash_location) {
1161                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1162                return true;
1163        }
1164
1165        if (!hash_expected && hash_location) {
1166                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1167                return true;
1168        }
1169
 1170        /* Okay, so we have both hash_expected and hash_location -
 1171         * we need to calculate the MD5 hash and compare.
 1172         */
1173        genhash = tcp_v4_md5_hash_skb(newhash,
1174                                      hash_expected,
1175                                      NULL, skb);
1176
1177        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1178                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1179                                     &iph->saddr, ntohs(th->source),
1180                                     &iph->daddr, ntohs(th->dest),
1181                                     genhash ? " tcp_v4_calc_md5_hash failed"
1182                                     : "");
1183                return true;
1184        }
1185        return false;
1186#endif
1187        return false;
1188}
1189
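     /* Fill in the IPv4-specific parts of a new request sock from the incoming
      * SYN: addresses, the transparent-proxy flag and any saved IP options.
      */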
1190static void tcp_v4_init_req(struct request_sock *req,
1191                            const struct sock *sk_listener,
1192                            struct sk_buff *skb)
1193{
1194        struct inet_request_sock *ireq = inet_rsk(req);
1195
1196        sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1197        sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1198        ireq->no_srccheck = inet_sk(sk_listener)->transparent;
1199        ireq->opt = tcp_v4_save_options(skb);
1200}
1201
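     /* Route the reply for a request sock; when asked, also report via *strict
      * whether the chosen route really points back at the remote address.
      */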
1202static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1203                                          struct flowi *fl,
1204                                          const struct request_sock *req,
1205                                          bool *strict)
1206{
1207        struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1208
1209        if (strict) {
1210                if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1211                        *strict = true;
1212                else
1213                        *strict = false;
1214        }
1215
1216        return dst;
1217}
1218
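     /* request_sock operations used by IPv4 TCP listeners. */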
1219struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1220        .family         =       PF_INET,
1221        .obj_size       =       sizeof(struct tcp_request_sock),
1222        .rtx_syn_ack    =       tcp_rtx_synack,
1223        .send_ack       =       tcp_v4_reqsk_send_ack,
1224        .destructor     =       tcp_v4_reqsk_destructor,
1225        .send_reset     =       tcp_v4_send_reset,
1226        .syn_ack_timeout =      tcp_syn_ack_timeout,
1227};
1228
1229static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1230        .mss_clamp      =       TCP_MSS_DEFAULT,
1231#ifdef CONFIG_TCP_MD5SIG
1232        .req_md5_lookup =       tcp_v4_md5_lookup,
1233        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1234#endif
1235        .init_req       =       tcp_v4_init_req,
1236#ifdef CONFIG_SYN_COOKIES
1237        .cookie_init_seq =      cookie_v4_init_sequence,
1238#endif
1239        .route_req      =       tcp_v4_route_req,
1240        .init_seq       =       tcp_v4_init_sequence,
1241        .send_synack    =       tcp_v4_send_synack,
1242};
1243
1244int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1245{
 1246        /* Never answer SYNs sent to broadcast or multicast addresses */
1247        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1248                goto drop;
1249
1250        return tcp_conn_request(&tcp_request_sock_ops,
1251                                &tcp_request_sock_ipv4_ops, sk, skb);
1252
1253drop:
1254        tcp_listendrop(sk);
1255        return 0;
1256}
1257EXPORT_SYMBOL(tcp_v4_conn_request);
1258
1259
 1260/*
 1261 * The three-way handshake has completed - we received a valid ACK -
 1262 * now create the new socket.
 1263 */
1264struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1265                                  struct request_sock *req,
1266                                  struct dst_entry *dst,
1267                                  struct request_sock *req_unhash,
1268                                  bool *own_req)
1269{
1270        struct inet_request_sock *ireq;
1271        struct inet_sock *newinet;
1272        struct tcp_sock *newtp;
1273        struct sock *newsk;
1274#ifdef CONFIG_TCP_MD5SIG
1275        struct tcp_md5sig_key *key;
1276#endif
1277        struct ip_options_rcu *inet_opt;
1278
1279        if (sk_acceptq_is_full(sk))
1280                goto exit_overflow;
1281
1282        newsk = tcp_create_openreq_child(sk, req, skb);
1283        if (!newsk)
1284                goto exit_nonewsk;
1285
1286        newsk->sk_gso_type = SKB_GSO_TCPV4;
1287        inet_sk_rx_dst_set(newsk, skb);
1288
1289        newtp                 = tcp_sk(newsk);
1290        newinet               = inet_sk(newsk);
1291        ireq                  = inet_rsk(req);
1292        sk_daddr_set(newsk, ireq->ir_rmt_addr);
1293        sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1294        newsk->sk_bound_dev_if = ireq->ir_iif;
1295        newinet->inet_saddr           = ireq->ir_loc_addr;
1296        inet_opt              = ireq->opt;
1297        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1298        ireq->opt             = NULL;
1299        newinet->mc_index     = inet_iif(skb);
1300        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1301        newinet->rcv_tos      = ip_hdr(skb)->tos;
1302        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1303        if (inet_opt)
1304                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1305        newinet->inet_id = newtp->write_seq ^ jiffies;
1306
1307        if (!dst) {
1308                dst = inet_csk_route_child_sock(sk, newsk, req);
1309                if (!dst)
1310                        goto put_and_exit;
1311        } else {
1312                /* syncookie case : see end of cookie_v4_check() */
1313        }
1314        sk_setup_caps(newsk, dst);
1315
1316        tcp_ca_openreq_child(newsk, dst);
1317
1318        tcp_sync_mss(newsk, dst_mtu(dst));
1319        newtp->advmss = dst_metric_advmss(dst);
1320        if (tcp_sk(sk)->rx_opt.user_mss &&
1321            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1322                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1323
1324        tcp_initialize_rcv_mss(newsk);
1325
1326#ifdef CONFIG_TCP_MD5SIG
1327        /* Copy over the MD5 key from the original socket */
1328        key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1329                                AF_INET);
1330        if (key) {
1331                /*
1332                 * We're using one, so create a matching key
1333                 * on the newsk structure. If we fail to get
1334                 * memory, then we end up not copying the key
1335                 * across. Shucks.
1336                 */
1337                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1338                               AF_INET, key->key, key->keylen, GFP_ATOMIC);
1339                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1340        }
1341#endif
1342
1343        if (__inet_inherit_port(sk, newsk) < 0)
1344                goto put_and_exit;
1345        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1346        if (*own_req)
1347                tcp_move_syn(newtp, req);
1348
1349        return newsk;
1350
1351exit_overflow:
1352        NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1353exit_nonewsk:
1354        dst_release(dst);
1355exit:
1356        tcp_listendrop(sk);
1357        return NULL;
1358put_and_exit:
1359        inet_csk_prepare_forced_close(newsk);
1360        tcp_done(newsk);
1361        goto exit;
1362}
1363EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1364
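     /* For a non-SYN segment hitting a listener, try to interpret it as a
      * syncookie ACK: return the new child socket on success, the listener
      * itself when no valid cookie is found, or NULL to have the caller drop
      * the segment.
      */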
1365static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1366{
1367#ifdef CONFIG_SYN_COOKIES
1368        const struct tcphdr *th = tcp_hdr(skb);
1369
1370        if (!th->syn)
1371                sk = cookie_v4_check(sk, skb);
1372#endif
1373        return sk;
1374}
1375
 1376/* The socket must have its spinlock held when we get
1377 * here, unless it is a TCP_LISTEN socket.
1378 *
1379 * We have a potential double-lock case here, so even when
1380 * doing backlog processing we use the BH locking scheme.
1381 * This is because we cannot sleep with the original spinlock
1382 * held.
1383 */
1384int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1385{
1386        struct sock *rsk;
1387
1388        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1389                struct dst_entry *dst = sk->sk_rx_dst;
1390
1391                sock_rps_save_rxhash(sk, skb);
1392                sk_mark_napi_id(sk, skb);
1393                if (dst) {
1394                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1395                            !dst->ops->check(dst, 0)) {
1396                                dst_release(dst);
1397                                sk->sk_rx_dst = NULL;
1398                        }
1399                }
1400                tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1401                return 0;
1402        }
1403
1404        if (tcp_checksum_complete(skb))
1405                goto csum_err;
1406
1407        if (sk->sk_state == TCP_LISTEN) {
1408                struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1409
1410                if (!nsk)
1411                        goto discard;
1412                if (nsk != sk) {
1413                        sock_rps_save_rxhash(nsk, skb);
1414                        sk_mark_napi_id(nsk, skb);
1415                        if (tcp_child_process(sk, nsk, skb)) {
1416                                rsk = nsk;
1417                                goto reset;
1418                        }
1419                        return 0;
1420                }
1421        } else
1422                sock_rps_save_rxhash(sk, skb);
1423
1424        if (tcp_rcv_state_process(sk, skb)) {
1425                rsk = sk;
1426                goto reset;
1427        }
1428        return 0;
1429
1430reset:
1431        tcp_v4_send_reset(rsk, skb);
1432discard:
1433        kfree_skb(skb);
1434        /* Be careful here. If this function gets more complicated and
1435         * gcc suffers from register pressure on the x86, sk (in %ebx)
1436         * might be destroyed here. This current version compiles correctly,
1437         * but you have been warned.
1438         */
1439        return 0;
1440
1441csum_err:
1442        TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1443        TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1444        goto discard;
1445}
1446EXPORT_SYMBOL(tcp_v4_do_rcv);
1447
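    /* Early demux, run from the IP receive path before the routing decision:
     * look the segment up in the established hash and, on a hit, attach the
     * socket and its cached rx dst to the skb so the route lookup can be
     * skipped for this packet.
     */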
1448void tcp_v4_early_demux(struct sk_buff *skb)
1449{
1450        const struct iphdr *iph;
1451        const struct tcphdr *th;
1452        struct sock *sk;
1453
1454        if (skb->pkt_type != PACKET_HOST)
1455                return;
1456
1457        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1458                return;
1459
1460        iph = ip_hdr(skb);
1461        th = tcp_hdr(skb);
1462
1463        if (th->doff < sizeof(struct tcphdr) / 4)
1464                return;
1465
1466        sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1467                                       iph->saddr, th->source,
1468                                       iph->daddr, ntohs(th->dest),
1469                                       skb->skb_iif);
1470        if (sk) {
1471                skb->sk = sk;
1472                skb->destructor = sock_edemux;
1473                if (sk_fullsock(sk)) {
1474                        struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1475
1476                        if (dst)
1477                                dst = dst_check(dst, 0);
1478                        if (dst &&
1479                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1480                                skb_dst_set_noref(skb, dst);
1481                }
1482        }
1483}
1484
1485/* Packet is added to VJ-style prequeue for processing in process
1486 * context, if a reader task is waiting. Apparently, this exciting
1487 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1488 * failed somewhere. Latency? Burstiness? Well, at least now we will
1489 * see, why it failed. 8)8)                               --ANK
1490 *
1491 */
1492bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1493{
1494        struct tcp_sock *tp = tcp_sk(sk);
1495
1496        if (sysctl_tcp_low_latency || !tp->ucopy.task)
1497                return false;
1498
1499        if (skb->len <= tcp_hdrlen(skb) &&
1500            skb_queue_len(&tp->ucopy.prequeue) == 0)
1501                return false;
1502
1503        /* Before escaping RCU protected region, we need to take care of skb
1504         * dst. Prequeue is only enabled for established sockets.
1505         * For such sockets, we might need the skb dst only to set sk->sk_rx_dst.
1506         * Instead of doing a full sk_rx_dst validity check here, let's perform
1507         * an optimistic one.
1508         */
1509        if (likely(sk->sk_rx_dst))
1510                skb_dst_drop(skb);
1511        else
1512                skb_dst_force_safe(skb);
1513
1514        __skb_queue_tail(&tp->ucopy.prequeue, skb);
1515        tp->ucopy.memory += skb->truesize;
1516        if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
1517            tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
1518                struct sk_buff *skb1;
1519
1520                BUG_ON(sock_owned_by_user(sk));
1521                __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
1522                                skb_queue_len(&tp->ucopy.prequeue));
1523
1524                while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1525                        sk_backlog_rcv(sk, skb1);
1526
1527                tp->ucopy.memory = 0;
1528        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1529                wake_up_interruptible_sync_poll(sk_sleep(sk),
1530                                           POLLIN | POLLRDNORM | POLLRDBAND);
1531                if (!inet_csk_ack_scheduled(sk))
1532                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1533                                                  (3 * tcp_rto_min(sk)) / 4,
1534                                                  TCP_RTO_MAX);
1535        }
1536        return true;
1537}
1538EXPORT_SYMBOL(tcp_prequeue);
1539
1540/*
1541 *      From tcp_input.c
1542 */
1543
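    /* Entry point for every IPv4 TCP segment delivered by the IP layer:
     * validate the header and checksum, fill in TCP_SKB_CB(), look up the
     * owning socket and then either process the segment directly, prequeue
     * it for a waiting reader, or queue it on the socket backlog.
     */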
1544int tcp_v4_rcv(struct sk_buff *skb)
1545{
1546        struct net *net = dev_net(skb->dev);
1547        const struct iphdr *iph;
1548        const struct tcphdr *th;
1549        bool refcounted;
1550        struct sock *sk;
1551        int ret;
1552
1553        if (skb->pkt_type != PACKET_HOST)
1554                goto discard_it;
1555
1556        /* Count it even if it's bad */
1557        __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1558
1559        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1560                goto discard_it;
1561
1562        th = (const struct tcphdr *)skb->data;
1563
1564        if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1565                goto bad_packet;
1566        if (!pskb_may_pull(skb, th->doff * 4))
1567                goto discard_it;
1568
1569        /* An explanation is required here, I think.
1570         * Packet length and doff are validated by header prediction,
1571         * provided the case of th->doff==0 is eliminated.
1572         * So, we defer the checks. */
1573
1574        if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1575                goto csum_error;
1576
1577        th = (const struct tcphdr *)skb->data;
1578        iph = ip_hdr(skb);
1579        /* This is tricky: we move IPCB to its correct location inside TCP_SKB_CB().
1580         * barrier() makes sure the compiler won't play fool^Waliasing games.
1581         */
1582        memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1583                sizeof(struct inet_skb_parm));
1584        barrier();
1585
1586        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1587        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1588                                    skb->len - th->doff * 4);
1589        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1590        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1591        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1592        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1593        TCP_SKB_CB(skb)->sacked  = 0;
1594
1595lookup:
1596        sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1597                               th->dest, &refcounted);
1598        if (!sk)
1599                goto no_tcp_socket;
1600
1601process:
1602        if (sk->sk_state == TCP_TIME_WAIT)
1603                goto do_time_wait;
1604
1605        if (sk->sk_state == TCP_NEW_SYN_RECV) {
1606                struct request_sock *req = inet_reqsk(sk);
1607                struct sock *nsk;
1608
1609                sk = req->rsk_listener;
1610                if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1611                        reqsk_put(req);
1612                        goto discard_it;
1613                }
1614                if (unlikely(sk->sk_state != TCP_LISTEN)) {
1615                        inet_csk_reqsk_queue_drop_and_put(sk, req);
1616                        goto lookup;
1617                }
1618                /* We own a reference on the listener; increase it again
1619                 * as we might lose it too soon.
1620                 */
1621                sock_hold(sk);
1622                refcounted = true;
1623                nsk = tcp_check_req(sk, skb, req, false);
1624                if (!nsk) {
1625                        reqsk_put(req);
1626                        goto discard_and_relse;
1627                }
1628                if (nsk == sk) {
1629                        reqsk_put(req);
1630                } else if (tcp_child_process(sk, nsk, skb)) {
1631                        tcp_v4_send_reset(nsk, skb);
1632                        goto discard_and_relse;
1633                } else {
1634                        sock_put(sk);
1635                        return 0;
1636                }
1637        }
1638        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1639                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1640                goto discard_and_relse;
1641        }
1642
1643        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1644                goto discard_and_relse;
1645
1646        if (tcp_v4_inbound_md5_hash(sk, skb))
1647                goto discard_and_relse;
1648
1649        nf_reset(skb);
1650
1651        if (sk_filter(sk, skb))
1652                goto discard_and_relse;
1653
1654        skb->dev = NULL;
1655
1656        if (sk->sk_state == TCP_LISTEN) {
1657                ret = tcp_v4_do_rcv(sk, skb);
1658                goto put_and_return;
1659        }
1660
1661        sk_incoming_cpu_update(sk);
1662
1663        bh_lock_sock_nested(sk);
1664        tcp_segs_in(tcp_sk(sk), skb);
1665        ret = 0;
1666        if (!sock_owned_by_user(sk)) {
1667                if (!tcp_prequeue(sk, skb))
1668                        ret = tcp_v4_do_rcv(sk, skb);
1669        } else if (unlikely(sk_add_backlog(sk, skb,
1670                                           sk->sk_rcvbuf + sk->sk_sndbuf))) {
1671                bh_unlock_sock(sk);
1672                __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
1673                goto discard_and_relse;
1674        }
1675        bh_unlock_sock(sk);
1676
1677put_and_return:
1678        if (refcounted)
1679                sock_put(sk);
1680
1681        return ret;
1682
1683no_tcp_socket:
1684        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1685                goto discard_it;
1686
1687        if (tcp_checksum_complete(skb)) {
1688csum_error:
1689                __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1690bad_packet:
1691                __TCP_INC_STATS(net, TCP_MIB_INERRS);
1692        } else {
1693                tcp_v4_send_reset(NULL, skb);
1694        }
1695
1696discard_it:
1697        /* Discard frame. */
1698        kfree_skb(skb);
1699        return 0;
1700
1701discard_and_relse:
1702        sk_drops_add(sk, skb);
1703        if (refcounted)
1704                sock_put(sk);
1705        goto discard_it;
1706
1707do_time_wait:
1708        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1709                inet_twsk_put(inet_twsk(sk));
1710                goto discard_it;
1711        }
1712
1713        if (tcp_checksum_complete(skb)) {
1714                inet_twsk_put(inet_twsk(sk));
1715                goto csum_error;
1716        }
1717        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1718        case TCP_TW_SYN: {
1719                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1720                                                        &tcp_hashinfo, skb,
1721                                                        __tcp_hdrlen(th),
1722                                                        iph->saddr, th->source,
1723                                                        iph->daddr, th->dest,
1724                                                        inet_iif(skb));
1725                if (sk2) {
1726                        inet_twsk_deschedule_put(inet_twsk(sk));
1727                        sk = sk2;
1728                        refcounted = false;
1729                        goto process;
1730                }
1731                /* Fall through to ACK */
1732        }
1733        case TCP_TW_ACK:
1734                tcp_v4_timewait_ack(sk, skb);
1735                break;
1736        case TCP_TW_RST:
1737                tcp_v4_send_reset(sk, skb);
1738                inet_twsk_deschedule_put(inet_twsk(sk));
1739                goto discard_it;
1740        case TCP_TW_SUCCESS:;
1741        }
1742        goto discard_it;
1743}
1744
1745static struct timewait_sock_ops tcp_timewait_sock_ops = {
1746        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1747        .twsk_unique    = tcp_twsk_unique,
1748        .twsk_destructor = tcp_twsk_destructor,
1749};
1750
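    /* Cache the skb's input route on the socket together with the incoming
     * interface index; the fast path in tcp_v4_do_rcv() revalidates both
     * before reusing the cached dst.
     */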
1751void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1752{
1753        struct dst_entry *dst = skb_dst(skb);
1754
1755        if (dst && dst_hold_safe(dst)) {
1756                sk->sk_rx_dst = dst;
1757                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1758        }
1759}
1760EXPORT_SYMBOL(inet_sk_rx_dst_set);
1761
1762const struct inet_connection_sock_af_ops ipv4_specific = {
1763        .queue_xmit        = ip_queue_xmit,
1764        .send_check        = tcp_v4_send_check,
1765        .rebuild_header    = inet_sk_rebuild_header,
1766        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1767        .conn_request      = tcp_v4_conn_request,
1768        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1769        .net_header_len    = sizeof(struct iphdr),
1770        .setsockopt        = ip_setsockopt,
1771        .getsockopt        = ip_getsockopt,
1772        .addr2sockaddr     = inet_csk_addr2sockaddr,
1773        .sockaddr_len      = sizeof(struct sockaddr_in),
1774        .bind_conflict     = inet_csk_bind_conflict,
1775#ifdef CONFIG_COMPAT
1776        .compat_setsockopt = compat_ip_setsockopt,
1777        .compat_getsockopt = compat_ip_getsockopt,
1778#endif
1779        .mtu_reduced       = tcp_v4_mtu_reduced,
1780};
1781EXPORT_SYMBOL(ipv4_specific);
1782
1783#ifdef CONFIG_TCP_MD5SIG
1784static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1785        .md5_lookup             = tcp_v4_md5_lookup,
1786        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1787        .md5_parse              = tcp_v4_parse_md5_keys,
1788};
1789#endif
1790
1791/* NOTE: A lot of things are set to zero explicitly by the call to
1792 *       sk_alloc(), so they need not be done here.
1793 */
1794static int tcp_v4_init_sock(struct sock *sk)
1795{
1796        struct inet_connection_sock *icsk = inet_csk(sk);
1797
1798        tcp_init_sock(sk);
1799
1800        icsk->icsk_af_ops = &ipv4_specific;
1801
1802#ifdef CONFIG_TCP_MD5SIG
1803        tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1804#endif
1805
1806        return 0;
1807}
1808
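    /* Final cleanup when a TCP socket is destroyed: stop the transmit timers,
     * release congestion control state, purge the various queues and drop the
     * bound port reference if one is still held.
     */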
1809void tcp_v4_destroy_sock(struct sock *sk)
1810{
1811        struct tcp_sock *tp = tcp_sk(sk);
1812
1813        tcp_clear_xmit_timers(sk);
1814
1815        tcp_cleanup_congestion_control(sk);
1816
1817        /* Clean up the write buffer. */
1818        tcp_write_queue_purge(sk);
1819
1820        /* Cleans up our, hopefully empty, out_of_order_queue. */
1821        __skb_queue_purge(&tp->out_of_order_queue);
1822
1823#ifdef CONFIG_TCP_MD5SIG
1824        /* Clean up the MD5 key list, if any */
1825        if (tp->md5sig_info) {
1826                tcp_clear_md5_list(sk);
1827                kfree_rcu(tp->md5sig_info, rcu);
1828                tp->md5sig_info = NULL;
1829        }
1830#endif
1831
1832        /* Clean up the prequeue; it really should be empty by now. */
1833        __skb_queue_purge(&tp->ucopy.prequeue);
1834
1835        /* Clean up a referenced TCP bind bucket. */
1836        if (inet_csk(sk)->icsk_bind_hash)
1837                inet_put_port(sk);
1838
1839        BUG_ON(tp->fastopen_rsk);
1840
1841        /* If socket is aborted during connect operation */
1842        tcp_free_fastopen_req(tp);
1843        tcp_saved_syn_free(tp);
1844
1845        local_bh_disable();
1846        sk_sockets_allocated_dec(sk);
1847        local_bh_enable();
1848
1849        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1850                sock_release_memcg(sk);
1851}
1852EXPORT_SYMBOL(tcp_v4_destroy_sock);
1853
1854#ifdef CONFIG_PROC_FS
1855/* Proc filesystem TCP sock list dumping. */
1856
1857/*
1858 * Get the next listener socket following cur.  If cur is NULL, get the first socket
1859 * starting from the bucket given in st->bucket; when st->bucket is zero the
1860 * very first socket in the hash table is returned.
1861 */
1862static void *listening_get_next(struct seq_file *seq, void *cur)
1863{
1864        struct tcp_iter_state *st = seq->private;
1865        struct net *net = seq_file_net(seq);
1866        struct inet_listen_hashbucket *ilb;
1867        struct inet_connection_sock *icsk;
1868        struct sock *sk = cur;
1869
1870        if (!sk) {
1871get_head:
1872                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1873                spin_lock_bh(&ilb->lock);
1874                sk = sk_head(&ilb->head);
1875                st->offset = 0;
1876                goto get_sk;
1877        }
1878        ilb = &tcp_hashinfo.listening_hash[st->bucket];
1879        ++st->num;
1880        ++st->offset;
1881
1882        sk = sk_next(sk);
1883get_sk:
1884        sk_for_each_from(sk) {
1885                if (!net_eq(sock_net(sk), net))
1886                        continue;
1887                if (sk->sk_family == st->family)
1888                        return sk;
1889                icsk = inet_csk(sk);
1890        }
1891        spin_unlock_bh(&ilb->lock);
1892        st->offset = 0;
1893        if (++st->bucket < INET_LHTABLE_SIZE)
1894                goto get_head;
1895        return NULL;
1896}
1897
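    /* Skip forward *pos listening sockets from the start of the listener
     * hash; used when the seq_file iterator cannot resume from its previous
     * position.
     */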
1898static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1899{
1900        struct tcp_iter_state *st = seq->private;
1901        void *rc;
1902
1903        st->bucket = 0;
1904        st->offset = 0;
1905        rc = listening_get_next(seq, NULL);
1906
1907        while (rc && *pos) {
1908                rc = listening_get_next(seq, rc);
1909                --*pos;
1910        }
1911        return rc;
1912}
1913
1914static inline bool empty_bucket(const struct tcp_iter_state *st)
1915{
1916        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1917}
1918
1919/*
1920 * Get the first established socket, starting from the bucket given in st->bucket.
1921 * If st->bucket is zero, the very first socket in the hash is returned.
1922 */
1923static void *established_get_first(struct seq_file *seq)
1924{
1925        struct tcp_iter_state *st = seq->private;
1926        struct net *net = seq_file_net(seq);
1927        void *rc = NULL;
1928
1929        st->offset = 0;
1930        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1931                struct sock *sk;
1932                struct hlist_nulls_node *node;
1933                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1934
1935                /* Lockless fast path for the common case of empty buckets */
1936                if (empty_bucket(st))
1937                        continue;
1938
1939                spin_lock_bh(lock);
1940                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1941                        if (sk->sk_family != st->family ||
1942                            !net_eq(sock_net(sk), net)) {
1943                                continue;
1944                        }
1945                        rc = sk;
1946                        goto out;
1947                }
1948                spin_unlock_bh(lock);
1949        }
1950out:
1951        return rc;
1952}
1953
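    /* Advance to the next socket in the established hash, releasing the
     * current bucket lock and moving on to the next bucket once the current
     * chain is exhausted.
     */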
1954static void *established_get_next(struct seq_file *seq, void *cur)
1955{
1956        struct sock *sk = cur;
1957        struct hlist_nulls_node *node;
1958        struct tcp_iter_state *st = seq->private;
1959        struct net *net = seq_file_net(seq);
1960
1961        ++st->num;
1962        ++st->offset;
1963
1964        sk = sk_nulls_next(sk);
1965
1966        sk_nulls_for_each_from(sk, node) {
1967                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1968                        return sk;
1969        }
1970
1971        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1972        ++st->bucket;
1973        return established_get_first(seq);
1974}
1975
1976static void *established_get_idx(struct seq_file *seq, loff_t pos)
1977{
1978        struct tcp_iter_state *st = seq->private;
1979        void *rc;
1980
1981        st->bucket = 0;
1982        rc = established_get_first(seq);
1983
1984        while (rc && pos) {
1985                rc = established_get_next(seq, rc);
1986                --pos;
1987        }
1988        return rc;
1989}
1990
1991static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
1992{
1993        void *rc;
1994        struct tcp_iter_state *st = seq->private;
1995
1996        st->state = TCP_SEQ_STATE_LISTENING;
1997        rc        = listening_get_idx(seq, &pos);
1998
1999        if (!rc) {
2000                st->state = TCP_SEQ_STATE_ESTABLISHED;
2001                rc        = established_get_idx(seq, pos);
2002        }
2003
2004        return rc;
2005}
2006
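    /* Try to resume iteration at the bucket and in-bucket offset remembered
     * from the previous read, so a large /proc/net/tcp dump does not rescan
     * from the first bucket for every chunk.
     */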
2007static void *tcp_seek_last_pos(struct seq_file *seq)
2008{
2009        struct tcp_iter_state *st = seq->private;
2010        int offset = st->offset;
2011        int orig_num = st->num;
2012        void *rc = NULL;
2013
2014        switch (st->state) {
2015        case TCP_SEQ_STATE_LISTENING:
2016                if (st->bucket >= INET_LHTABLE_SIZE)
2017                        break;
2018                st->state = TCP_SEQ_STATE_LISTENING;
2019                rc = listening_get_next(seq, NULL);
2020                while (offset-- && rc)
2021                        rc = listening_get_next(seq, rc);
2022                if (rc)
2023                        break;
2024                st->bucket = 0;
2025                st->state = TCP_SEQ_STATE_ESTABLISHED;
2026                /* Fallthrough */
2027        case TCP_SEQ_STATE_ESTABLISHED:
2028                if (st->bucket > tcp_hashinfo.ehash_mask)
2029                        break;
2030                rc = established_get_first(seq);
2031                while (offset-- && rc)
2032                        rc = established_get_next(seq, rc);
2033        }
2034
2035        st->num = orig_num;
2036
2037        return rc;
2038}
2039
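    /* seq_file start callback: resume from the previous position when *pos
     * matches where the last read stopped, otherwise rewind and walk the
     * tables from the beginning.
     */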
2040static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2041{
2042        struct tcp_iter_state *st = seq->private;
2043        void *rc;
2044
2045        if (*pos && *pos == st->last_pos) {
2046                rc = tcp_seek_last_pos(seq);
2047                if (rc)
2048                        goto out;
2049        }
2050
2051        st->state = TCP_SEQ_STATE_LISTENING;
2052        st->num = 0;
2053        st->bucket = 0;
2054        st->offset = 0;
2055        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2056
2057out:
2058        st->last_pos = *pos;
2059        return rc;
2060}
2061
2062static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2063{
2064        struct tcp_iter_state *st = seq->private;
2065        void *rc = NULL;
2066
2067        if (v == SEQ_START_TOKEN) {
2068                rc = tcp_get_idx(seq, 0);
2069                goto out;
2070        }
2071
2072        switch (st->state) {
2073        case TCP_SEQ_STATE_LISTENING:
2074                rc = listening_get_next(seq, v);
2075                if (!rc) {
2076                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2077                        st->bucket = 0;
2078                        st->offset = 0;
2079                        rc        = established_get_first(seq);
2080                }
2081                break;
2082        case TCP_SEQ_STATE_ESTABLISHED:
2083                rc = established_get_next(seq, v);
2084                break;
2085        }
2086out:
2087        ++*pos;
2088        st->last_pos = *pos;
2089        return rc;
2090}
2091
2092static void tcp_seq_stop(struct seq_file *seq, void *v)
2093{
2094        struct tcp_iter_state *st = seq->private;
2095
2096        switch (st->state) {
2097        case TCP_SEQ_STATE_LISTENING:
2098                if (v != SEQ_START_TOKEN)
2099                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2100                break;
2101        case TCP_SEQ_STATE_ESTABLISHED:
2102                if (v)
2103                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2104                break;
2105        }
2106}
2107
2108int tcp_seq_open(struct inode *inode, struct file *file)
2109{
2110        struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2111        struct tcp_iter_state *s;
2112        int err;
2113
2114        err = seq_open_net(inode, file, &afinfo->seq_ops,
2115                          sizeof(struct tcp_iter_state));
2116        if (err < 0)
2117                return err;
2118
2119        s = ((struct seq_file *)file->private_data)->private;
2120        s->family               = afinfo->family;
2121        s->last_pos             = 0;
2122        return 0;
2123}
2124EXPORT_SYMBOL(tcp_seq_open);
2125
2126int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2127{
2128        int rc = 0;
2129        struct proc_dir_entry *p;
2130
2131        afinfo->seq_ops.start           = tcp_seq_start;
2132        afinfo->seq_ops.next            = tcp_seq_next;
2133        afinfo->seq_ops.stop            = tcp_seq_stop;
2134
2135        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2136                             afinfo->seq_fops, afinfo);
2137        if (!p)
2138                rc = -ENOMEM;
2139        return rc;
2140}
2141EXPORT_SYMBOL(tcp_proc_register);
2142
2143void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2144{
2145        remove_proc_entry(afinfo->name, net->proc_net);
2146}
2147EXPORT_SYMBOL(tcp_proc_unregister);
2148
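    /* Print one request socket (an embryonic SYN_RECV connection) as a
     * /proc/net/tcp line.
     */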
2149static void get_openreq4(const struct request_sock *req,
2150                         struct seq_file *f, int i)
2151{
2152        const struct inet_request_sock *ireq = inet_rsk(req);
2153        long delta = req->rsk_timer.expires - jiffies;
2154
2155        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2156                " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2157                i,
2158                ireq->ir_loc_addr,
2159                ireq->ir_num,
2160                ireq->ir_rmt_addr,
2161                ntohs(ireq->ir_rmt_port),
2162                TCP_SYN_RECV,
2163                0, 0, /* could print option size, but that is af dependent. */
2164                1,    /* timers active (only the expire timer) */
2165                jiffies_delta_to_clock_t(delta),
2166                req->num_timeout,
2167                from_kuid_munged(seq_user_ns(f),
2168                                 sock_i_uid(req->rsk_listener)),
2169                0,  /* non standard timer */
2170                0, /* open_requests have no inode */
2171                0,
2172                req);
2173}
2174
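    /* Print one full TCP socket as a /proc/net/tcp line, including queue
     * lengths, timer state, retransmit counters and congestion window.
     */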
2175static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2176{
2177        int timer_active;
2178        unsigned long timer_expires;
2179        const struct tcp_sock *tp = tcp_sk(sk);
2180        const struct inet_connection_sock *icsk = inet_csk(sk);
2181        const struct inet_sock *inet = inet_sk(sk);
2182        const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2183        __be32 dest = inet->inet_daddr;
2184        __be32 src = inet->inet_rcv_saddr;
2185        __u16 destp = ntohs(inet->inet_dport);
2186        __u16 srcp = ntohs(inet->inet_sport);
2187        int rx_queue;
2188        int state;
2189
2190        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2191            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2192            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2193                timer_active    = 1;
2194                timer_expires   = icsk->icsk_timeout;
2195        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2196                timer_active    = 4;
2197                timer_expires   = icsk->icsk_timeout;
2198        } else if (timer_pending(&sk->sk_timer)) {
2199                timer_active    = 2;
2200                timer_expires   = sk->sk_timer.expires;
2201        } else {
2202                timer_active    = 0;
2203                timer_expires = jiffies;
2204        }
2205
2206        state = sk_state_load(sk);
2207        if (state == TCP_LISTEN)
2208                rx_queue = sk->sk_ack_backlog;
2209        else
2210                /* Because we don't lock the socket,
2211                 * we might find a transient negative value.
2212                 */
2213                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2214
2215        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2216                        "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2217                i, src, srcp, dest, destp, state,
2218                tp->write_seq - tp->snd_una,
2219                rx_queue,
2220                timer_active,
2221                jiffies_delta_to_clock_t(timer_expires - jiffies),
2222                icsk->icsk_retransmits,
2223                from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2224                icsk->icsk_probes_out,
2225                sock_i_ino(sk),
2226                atomic_read(&sk->sk_refcnt), sk,
2227                jiffies_to_clock_t(icsk->icsk_rto),
2228                jiffies_to_clock_t(icsk->icsk_ack.ato),
2229                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2230                tp->snd_cwnd,
2231                state == TCP_LISTEN ?
2232                    fastopenq->max_qlen :
2233                    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2234}
2235
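    /* Print one TIME_WAIT socket as a /proc/net/tcp line. */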
2236static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2237                               struct seq_file *f, int i)
2238{
2239        long delta = tw->tw_timer.expires - jiffies;
2240        __be32 dest, src;
2241        __u16 destp, srcp;
2242
2243        dest  = tw->tw_daddr;
2244        src   = tw->tw_rcv_saddr;
2245        destp = ntohs(tw->tw_dport);
2246        srcp  = ntohs(tw->tw_sport);
2247
2248        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2249                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2250                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2251                3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2252                atomic_read(&tw->tw_refcnt), tw);
2253}
2254
2255#define TMPSZ 150
2256
2257static int tcp4_seq_show(struct seq_file *seq, void *v)
2258{
2259        struct tcp_iter_state *st;
2260        struct sock *sk = v;
2261
2262        seq_setwidth(seq, TMPSZ - 1);
2263        if (v == SEQ_START_TOKEN) {
2264                seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2265                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2266                           "inode");
2267                goto out;
2268        }
2269        st = seq->private;
2270
2271        if (sk->sk_state == TCP_TIME_WAIT)
2272                get_timewait4_sock(v, seq, st->num);
2273        else if (sk->sk_state == TCP_NEW_SYN_RECV)
2274                get_openreq4(v, seq, st->num);
2275        else
2276                get_tcp4_sock(v, seq, st->num);
2277out:
2278        seq_pad(seq, '\n');
2279        return 0;
2280}
2281
2282static const struct file_operations tcp_afinfo_seq_fops = {
2283        .owner   = THIS_MODULE,
2284        .open    = tcp_seq_open,
2285        .read    = seq_read,
2286        .llseek  = seq_lseek,
2287        .release = seq_release_net
2288};
2289
2290static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2291        .name           = "tcp",
2292        .family         = AF_INET,
2293        .seq_fops       = &tcp_afinfo_seq_fops,
2294        .seq_ops        = {
2295                .show           = tcp4_seq_show,
2296        },
2297};
2298
2299static int __net_init tcp4_proc_init_net(struct net *net)
2300{
2301        return tcp_proc_register(net, &tcp4_seq_afinfo);
2302}
2303
2304static void __net_exit tcp4_proc_exit_net(struct net *net)
2305{
2306        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2307}
2308
2309static struct pernet_operations tcp4_net_ops = {
2310        .init = tcp4_proc_init_net,
2311        .exit = tcp4_proc_exit_net,
2312};
2313
2314int __init tcp4_proc_init(void)
2315{
2316        return register_pernet_subsys(&tcp4_net_ops);
2317}
2318
2319void tcp4_proc_exit(void)
2320{
2321        unregister_pernet_subsys(&tcp4_net_ops);
2322}
2323#endif /* CONFIG_PROC_FS */
2324
2325struct proto tcp_prot = {
2326        .name                   = "TCP",
2327        .owner                  = THIS_MODULE,
2328        .close                  = tcp_close,
2329        .connect                = tcp_v4_connect,
2330        .disconnect             = tcp_disconnect,
2331        .accept                 = inet_csk_accept,
2332        .ioctl                  = tcp_ioctl,
2333        .init                   = tcp_v4_init_sock,
2334        .destroy                = tcp_v4_destroy_sock,
2335        .shutdown               = tcp_shutdown,
2336        .setsockopt             = tcp_setsockopt,
2337        .getsockopt             = tcp_getsockopt,
2338        .recvmsg                = tcp_recvmsg,
2339        .sendmsg                = tcp_sendmsg,
2340        .sendpage               = tcp_sendpage,
2341        .backlog_rcv            = tcp_v4_do_rcv,
2342        .release_cb             = tcp_release_cb,
2343        .hash                   = inet_hash,
2344        .unhash                 = inet_unhash,
2345        .get_port               = inet_csk_get_port,
2346        .enter_memory_pressure  = tcp_enter_memory_pressure,
2347        .stream_memory_free     = tcp_stream_memory_free,
2348        .sockets_allocated      = &tcp_sockets_allocated,
2349        .orphan_count           = &tcp_orphan_count,
2350        .memory_allocated       = &tcp_memory_allocated,
2351        .memory_pressure        = &tcp_memory_pressure,
2352        .sysctl_mem             = sysctl_tcp_mem,
2353        .sysctl_wmem            = sysctl_tcp_wmem,
2354        .sysctl_rmem            = sysctl_tcp_rmem,
2355        .max_header             = MAX_TCP_HEADER,
2356        .obj_size               = sizeof(struct tcp_sock),
2357        .slab_flags             = SLAB_DESTROY_BY_RCU,
2358        .twsk_prot              = &tcp_timewait_sock_ops,
2359        .rsk_prot               = &tcp_request_sock_ops,
2360        .h.hashinfo             = &tcp_hashinfo,
2361        .no_autobind            = true,
2362#ifdef CONFIG_COMPAT
2363        .compat_setsockopt      = compat_tcp_setsockopt,
2364        .compat_getsockopt      = compat_tcp_getsockopt,
2365#endif
2366        .diag_destroy           = tcp_abort,
2367};
2368EXPORT_SYMBOL(tcp_prot);
2369
2370static void __net_exit tcp_sk_exit(struct net *net)
2371{
2372        int cpu;
2373
2374        for_each_possible_cpu(cpu)
2375                inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2376        free_percpu(net->ipv4.tcp_sk);
2377}
2378
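    /* Per-namespace setup: create one control socket per possible CPU (used
     * to send RSTs and ACKs that are not tied to a full socket) and install
     * the namespace's default TCP sysctl values.
     */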
2379static int __net_init tcp_sk_init(struct net *net)
2380{
2381        int res, cpu;
2382
2383        net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2384        if (!net->ipv4.tcp_sk)
2385                return -ENOMEM;
2386
2387        for_each_possible_cpu(cpu) {
2388                struct sock *sk;
2389
2390                res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2391                                           IPPROTO_TCP, net);
2392                if (res)
2393                        goto fail;
2394                sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2395                *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2396        }
2397
2398        net->ipv4.sysctl_tcp_ecn = 2;
2399        net->ipv4.sysctl_tcp_ecn_fallback = 1;
2400
2401        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2402        net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2403        net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2404
2405        net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2406        net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2407        net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2408
2409        net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2410        net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2411        net->ipv4.sysctl_tcp_syncookies = 1;
2412        net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2413        net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2414        net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2415        net->ipv4.sysctl_tcp_orphan_retries = 0;
2416        net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2417        net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2418
2419        return 0;
2420fail:
2421        tcp_sk_exit(net);
2422
2423        return res;
2424}
2425
2426static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2427{
2428        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2429}
2430
2431static struct pernet_operations __net_initdata tcp_sk_ops = {
2432       .init       = tcp_sk_init,
2433       .exit       = tcp_sk_exit,
2434       .exit_batch = tcp_sk_exit_batch,
2435};
2436
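    /* Boot-time initialisation: set up the TCP hash tables and register the
     * per-namespace init/exit operations, panicking if that fails.
     */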
2437void __init tcp_v4_init(void)
2438{
2439        inet_hashinfo_init(&tcp_hashinfo);
2440        if (register_pernet_subsys(&tcp_sk_ops))
2441                panic("Failed to create the TCP control socket.\n");
2442}
2443