linux/net/ipv4/tcp_ipv4.c
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Implementation of the Transmission Control Protocol(TCP).
   7 *
   8 *              IPv4 specific functions
   9 *
  10 *
  11 *              code split from:
  12 *              linux/ipv4/tcp.c
  13 *              linux/ipv4/tcp_input.c
  14 *              linux/ipv4/tcp_output.c
  15 *
  16 *              See tcp.c for author information
  17 *
  18 *      This program is free software; you can redistribute it and/or
  19 *      modify it under the terms of the GNU General Public License
  20 *      as published by the Free Software Foundation; either version
  21 *      2 of the License, or (at your option) any later version.
  22 */
  23
  24/*
  25 * Changes:
  26 *              David S. Miller :       New socket lookup architecture.
  27 *                                      This code is dedicated to John Dyson.
  28 *              David S. Miller :       Change semantics of established hash,
  29 *                                      half is devoted to TIME_WAIT sockets
  30 *                                      and the rest go in the other half.
  31 *              Andi Kleen :            Add support for syncookies and fixed
  32 *                                      some bugs: ip options weren't passed to
  33 *                                      the TCP layer, missed a check for an
  34 *                                      ACK bit.
  35 *              Andi Kleen :            Implemented fast path mtu discovery.
  36 *                                      Fixed many serious bugs in the
  37 *                                      request_sock handling and moved
  38 *                                      most of it into the af independent code.
  39 *                                      Added tail drop and some other bugfixes.
  40 *                                      Added new listen semantics.
  41 *              Mike McLagan    :       Routing by source
  42 *      Juan Jose Ciarlante:            ip_dynaddr bits
  43 *              Andi Kleen:             various fixes.
  44 *      Vitaly E. Lavrov        :       Transparent proxy revived after year
  45 *                                      coma.
  46 *      Andi Kleen              :       Fix new listen.
  47 *      Andi Kleen              :       Fix accept error reporting.
  48 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
  49 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
  50 *                                      a single port at the same time.
  51 */
  52
  53#define pr_fmt(fmt) "TCP: " fmt
  54
  55#include <linux/bottom_half.h>
  56#include <linux/types.h>
  57#include <linux/fcntl.h>
  58#include <linux/module.h>
  59#include <linux/random.h>
  60#include <linux/cache.h>
  61#include <linux/jhash.h>
  62#include <linux/init.h>
  63#include <linux/times.h>
  64#include <linux/slab.h>
  65
  66#include <net/net_namespace.h>
  67#include <net/icmp.h>
  68#include <net/inet_hashtables.h>
  69#include <net/tcp.h>
  70#include <net/transp_v6.h>
  71#include <net/ipv6.h>
  72#include <net/inet_common.h>
  73#include <net/timewait_sock.h>
  74#include <net/xfrm.h>
  75#include <net/secure_seq.h>
  76#include <net/busy_poll.h>
  77
  78#include <linux/inet.h>
  79#include <linux/ipv6.h>
  80#include <linux/stddef.h>
  81#include <linux/proc_fs.h>
  82#include <linux/seq_file.h>
  83
  84#include <crypto/hash.h>
  85#include <linux/scatterlist.h>
  86
  87int sysctl_tcp_tw_reuse __read_mostly;
  88int sysctl_tcp_low_latency __read_mostly;
  89EXPORT_SYMBOL(sysctl_tcp_low_latency);
  90
  91#ifdef CONFIG_TCP_MD5SIG
  92static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
  93                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
  94#endif
  95
  96struct inet_hashinfo tcp_hashinfo;
  97EXPORT_SYMBOL(tcp_hashinfo);
  98
  99static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 100{
 101        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 102                                          ip_hdr(skb)->saddr,
 103                                          tcp_hdr(skb)->dest,
 104                                          tcp_hdr(skb)->source);
 105}
 106
 107int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 108{
 109        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
 110        struct tcp_sock *tp = tcp_sk(sk);
 111
 112        /* With PAWS, it is safe from the viewpoint
 113           of data integrity. Even without PAWS it is safe provided sequence
 114           spaces do not overlap i.e. at data rates <= 80Mbit/sec.
 115
  116           Actually, the idea is close to VJ's: only the timestamp cache is
  117           held not per host but per port pair, and the TW bucket is used as
  118           the state holder.
  119
  120           If the TW bucket has already been destroyed, we fall back to VJ's
  121           scheme and use the initial timestamp retrieved from the peer table.
 122         */
 123        if (tcptw->tw_ts_recent_stamp &&
 124            (!twp || (sysctl_tcp_tw_reuse &&
 125                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 126                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 127                if (tp->write_seq == 0)
 128                        tp->write_seq = 1;
 129                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
 130                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
 131                sock_hold(sktw);
 132                return 1;
 133        }
 134
 135        return 0;
 136}
 137EXPORT_SYMBOL_GPL(tcp_twsk_unique);
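/*
 * For context: tcp_twsk_unique() is reached from the connect-time
 * established-hash check (the inet_hash_connect() path) when the 4-tuple a
 * new connection wants is still occupied by a TIME-WAIT socket.  Returning 1
 * lets the new socket take the tuple over; write_seq is started beyond
 * tw_snd_nxt so the new sequence space cannot be mistaken for segments of
 * the old connection.
 */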
 138
 139/* This will initiate an outgoing connection. */
 140int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 141{
 142        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
 143        struct inet_sock *inet = inet_sk(sk);
 144        struct tcp_sock *tp = tcp_sk(sk);
 145        __be16 orig_sport, orig_dport;
 146        __be32 daddr, nexthop;
 147        struct flowi4 *fl4;
 148        struct rtable *rt;
 149        int err;
 150        struct ip_options_rcu *inet_opt;
 151
 152        if (addr_len < sizeof(struct sockaddr_in))
 153                return -EINVAL;
 154
 155        if (usin->sin_family != AF_INET)
 156                return -EAFNOSUPPORT;
 157
 158        nexthop = daddr = usin->sin_addr.s_addr;
 159        inet_opt = rcu_dereference_protected(inet->inet_opt,
 160                                             sock_owned_by_user(sk));
 161        if (inet_opt && inet_opt->opt.srr) {
 162                if (!daddr)
 163                        return -EINVAL;
 164                nexthop = inet_opt->opt.faddr;
 165        }
 166
 167        orig_sport = inet->inet_sport;
 168        orig_dport = usin->sin_port;
 169        fl4 = &inet->cork.fl.u.ip4;
 170        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 171                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 172                              IPPROTO_TCP,
 173                              orig_sport, orig_dport, sk);
 174        if (IS_ERR(rt)) {
 175                err = PTR_ERR(rt);
 176                if (err == -ENETUNREACH)
 177                        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 178                return err;
 179        }
 180
 181        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 182                ip_rt_put(rt);
 183                return -ENETUNREACH;
 184        }
 185
 186        if (!inet_opt || !inet_opt->opt.srr)
 187                daddr = fl4->daddr;
 188
 189        if (!inet->inet_saddr)
 190                inet->inet_saddr = fl4->saddr;
 191        sk_rcv_saddr_set(sk, inet->inet_saddr);
 192
 193        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 194                /* Reset inherited state */
 195                tp->rx_opt.ts_recent       = 0;
 196                tp->rx_opt.ts_recent_stamp = 0;
 197                if (likely(!tp->repair))
 198                        tp->write_seq      = 0;
 199        }
 200
 201        if (tcp_death_row.sysctl_tw_recycle &&
 202            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
 203                tcp_fetch_timewait_stamp(sk, &rt->dst);
 204
 205        inet->inet_dport = usin->sin_port;
 206        sk_daddr_set(sk, daddr);
 207
 208        inet_csk(sk)->icsk_ext_hdr_len = 0;
 209        if (inet_opt)
 210                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 211
 212        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
 213
 214        /* Socket identity is still unknown (sport may be zero).
  215         * However we set state to SYN-SENT and, without releasing the socket
  216         * lock, select a source port, enter ourselves into the hash tables and
  217         * complete initialization after this.
 218         */
 219        tcp_set_state(sk, TCP_SYN_SENT);
 220        err = inet_hash_connect(&tcp_death_row, sk);
 221        if (err)
 222                goto failure;
 223
 224        sk_set_txhash(sk);
 225
 226        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 227                               inet->inet_sport, inet->inet_dport, sk);
 228        if (IS_ERR(rt)) {
 229                err = PTR_ERR(rt);
 230                rt = NULL;
 231                goto failure;
 232        }
 233        /* OK, now commit destination to socket.  */
 234        sk->sk_gso_type = SKB_GSO_TCPV4;
 235        sk_setup_caps(sk, &rt->dst);
 236
 237        if (!tp->write_seq && likely(!tp->repair))
 238                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 239                                                           inet->inet_daddr,
 240                                                           inet->inet_sport,
 241                                                           usin->sin_port);
 242
 243        inet->inet_id = tp->write_seq ^ jiffies;
 244
 245        err = tcp_connect(sk);
 246
 247        rt = NULL;
 248        if (err)
 249                goto failure;
 250
 251        return 0;
 252
 253failure:
 254        /*
 255         * This unhashes the socket and releases the local port,
 256         * if necessary.
 257         */
 258        tcp_set_state(sk, TCP_CLOSE);
 259        ip_rt_put(rt);
 260        sk->sk_route_caps = 0;
 261        inet->inet_dport = 0;
 262        return err;
 263}
 264EXPORT_SYMBOL(tcp_v4_connect);
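/*
 * For reference, tcp_v4_connect() is what ultimately runs when userspace
 * calls connect(2) on an IPv4 TCP socket (via inet_stream_connect()).
 * A minimal userspace sketch, illustrative only (error handling omitted,
 * address and port made up):
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = { .sin_family = AF_INET,
 *				   .sin_port   = htons(80) };
 *
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 *
 * The addr_len and sin_family checks at the top of tcp_v4_connect()
 * correspond to the arguments passed here.
 */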
 265
 266/*
 267 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 268 * It can be called through tcp_release_cb() if socket was owned by user
 269 * at the time tcp_v4_err() was called to handle ICMP message.
 270 */
 271void tcp_v4_mtu_reduced(struct sock *sk)
 272{
 273        struct dst_entry *dst;
 274        struct inet_sock *inet = inet_sk(sk);
 275        u32 mtu = tcp_sk(sk)->mtu_info;
 276
 277        dst = inet_csk_update_pmtu(sk, mtu);
 278        if (!dst)
 279                return;
 280
  281        /* Something is about to go wrong... Remember the soft error
  282         * in case this connection is not able to recover.
 283         */
 284        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
 285                sk->sk_err_soft = EMSGSIZE;
 286
 287        mtu = dst_mtu(dst);
 288
 289        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
 290            ip_sk_accept_pmtu(sk) &&
 291            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 292                tcp_sync_mss(sk, mtu);
 293
 294                /* Resend the TCP packet because it's
 295                 * clear that the old packet has been
 296                 * dropped. This is the new "fast" path mtu
 297                 * discovery.
 298                 */
 299                tcp_simple_retransmit(sk);
 300        } /* else let the usual retransmit timer handle it */
 301}
 302EXPORT_SYMBOL(tcp_v4_mtu_reduced);
 303
 304static void do_redirect(struct sk_buff *skb, struct sock *sk)
 305{
 306        struct dst_entry *dst = __sk_dst_check(sk, 0);
 307
 308        if (dst)
 309                dst->ops->redirect(dst, sk, skb);
 310}
 311
 312
 313/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
 314void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 315{
 316        struct request_sock *req = inet_reqsk(sk);
 317        struct net *net = sock_net(sk);
 318
 319        /* ICMPs are not backlogged, hence we cannot get
 320         * an established socket here.
 321         */
 322        if (seq != tcp_rsk(req)->snt_isn) {
 323                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 324        } else if (abort) {
 325                /*
 326                 * Still in SYN_RECV, just remove it silently.
 327                 * There is no good way to pass the error to the newly
 328                 * created socket, and POSIX does not want network
 329                 * errors returned from accept().
 330                 */
 331                inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 332                NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
 333        }
 334        reqsk_put(req);
 335}
 336EXPORT_SYMBOL(tcp_req_err);
 337
 338/*
 339 * This routine is called by the ICMP module when it gets some
 340 * sort of error condition.  If err < 0 then the socket should
 341 * be closed and the error returned to the user.  If err > 0
 342 * it's just the icmp type << 8 | icmp code.  After adjustment
 343 * header points to the first 8 bytes of the tcp header.  We need
 344 * to find the appropriate port.
 345 *
 346 * The locking strategy used here is very "optimistic". When
 347 * someone else accesses the socket the ICMP is just dropped
 348 * and for some paths there is no check at all.
 349 * A more general error queue to queue errors for later handling
 350 * is probably better.
 351 *
 352 */
 353
 354void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 355{
 356        const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 357        struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
 358        struct inet_connection_sock *icsk;
 359        struct tcp_sock *tp;
 360        struct inet_sock *inet;
 361        const int type = icmp_hdr(icmp_skb)->type;
 362        const int code = icmp_hdr(icmp_skb)->code;
 363        struct sock *sk;
 364        struct sk_buff *skb;
 365        struct request_sock *fastopen;
 366        __u32 seq, snd_una;
 367        __u32 remaining;
 368        int err;
 369        struct net *net = dev_net(icmp_skb->dev);
 370
 371        sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
 372                                       th->dest, iph->saddr, ntohs(th->source),
 373                                       inet_iif(icmp_skb));
 374        if (!sk) {
 375                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 376                return;
 377        }
 378        if (sk->sk_state == TCP_TIME_WAIT) {
 379                inet_twsk_put(inet_twsk(sk));
 380                return;
 381        }
 382        seq = ntohl(th->seq);
 383        if (sk->sk_state == TCP_NEW_SYN_RECV)
 384                return tcp_req_err(sk, seq,
 385                                  type == ICMP_PARAMETERPROB ||
 386                                  type == ICMP_TIME_EXCEEDED ||
 387                                  (type == ICMP_DEST_UNREACH &&
 388                                   (code == ICMP_NET_UNREACH ||
 389                                    code == ICMP_HOST_UNREACH)));
 390
 391        bh_lock_sock(sk);
 392        /* If too many ICMPs get dropped on busy
 393         * servers this needs to be solved differently.
  394         * We do take care of the PMTU discovery (RFC1191) special case:
  395         * we can receive locally generated ICMP messages while the socket is held.
 396         */
 397        if (sock_owned_by_user(sk)) {
 398                if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
 399                        NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 400        }
 401        if (sk->sk_state == TCP_CLOSE)
 402                goto out;
 403
 404        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 405                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 406                goto out;
 407        }
 408
 409        icsk = inet_csk(sk);
 410        tp = tcp_sk(sk);
 411        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
 412        fastopen = tp->fastopen_rsk;
 413        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 414        if (sk->sk_state != TCP_LISTEN &&
 415            !between(seq, snd_una, tp->snd_nxt)) {
 416                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 417                goto out;
 418        }
 419
 420        switch (type) {
 421        case ICMP_REDIRECT:
 422                do_redirect(icmp_skb, sk);
 423                goto out;
 424        case ICMP_SOURCE_QUENCH:
 425                /* Just silently ignore these. */
 426                goto out;
 427        case ICMP_PARAMETERPROB:
 428                err = EPROTO;
 429                break;
 430        case ICMP_DEST_UNREACH:
 431                if (code > NR_ICMP_UNREACH)
 432                        goto out;
 433
 434                if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 435                        /* We are not interested in TCP_LISTEN and open_requests
  436                         * (SYN-ACKs sent out by Linux are always < 576 bytes so
 437                         * they should go through unfragmented).
 438                         */
 439                        if (sk->sk_state == TCP_LISTEN)
 440                                goto out;
 441
 442                        tp->mtu_info = info;
 443                        if (!sock_owned_by_user(sk)) {
 444                                tcp_v4_mtu_reduced(sk);
 445                        } else {
 446                                if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
 447                                        sock_hold(sk);
 448                        }
 449                        goto out;
 450                }
 451
 452                err = icmp_err_convert[code].errno;
 453                /* check if icmp_skb allows revert of backoff
 454                 * (see draft-zimmermann-tcp-lcd) */
 455                if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 456                        break;
 457                if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
 458                    !icsk->icsk_backoff || fastopen)
 459                        break;
 460
 461                if (sock_owned_by_user(sk))
 462                        break;
 463
 464                icsk->icsk_backoff--;
 465                icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
 466                                               TCP_TIMEOUT_INIT;
 467                icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 468
 469                skb = tcp_write_queue_head(sk);
 470                BUG_ON(!skb);
 471
 472                remaining = icsk->icsk_rto -
 473                            min(icsk->icsk_rto,
 474                                tcp_time_stamp - tcp_skb_timestamp(skb));
 475
 476                if (remaining) {
 477                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 478                                                  remaining, TCP_RTO_MAX);
 479                } else {
 480                        /* RTO revert clocked out retransmission.
 481                         * Will retransmit now */
 482                        tcp_retransmit_timer(sk);
 483                }
 484
 485                break;
 486        case ICMP_TIME_EXCEEDED:
 487                err = EHOSTUNREACH;
 488                break;
 489        default:
 490                goto out;
 491        }
 492
 493        switch (sk->sk_state) {
 494        case TCP_SYN_SENT:
 495        case TCP_SYN_RECV:
  496                /* Only in fast or simultaneous open. If a fast open socket
 497                 * is already accepted it is treated as a connected one below.
 498                 */
 499                if (fastopen && !fastopen->sk)
 500                        break;
 501
 502                if (!sock_owned_by_user(sk)) {
 503                        sk->sk_err = err;
 504
 505                        sk->sk_error_report(sk);
 506
 507                        tcp_done(sk);
 508                } else {
 509                        sk->sk_err_soft = err;
 510                }
 511                goto out;
 512        }
 513
 514        /* If we've already connected we will keep trying
 515         * until we time out, or the user gives up.
 516         *
  517         * rfc1122 4.2.3.9 allows us to consider as hard errors
 518         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
 519         * but it is obsoleted by pmtu discovery).
 520         *
  521         * Note that in the modern internet, where routing is unreliable
  522         * and broken firewalls sit in every dark corner sending random
  523         * errors ordered by their masters, even these two messages finally lose
 524         * their original sense (even Linux sends invalid PORT_UNREACHs)
 525         *
 526         * Now we are in compliance with RFCs.
 527         *                                                      --ANK (980905)
 528         */
 529
 530        inet = inet_sk(sk);
 531        if (!sock_owned_by_user(sk) && inet->recverr) {
 532                sk->sk_err = err;
 533                sk->sk_error_report(sk);
 534        } else  { /* Only an error on timeout */
 535                sk->sk_err_soft = err;
 536        }
 537
 538out:
 539        bh_unlock_sock(sk);
 540        sock_put(sk);
 541}
 542
 543void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 544{
 545        struct tcphdr *th = tcp_hdr(skb);
 546
 547        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 548                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 549                skb->csum_start = skb_transport_header(skb) - skb->head;
 550                skb->csum_offset = offsetof(struct tcphdr, check);
 551        } else {
 552                th->check = tcp_v4_check(skb->len, saddr, daddr,
 553                                         csum_partial(th,
 554                                                      th->doff << 2,
 555                                                      skb->csum));
 556        }
 557}
 558
 559/* This routine computes an IPv4 TCP checksum. */
 560void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 561{
 562        const struct inet_sock *inet = inet_sk(sk);
 563
 564        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 565}
 566EXPORT_SYMBOL(tcp_v4_send_check);
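/*
 * A note on the two branches above: with CHECKSUM_PARTIAL only the
 * pseudo-header sum is stored in th->check, and csum_start/csum_offset tell
 * the device (or the software fallback) where to fold in the checksum over
 * the rest of the segment; otherwise the complete checksum (header plus the
 * payload sum already accumulated in skb->csum) is finalized in software
 * with csum_partial().
 */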
 567
 568/*
 569 *      This routine will send an RST to the other tcp.
 570 *
 571 *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 572 *                    for reset.
  573 *      Answer: if a packet caused an RST, it is not for a socket
  574 *              existing in our system; if it is matched to a socket,
  575 *              it is just a duplicate segment or a bug in the other side's TCP.
  576 *              So we build the reply based only on the parameters that
  577 *              arrived with the segment.
 578 *      Exception: precedence violation. We do not implement it in any case.
 579 */
 580
 581static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 582{
 583        const struct tcphdr *th = tcp_hdr(skb);
 584        struct {
 585                struct tcphdr th;
 586#ifdef CONFIG_TCP_MD5SIG
 587                __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
 588#endif
 589        } rep;
 590        struct ip_reply_arg arg;
 591#ifdef CONFIG_TCP_MD5SIG
 592        struct tcp_md5sig_key *key = NULL;
 593        const __u8 *hash_location = NULL;
 594        unsigned char newhash[16];
 595        int genhash;
 596        struct sock *sk1 = NULL;
 597#endif
 598        struct net *net;
 599
 600        /* Never send a reset in response to a reset. */
 601        if (th->rst)
 602                return;
 603
  604        /* If sk is not NULL, it means we did a successful lookup and the
  605         * incoming route had to be correct. prequeue might have dropped our dst.
 606         */
 607        if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
 608                return;
 609
 610        /* Swap the send and the receive. */
 611        memset(&rep, 0, sizeof(rep));
 612        rep.th.dest   = th->source;
 613        rep.th.source = th->dest;
 614        rep.th.doff   = sizeof(struct tcphdr) / 4;
 615        rep.th.rst    = 1;
 616
 617        if (th->ack) {
 618                rep.th.seq = th->ack_seq;
 619        } else {
 620                rep.th.ack = 1;
 621                rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
 622                                       skb->len - (th->doff << 2));
 623        }
 624
 625        memset(&arg, 0, sizeof(arg));
 626        arg.iov[0].iov_base = (unsigned char *)&rep;
 627        arg.iov[0].iov_len  = sizeof(rep.th);
 628
 629        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 630#ifdef CONFIG_TCP_MD5SIG
 631        hash_location = tcp_parse_md5sig_option(th);
 632        if (sk && sk_fullsock(sk)) {
 633                key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
 634                                        &ip_hdr(skb)->saddr, AF_INET);
 635        } else if (hash_location) {
 636                /*
  637                 * The active side is lost. Try to find the listening socket
  638                 * through the source port, and then find the md5 key through
  639                 * the listening socket. We do not lose security here:
  640                 * the incoming packet is checked with the md5 hash of the found
  641                 * key, and no RST is generated if the md5 hash doesn't match.
 642                 */
 643                sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
 644                                             ip_hdr(skb)->saddr,
 645                                             th->source, ip_hdr(skb)->daddr,
 646                                             ntohs(th->source), inet_iif(skb));
 647                /* don't send rst if it can't find key */
 648                if (!sk1)
 649                        return;
 650                rcu_read_lock();
 651                key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 652                                        &ip_hdr(skb)->saddr, AF_INET);
 653                if (!key)
 654                        goto release_sk1;
 655
 656                genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
 657                if (genhash || memcmp(hash_location, newhash, 16) != 0)
 658                        goto release_sk1;
 659        }
 660
 661        if (key) {
 662                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 663                                   (TCPOPT_NOP << 16) |
 664                                   (TCPOPT_MD5SIG << 8) |
 665                                   TCPOLEN_MD5SIG);
 666                /* Update length and the length the header thinks exists */
 667                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 668                rep.th.doff = arg.iov[0].iov_len / 4;
 669
 670                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
 671                                     key, ip_hdr(skb)->saddr,
 672                                     ip_hdr(skb)->daddr, &rep.th);
 673        }
 674#endif
 675        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 676                                      ip_hdr(skb)->saddr, /* XXX */
 677                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 678        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 679        arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 680
  681        /* When the socket is gone, all binding information is lost.
  682         * Routing might fail in this case. No choice here: if we choose to force
  683         * the input interface, we will misroute in case of an asymmetric route.
 684         */
 685        if (sk)
 686                arg.bound_dev_if = sk->sk_bound_dev_if;
 687
 688        BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 689                     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 690
 691        arg.tos = ip_hdr(skb)->tos;
 692        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 693                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 694                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 695                              &arg, arg.iov[0].iov_len);
 696
 697        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 698        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 699
 700#ifdef CONFIG_TCP_MD5SIG
 701release_sk1:
 702        if (sk1) {
 703                rcu_read_unlock();
 704                sock_put(sk1);
 705        }
 706#endif
 707}
 708
  709/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
  710   outside of socket context, is certainly ugly. What can I do?
 711 */
 712
 713static void tcp_v4_send_ack(struct net *net,
 714                            struct sk_buff *skb, u32 seq, u32 ack,
 715                            u32 win, u32 tsval, u32 tsecr, int oif,
 716                            struct tcp_md5sig_key *key,
 717                            int reply_flags, u8 tos)
 718{
 719        const struct tcphdr *th = tcp_hdr(skb);
 720        struct {
 721                struct tcphdr th;
 722                __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
 723#ifdef CONFIG_TCP_MD5SIG
 724                           + (TCPOLEN_MD5SIG_ALIGNED >> 2)
 725#endif
 726                        ];
 727        } rep;
 728        struct ip_reply_arg arg;
 729
 730        memset(&rep.th, 0, sizeof(struct tcphdr));
 731        memset(&arg, 0, sizeof(arg));
 732
 733        arg.iov[0].iov_base = (unsigned char *)&rep;
 734        arg.iov[0].iov_len  = sizeof(rep.th);
 735        if (tsecr) {
 736                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 737                                   (TCPOPT_TIMESTAMP << 8) |
 738                                   TCPOLEN_TIMESTAMP);
 739                rep.opt[1] = htonl(tsval);
 740                rep.opt[2] = htonl(tsecr);
 741                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
 742        }
 743
 744        /* Swap the send and the receive. */
 745        rep.th.dest    = th->source;
 746        rep.th.source  = th->dest;
 747        rep.th.doff    = arg.iov[0].iov_len / 4;
 748        rep.th.seq     = htonl(seq);
 749        rep.th.ack_seq = htonl(ack);
 750        rep.th.ack     = 1;
 751        rep.th.window  = htons(win);
 752
 753#ifdef CONFIG_TCP_MD5SIG
 754        if (key) {
 755                int offset = (tsecr) ? 3 : 0;
 756
 757                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 758                                          (TCPOPT_NOP << 16) |
 759                                          (TCPOPT_MD5SIG << 8) |
 760                                          TCPOLEN_MD5SIG);
 761                arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 762                rep.th.doff = arg.iov[0].iov_len/4;
 763
 764                tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 765                                    key, ip_hdr(skb)->saddr,
 766                                    ip_hdr(skb)->daddr, &rep.th);
 767        }
 768#endif
 769        arg.flags = reply_flags;
 770        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 771                                      ip_hdr(skb)->saddr, /* XXX */
 772                                      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 773        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 774        if (oif)
 775                arg.bound_dev_if = oif;
 776        arg.tos = tos;
 777        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 778                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
 779                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 780                              &arg, arg.iov[0].iov_len);
 781
 782        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 783}
 784
 785static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 786{
 787        struct inet_timewait_sock *tw = inet_twsk(sk);
 788        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 789
 790        tcp_v4_send_ack(sock_net(sk), skb,
 791                        tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 792                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 793                        tcp_time_stamp + tcptw->tw_ts_offset,
 794                        tcptw->tw_ts_recent,
 795                        tw->tw_bound_dev_if,
 796                        tcp_twsk_md5_key(tcptw),
 797                        tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
 798                        tw->tw_tos
 799                        );
 800
 801        inet_twsk_put(tw);
 802}
 803
 804static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 805                                  struct request_sock *req)
 806{
 807        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 808         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 809         */
 810        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
 811                                             tcp_sk(sk)->snd_nxt;
 812
 813        tcp_v4_send_ack(sock_net(sk), skb, seq,
 814                        tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
 815                        tcp_time_stamp,
 816                        req->ts_recent,
 817                        0,
 818                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
 819                                          AF_INET),
 820                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 821                        ip_hdr(skb)->tos);
 822}
 823
 824/*
 825 *      Send a SYN-ACK after having received a SYN.
 826 *      This still operates on a request_sock only, not on a big
 827 *      socket.
 828 */
 829static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 830                              struct flowi *fl,
 831                              struct request_sock *req,
 832                              struct tcp_fastopen_cookie *foc,
 833                                  bool attach_req)
 834{
 835        const struct inet_request_sock *ireq = inet_rsk(req);
 836        struct flowi4 fl4;
 837        int err = -1;
 838        struct sk_buff *skb;
 839
 840        /* First, grab a route. */
 841        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 842                return -1;
 843
 844        skb = tcp_make_synack(sk, dst, req, foc, attach_req);
 845
 846        if (skb) {
 847                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 848
 849                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 850                                            ireq->ir_rmt_addr,
 851                                            ireq->opt);
 852                err = net_xmit_eval(err);
 853        }
 854
 855        return err;
 856}
 857
 858/*
 859 *      IPv4 request_sock destructor.
 860 */
 861static void tcp_v4_reqsk_destructor(struct request_sock *req)
 862{
 863        kfree(inet_rsk(req)->opt);
 864}
 865
 866#ifdef CONFIG_TCP_MD5SIG
 867/*
 868 * RFC2385 MD5 checksumming requires a mapping of
 869 * IP address->MD5 Key.
 870 * We need to maintain these in the sk structure.
 871 */
 872
 873/* Find the Key structure for an address.  */
 874struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
 875                                         const union tcp_md5_addr *addr,
 876                                         int family)
 877{
 878        const struct tcp_sock *tp = tcp_sk(sk);
 879        struct tcp_md5sig_key *key;
 880        unsigned int size = sizeof(struct in_addr);
 881        const struct tcp_md5sig_info *md5sig;
 882
 883        /* caller either holds rcu_read_lock() or socket lock */
 884        md5sig = rcu_dereference_check(tp->md5sig_info,
 885                                       sock_owned_by_user(sk) ||
 886                                       lockdep_is_held((spinlock_t *)&sk->sk_lock.slock));
 887        if (!md5sig)
 888                return NULL;
 889#if IS_ENABLED(CONFIG_IPV6)
 890        if (family == AF_INET6)
 891                size = sizeof(struct in6_addr);
 892#endif
 893        hlist_for_each_entry_rcu(key, &md5sig->head, node) {
 894                if (key->family != family)
 895                        continue;
 896                if (!memcmp(&key->addr, addr, size))
 897                        return key;
 898        }
 899        return NULL;
 900}
 901EXPORT_SYMBOL(tcp_md5_do_lookup);
 902
 903struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
 904                                         const struct sock *addr_sk)
 905{
 906        const union tcp_md5_addr *addr;
 907
 908        addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
 909        return tcp_md5_do_lookup(sk, addr, AF_INET);
 910}
 911EXPORT_SYMBOL(tcp_v4_md5_lookup);
 912
 913/* This can be called on a newly created socket, from other files */
 914int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 915                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 916{
 917        /* Add Key to the list */
 918        struct tcp_md5sig_key *key;
 919        struct tcp_sock *tp = tcp_sk(sk);
 920        struct tcp_md5sig_info *md5sig;
 921
 922        key = tcp_md5_do_lookup(sk, addr, family);
 923        if (key) {
 924                /* Pre-existing entry - just update that one. */
 925                memcpy(key->key, newkey, newkeylen);
 926                key->keylen = newkeylen;
 927                return 0;
 928        }
 929
 930        md5sig = rcu_dereference_protected(tp->md5sig_info,
 931                                           sock_owned_by_user(sk) ||
 932                                           lockdep_is_held(&sk->sk_lock.slock));
 933        if (!md5sig) {
 934                md5sig = kmalloc(sizeof(*md5sig), gfp);
 935                if (!md5sig)
 936                        return -ENOMEM;
 937
 938                sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 939                INIT_HLIST_HEAD(&md5sig->head);
 940                rcu_assign_pointer(tp->md5sig_info, md5sig);
 941        }
 942
 943        key = sock_kmalloc(sk, sizeof(*key), gfp);
 944        if (!key)
 945                return -ENOMEM;
 946        if (!tcp_alloc_md5sig_pool()) {
 947                sock_kfree_s(sk, key, sizeof(*key));
 948                return -ENOMEM;
 949        }
 950
 951        memcpy(key->key, newkey, newkeylen);
 952        key->keylen = newkeylen;
 953        key->family = family;
 954        memcpy(&key->addr, addr,
 955               (family == AF_INET6) ? sizeof(struct in6_addr) :
 956                                      sizeof(struct in_addr));
 957        hlist_add_head_rcu(&key->node, &md5sig->head);
 958        return 0;
 959}
 960EXPORT_SYMBOL(tcp_md5_do_add);
 961
 962int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 963{
 964        struct tcp_md5sig_key *key;
 965
 966        key = tcp_md5_do_lookup(sk, addr, family);
 967        if (!key)
 968                return -ENOENT;
 969        hlist_del_rcu(&key->node);
 970        atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 971        kfree_rcu(key, rcu);
 972        return 0;
 973}
 974EXPORT_SYMBOL(tcp_md5_do_del);
 975
 976static void tcp_clear_md5_list(struct sock *sk)
 977{
 978        struct tcp_sock *tp = tcp_sk(sk);
 979        struct tcp_md5sig_key *key;
 980        struct hlist_node *n;
 981        struct tcp_md5sig_info *md5sig;
 982
 983        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
 984
 985        hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
 986                hlist_del_rcu(&key->node);
 987                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 988                kfree_rcu(key, rcu);
 989        }
 990}
 991
 992static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 993                                 int optlen)
 994{
 995        struct tcp_md5sig cmd;
 996        struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
 997
 998        if (optlen < sizeof(cmd))
 999                return -EINVAL;
1000
1001        if (copy_from_user(&cmd, optval, sizeof(cmd)))
1002                return -EFAULT;
1003
1004        if (sin->sin_family != AF_INET)
1005                return -EINVAL;
1006
1007        if (!cmd.tcpm_keylen)
1008                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1009                                      AF_INET);
1010
1011        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1012                return -EINVAL;
1013
1014        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1015                              AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1016                              GFP_KERNEL);
1017}
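/*
 * tcp_v4_parse_md5_keys() above is the kernel-side handler for the
 * TCP_MD5SIG socket option.  A minimal userspace sketch of installing a key
 * (illustrative only; peer address and key are made up):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	peer->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &peer->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen removes the key for that address, matching the
 * tcp_md5_do_del() branch above.
 */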
1018
1019static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1020                                        __be32 daddr, __be32 saddr, int nbytes)
1021{
1022        struct tcp4_pseudohdr *bp;
1023        struct scatterlist sg;
1024
1025        bp = &hp->md5_blk.ip4;
1026
1027        /*
1028         * 1. the TCP pseudo-header (in the order: source IP address,
1029         * destination IP address, zero-padded protocol number, and
1030         * segment length)
1031         */
1032        bp->saddr = saddr;
1033        bp->daddr = daddr;
1034        bp->pad = 0;
1035        bp->protocol = IPPROTO_TCP;
1036        bp->len = cpu_to_be16(nbytes);
1037
1038        sg_init_one(&sg, bp, sizeof(*bp));
1039        ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
1040        return crypto_ahash_update(hp->md5_req);
1041}
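/*
 * Per RFC 2385, the digest placed in the MD5 option covers, in order: the
 * TCP pseudo-header (hashed just above), the TCP header with the checksum
 * zeroed and options excluded, the segment data if any, and finally the
 * connection key.  The helpers used below (tcp_md5_hash_header(),
 * tcp_md5_hash_skb_data(), tcp_md5_hash_key()) feed those pieces into the
 * same ahash request in that order.
 */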
1042
1043static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1044                               __be32 daddr, __be32 saddr, const struct tcphdr *th)
1045{
1046        struct tcp_md5sig_pool *hp;
1047        struct ahash_request *req;
1048
1049        hp = tcp_get_md5sig_pool();
1050        if (!hp)
1051                goto clear_hash_noput;
1052        req = hp->md5_req;
1053
1054        if (crypto_ahash_init(req))
1055                goto clear_hash;
1056        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1057                goto clear_hash;
1058        if (tcp_md5_hash_header(hp, th))
1059                goto clear_hash;
1060        if (tcp_md5_hash_key(hp, key))
1061                goto clear_hash;
1062        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1063        if (crypto_ahash_final(req))
1064                goto clear_hash;
1065
1066        tcp_put_md5sig_pool();
1067        return 0;
1068
1069clear_hash:
1070        tcp_put_md5sig_pool();
1071clear_hash_noput:
1072        memset(md5_hash, 0, 16);
1073        return 1;
1074}
1075
1076int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1077                        const struct sock *sk,
1078                        const struct sk_buff *skb)
1079{
1080        struct tcp_md5sig_pool *hp;
1081        struct ahash_request *req;
1082        const struct tcphdr *th = tcp_hdr(skb);
1083        __be32 saddr, daddr;
1084
 1085        if (sk) { /* valid for established/request sockets */
1086                saddr = sk->sk_rcv_saddr;
1087                daddr = sk->sk_daddr;
1088        } else {
1089                const struct iphdr *iph = ip_hdr(skb);
1090                saddr = iph->saddr;
1091                daddr = iph->daddr;
1092        }
1093
1094        hp = tcp_get_md5sig_pool();
1095        if (!hp)
1096                goto clear_hash_noput;
1097        req = hp->md5_req;
1098
1099        if (crypto_ahash_init(req))
1100                goto clear_hash;
1101
1102        if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1103                goto clear_hash;
1104        if (tcp_md5_hash_header(hp, th))
1105                goto clear_hash;
1106        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1107                goto clear_hash;
1108        if (tcp_md5_hash_key(hp, key))
1109                goto clear_hash;
1110        ahash_request_set_crypt(req, NULL, md5_hash, 0);
1111        if (crypto_ahash_final(req))
1112                goto clear_hash;
1113
1114        tcp_put_md5sig_pool();
1115        return 0;
1116
1117clear_hash:
1118        tcp_put_md5sig_pool();
1119clear_hash_noput:
1120        memset(md5_hash, 0, 16);
1121        return 1;
1122}
1123EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1124
1125#endif
1126
1127/* Called with rcu_read_lock() */
1128static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1129                                    const struct sk_buff *skb)
1130{
1131#ifdef CONFIG_TCP_MD5SIG
1132        /*
1133         * This gets called for each TCP segment that arrives
1134         * so we want to be efficient.
1135         * We have 3 drop cases:
1136         * o No MD5 hash and one expected.
1137         * o MD5 hash and we're not expecting one.
 1138         * o MD5 hash and it's wrong.
1139         */
1140        const __u8 *hash_location = NULL;
1141        struct tcp_md5sig_key *hash_expected;
1142        const struct iphdr *iph = ip_hdr(skb);
1143        const struct tcphdr *th = tcp_hdr(skb);
1144        int genhash;
1145        unsigned char newhash[16];
1146
1147        hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1148                                          AF_INET);
1149        hash_location = tcp_parse_md5sig_option(th);
1150
1151        /* We've parsed the options - do we have a hash? */
1152        if (!hash_expected && !hash_location)
1153                return false;
1154
1155        if (hash_expected && !hash_location) {
1156                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1157                return true;
1158        }
1159
1160        if (!hash_expected && hash_location) {
1161                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1162                return true;
1163        }
1164
1165        /* Okay, so this is hash_expected and hash_location -
1166         * so we need to calculate the checksum.
1167         */
1168        genhash = tcp_v4_md5_hash_skb(newhash,
1169                                      hash_expected,
1170                                      NULL, skb);
1171
1172        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1173                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1174                                     &iph->saddr, ntohs(th->source),
1175                                     &iph->daddr, ntohs(th->dest),
1176                                     genhash ? " tcp_v4_calc_md5_hash failed"
1177                                     : "");
1178                return true;
1179        }
1180        return false;
1181#endif
1182        return false;
1183}
1184
1185static void tcp_v4_init_req(struct request_sock *req,
1186                            const struct sock *sk_listener,
1187                            struct sk_buff *skb)
1188{
1189        struct inet_request_sock *ireq = inet_rsk(req);
1190
1191        sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1192        sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1193        ireq->no_srccheck = inet_sk(sk_listener)->transparent;
1194        ireq->opt = tcp_v4_save_options(skb);
1195}
1196
1197static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1198                                          struct flowi *fl,
1199                                          const struct request_sock *req,
1200                                          bool *strict)
1201{
1202        struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1203
1204        if (strict) {
1205                if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1206                        *strict = true;
1207                else
1208                        *strict = false;
1209        }
1210
1211        return dst;
1212}
1213
1214struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1215        .family         =       PF_INET,
1216        .obj_size       =       sizeof(struct tcp_request_sock),
1217        .rtx_syn_ack    =       tcp_rtx_synack,
1218        .send_ack       =       tcp_v4_reqsk_send_ack,
1219        .destructor     =       tcp_v4_reqsk_destructor,
1220        .send_reset     =       tcp_v4_send_reset,
1221        .syn_ack_timeout =      tcp_syn_ack_timeout,
1222};
1223
1224static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1225        .mss_clamp      =       TCP_MSS_DEFAULT,
1226#ifdef CONFIG_TCP_MD5SIG
1227        .req_md5_lookup =       tcp_v4_md5_lookup,
1228        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1229#endif
1230        .init_req       =       tcp_v4_init_req,
1231#ifdef CONFIG_SYN_COOKIES
1232        .cookie_init_seq =      cookie_v4_init_sequence,
1233#endif
1234        .route_req      =       tcp_v4_route_req,
1235        .init_seq       =       tcp_v4_init_sequence,
1236        .send_synack    =       tcp_v4_send_synack,
1237};
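/*
 * These two ops tables are the IPv4 hooks consumed by the address-family
 * independent tcp_conn_request() in tcp_input.c: routing of the request,
 * MD5 key lookup, syncookie ISN generation and SYN-ACK transmission are
 * plugged in here, while the connection-request state machine itself stays
 * in the shared code.
 */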
1238
1239int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1240{
 1241        /* Never answer SYNs sent to broadcast or multicast */
1242        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1243                goto drop;
1244
1245        return tcp_conn_request(&tcp_request_sock_ops,
1246                                &tcp_request_sock_ipv4_ops, sk, skb);
1247
1248drop:
1249        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1250        return 0;
1251}
1252EXPORT_SYMBOL(tcp_v4_conn_request);
1253
1254
1255/*
1256 * The three way handshake has completed - we got a valid synack -
1257 * now create the new socket.
1258 */
1259struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1260                                  struct request_sock *req,
1261                                  struct dst_entry *dst,
1262                                  struct request_sock *req_unhash,
1263                                  bool *own_req)
1264{
1265        struct inet_request_sock *ireq;
1266        struct inet_sock *newinet;
1267        struct tcp_sock *newtp;
1268        struct sock *newsk;
1269#ifdef CONFIG_TCP_MD5SIG
1270        struct tcp_md5sig_key *key;
1271#endif
1272        struct ip_options_rcu *inet_opt;
1273
1274        if (sk_acceptq_is_full(sk))
1275                goto exit_overflow;
1276
1277        newsk = tcp_create_openreq_child(sk, req, skb);
1278        if (!newsk)
1279                goto exit_nonewsk;
1280
1281        newsk->sk_gso_type = SKB_GSO_TCPV4;
1282        inet_sk_rx_dst_set(newsk, skb);
1283
1284        newtp                 = tcp_sk(newsk);
1285        newinet               = inet_sk(newsk);
1286        ireq                  = inet_rsk(req);
1287        sk_daddr_set(newsk, ireq->ir_rmt_addr);
1288        sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1289        newsk->sk_bound_dev_if = ireq->ir_iif;
1290        newinet->inet_saddr           = ireq->ir_loc_addr;
1291        inet_opt              = ireq->opt;
1292        rcu_assign_pointer(newinet->inet_opt, inet_opt);
1293        ireq->opt             = NULL;
1294        newinet->mc_index     = inet_iif(skb);
1295        newinet->mc_ttl       = ip_hdr(skb)->ttl;
1296        newinet->rcv_tos      = ip_hdr(skb)->tos;
1297        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1298        if (inet_opt)
1299                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1300        newinet->inet_id = newtp->write_seq ^ jiffies;
1301
1302        if (!dst) {
1303                dst = inet_csk_route_child_sock(sk, newsk, req);
1304                if (!dst)
1305                        goto put_and_exit;
1306        } else {
1307                /* syncookie case : see end of cookie_v4_check() */
1308        }
1309        sk_setup_caps(newsk, dst);
1310
1311        tcp_ca_openreq_child(newsk, dst);
1312
1313        tcp_sync_mss(newsk, dst_mtu(dst));
1314        newtp->advmss = dst_metric_advmss(dst);
1315        if (tcp_sk(sk)->rx_opt.user_mss &&
1316            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1317                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1318
1319        tcp_initialize_rcv_mss(newsk);
1320
1321#ifdef CONFIG_TCP_MD5SIG
1322        /* Copy over the MD5 key from the original socket */
1323        key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1324                                AF_INET);
1325        if (key) {
1326                /*
1327                 * We're using one, so create a matching key
1328                 * on the newsk structure. If we fail to get
1329                 * memory, then we end up not copying the key
1330                 * across. Shucks.
1331                 */
1332                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1333                               AF_INET, key->key, key->keylen, GFP_ATOMIC);
1334                sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1335        }
1336#endif
1337
1338        if (__inet_inherit_port(sk, newsk) < 0)
1339                goto put_and_exit;
1340        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1341        if (*own_req)
1342                tcp_move_syn(newtp, req);
1343
1344        return newsk;
1345
1346exit_overflow:
1347        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1348exit_nonewsk:
1349        dst_release(dst);
1350exit:
1351        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1352        return NULL;
1353put_and_exit:
1354        inet_csk_prepare_forced_close(newsk);
1355        tcp_done(newsk);
1356        goto exit;
1357}
1358EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1359
1360static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1361{
1362#ifdef CONFIG_SYN_COOKIES
1363        const struct tcphdr *th = tcp_hdr(skb);
1364
1365        if (!th->syn)
1366                sk = cookie_v4_check(sk, skb);
1367#endif
1368        return sk;
1369}
1370
 1371/* The socket must have its spinlock held when we get
1372 * here, unless it is a TCP_LISTEN socket.
1373 *
1374 * We have a potential double-lock case here, so even when
1375 * doing backlog processing we use the BH locking scheme.
1376 * This is because we cannot sleep with the original spinlock
1377 * held.
1378 */
1379int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1380{
1381        struct sock *rsk;
1382
1383        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1384                struct dst_entry *dst = sk->sk_rx_dst;
1385
1386                sock_rps_save_rxhash(sk, skb);
1387                sk_mark_napi_id(sk, skb);
1388                if (dst) {
1389                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1390                            !dst->ops->check(dst, 0)) {
1391                                dst_release(dst);
1392                                sk->sk_rx_dst = NULL;
1393                        }
1394                }
1395                tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1396                return 0;
1397        }
1398
1399        if (tcp_checksum_complete(skb))
1400                goto csum_err;
1401
1402        if (sk->sk_state == TCP_LISTEN) {
1403                struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1404
1405                if (!nsk)
1406                        goto discard;
1407                if (nsk != sk) {
1408                        sock_rps_save_rxhash(nsk, skb);
1409                        sk_mark_napi_id(nsk, skb);
1410                        if (tcp_child_process(sk, nsk, skb)) {
1411                                rsk = nsk;
1412                                goto reset;
1413                        }
1414                        return 0;
1415                }
1416        } else
1417                sock_rps_save_rxhash(sk, skb);
1418
1419        if (tcp_rcv_state_process(sk, skb)) {
1420                rsk = sk;
1421                goto reset;
1422        }
1423        return 0;
1424
1425reset:
1426        tcp_v4_send_reset(rsk, skb);
1427discard:
1428        kfree_skb(skb);
1429        /* Be careful here. If this function gets more complicated and
1430         * gcc suffers from register pressure on the x86, sk (in %ebx)
1431         * might be destroyed here. This current version compiles correctly,
1432         * but you have been warned.
1433         */
1434        return 0;
1435
1436csum_err:
1437        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1438        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1439        goto discard;
1440}
1441EXPORT_SYMBOL(tcp_v4_do_rcv);
1442
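    /* Early demux: called before the IP layer makes its routing decision.
     * If the segment matches an ESTABLISHED socket, attach that socket to the
     * skb and reuse the socket's cached input route (sk->sk_rx_dst), which
     * avoids a separate route lookup for this packet.
     */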
1443void tcp_v4_early_demux(struct sk_buff *skb)
1444{
1445        const struct iphdr *iph;
1446        const struct tcphdr *th;
1447        struct sock *sk;
1448
1449        if (skb->pkt_type != PACKET_HOST)
1450                return;
1451
1452        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1453                return;
1454
1455        iph = ip_hdr(skb);
1456        th = tcp_hdr(skb);
1457
1458        if (th->doff < sizeof(struct tcphdr) / 4)
1459                return;
1460
1461        sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1462                                       iph->saddr, th->source,
1463                                       iph->daddr, ntohs(th->dest),
1464                                       skb->skb_iif);
1465        if (sk) {
1466                skb->sk = sk;
1467                skb->destructor = sock_edemux;
1468                if (sk_fullsock(sk)) {
1469                        struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1470
1471                        if (dst)
1472                                dst = dst_check(dst, 0);
1473                        if (dst &&
1474                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1475                                skb_dst_set_noref(skb, dst);
1476                }
1477        }
1478}
1479
1480/* Packet is added to VJ-style prequeue for processing in process
1481 * context, if a reader task is waiting. Apparently, this exciting
1482 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1483 * failed somewhere. Latency? Burstiness? Well, at least now we will
1484 * see why it failed. 8)8)                               --ANK
1485 *
1486 */
1487bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1488{
1489        struct tcp_sock *tp = tcp_sk(sk);
1490
1491        if (sysctl_tcp_low_latency || !tp->ucopy.task)
1492                return false;
1493
1494        if (skb->len <= tcp_hdrlen(skb) &&
1495            skb_queue_len(&tp->ucopy.prequeue) == 0)
1496                return false;
1497
1498        /* Before escaping the RCU-protected region, we need to take care of the
1499         * skb dst.  Prequeue is only enabled for established sockets.
1500         * For such sockets, we might need the skb dst only to set sk->sk_rx_dst.
1501         * Instead of doing a full sk_rx_dst validity check here, let's perform
1502         * an optimistic check.
1503         */
1504        if (likely(sk->sk_rx_dst))
1505                skb_dst_drop(skb);
1506        else
1507                skb_dst_force_safe(skb);
1508
1509        __skb_queue_tail(&tp->ucopy.prequeue, skb);
1510        tp->ucopy.memory += skb->truesize;
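            /* If the prequeued data no longer fits within the receive buffer,
             * drain the entire prequeue right here via the backlog receive
             * handler and count each segment as a prequeue drop.
             */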
1511        if (tp->ucopy.memory > sk->sk_rcvbuf) {
1512                struct sk_buff *skb1;
1513
1514                BUG_ON(sock_owned_by_user(sk));
1515
1516                while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1517                        sk_backlog_rcv(sk, skb1);
1518                        NET_INC_STATS_BH(sock_net(sk),
1519                                         LINUX_MIB_TCPPREQUEUEDROPPED);
1520                }
1521
1522                tp->ucopy.memory = 0;
1523        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1524                wake_up_interruptible_sync_poll(sk_sleep(sk),
1525                                           POLLIN | POLLRDNORM | POLLRDBAND);
1526                if (!inet_csk_ack_scheduled(sk))
1527                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1528                                                  (3 * tcp_rto_min(sk)) / 4,
1529                                                  TCP_RTO_MAX);
1530        }
1531        return true;
1532}
1533EXPORT_SYMBOL(tcp_prequeue);
1534
1535/*
1536 *      From tcp_input.c
1537 */
1538
1539int tcp_v4_rcv(struct sk_buff *skb)
1540{
1541        const struct iphdr *iph;
1542        const struct tcphdr *th;
1543        struct sock *sk;
1544        int ret;
1545        struct net *net = dev_net(skb->dev);
1546
1547        if (skb->pkt_type != PACKET_HOST)
1548                goto discard_it;
1549
1550        /* Count it even if it's bad */
1551        TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1552
1553        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1554                goto discard_it;
1555
1556        th = tcp_hdr(skb);
1557
1558        if (th->doff < sizeof(struct tcphdr) / 4)
1559                goto bad_packet;
1560        if (!pskb_may_pull(skb, th->doff * 4))
1561                goto discard_it;
1562
1563        /* An explanation is required here, I think.
1564         * Packet length and doff are validated by header prediction,
1565         * provided the case of th->doff == 0 is eliminated.
1566         * So, we defer the checks. */
1567
1568        if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1569                goto csum_error;
1570
1571        th = tcp_hdr(skb);
1572        iph = ip_hdr(skb);
1573        /* This is tricky: we move IPCB to its correct location inside TCP_SKB_CB();
1574         * barrier() makes sure the compiler won't play fool^Waliasing games.
1575         */
1576        memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1577                sizeof(struct inet_skb_parm));
1578        barrier();
1579
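            /* Fill the TCP control block from the header once, so the rest of
             * the receive path never re-parses it.  SYN and FIN each consume
             * one unit of sequence space, hence their inclusion in end_seq.
             */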
1580        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1581        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1582                                    skb->len - th->doff * 4);
1583        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1584        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1585        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1586        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1587        TCP_SKB_CB(skb)->sacked  = 0;
1588
1589lookup:
1590        sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1591                               th->dest);
1592        if (!sk)
1593                goto no_tcp_socket;
1594
1595process:
1596        if (sk->sk_state == TCP_TIME_WAIT)
1597                goto do_time_wait;
1598
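            /* The lookup hit a request socket (a SYN_RECV mini-socket), so this
             * segment is normally the ACK that completes the three-way handshake.
             * tcp_check_req() validates it and, on success, returns the child
             * socket; otherwise the segment is dropped.
             */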
1599        if (sk->sk_state == TCP_NEW_SYN_RECV) {
1600                struct request_sock *req = inet_reqsk(sk);
1601                struct sock *nsk;
1602
1603                sk = req->rsk_listener;
1604                if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1605                        reqsk_put(req);
1606                        goto discard_it;
1607                }
1608                if (unlikely(sk->sk_state != TCP_LISTEN)) {
1609                        inet_csk_reqsk_queue_drop_and_put(sk, req);
1610                        goto lookup;
1611                }
1612                sock_hold(sk);
1613                nsk = tcp_check_req(sk, skb, req, false);
1614                if (!nsk) {
1615                        reqsk_put(req);
1616                        goto discard_and_relse;
1617                }
1618                if (nsk == sk) {
1619                        reqsk_put(req);
1620                } else if (tcp_child_process(sk, nsk, skb)) {
1621                        tcp_v4_send_reset(nsk, skb);
1622                        goto discard_and_relse;
1623                } else {
1624                        sock_put(sk);
1625                        return 0;
1626                }
1627        }
1628        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1629                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1630                goto discard_and_relse;
1631        }
1632
1633        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1634                goto discard_and_relse;
1635
1636        if (tcp_v4_inbound_md5_hash(sk, skb))
1637                goto discard_and_relse;
1638
1639        nf_reset(skb);
1640
1641        if (sk_filter(sk, skb))
1642                goto discard_and_relse;
1643
1644        skb->dev = NULL;
1645
1646        if (sk->sk_state == TCP_LISTEN) {
1647                ret = tcp_v4_do_rcv(sk, skb);
1648                goto put_and_return;
1649        }
1650
1651        sk_incoming_cpu_update(sk);
1652
1653        bh_lock_sock_nested(sk);
1654        tcp_segs_in(tcp_sk(sk), skb);
1655        ret = 0;
1656        if (!sock_owned_by_user(sk)) {
1657                if (!tcp_prequeue(sk, skb))
1658                        ret = tcp_v4_do_rcv(sk, skb);
1659        } else if (unlikely(sk_add_backlog(sk, skb,
1660                                           sk->sk_rcvbuf + sk->sk_sndbuf))) {
1661                bh_unlock_sock(sk);
1662                NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1663                goto discard_and_relse;
1664        }
1665        bh_unlock_sock(sk);
1666
1667put_and_return:
1668        sock_put(sk);
1669
1670        return ret;
1671
1672no_tcp_socket:
1673        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1674                goto discard_it;
1675
1676        if (tcp_checksum_complete(skb)) {
1677csum_error:
1678                TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1679bad_packet:
1680                TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1681        } else {
1682                tcp_v4_send_reset(NULL, skb);
1683        }
1684
1685discard_it:
1686        /* Discard frame. */
1687        kfree_skb(skb);
1688        return 0;
1689
1690discard_and_relse:
1691        sock_put(sk);
1692        goto discard_it;
1693
1694do_time_wait:
1695        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1696                inet_twsk_put(inet_twsk(sk));
1697                goto discard_it;
1698        }
1699
1700        if (tcp_checksum_complete(skb)) {
1701                inet_twsk_put(inet_twsk(sk));
1702                goto csum_error;
1703        }
1704        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1705        case TCP_TW_SYN: {
1706                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1707                                                        &tcp_hashinfo, skb,
1708                                                        __tcp_hdrlen(th),
1709                                                        iph->saddr, th->source,
1710                                                        iph->daddr, th->dest,
1711                                                        inet_iif(skb));
1712                if (sk2) {
1713                        inet_twsk_deschedule_put(inet_twsk(sk));
1714                        sk = sk2;
1715                        goto process;
1716                }
1717                /* Fall through to ACK */
1718        }
1719        case TCP_TW_ACK:
1720                tcp_v4_timewait_ack(sk, skb);
1721                break;
1722        case TCP_TW_RST:
1723                tcp_v4_send_reset(sk, skb);
1724                inet_twsk_deschedule_put(inet_twsk(sk));
1725                goto discard_it;
1726        case TCP_TW_SUCCESS:;
1727        }
1728        goto discard_it;
1729}
1730
1731static struct timewait_sock_ops tcp_timewait_sock_ops = {
1732        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1733        .twsk_unique    = tcp_twsk_unique,
1734        .twsk_destructor= tcp_twsk_destructor,
1735};
1736
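    /* Cache the validated input route and the interface it arrived on;
     * tcp_v4_do_rcv() and tcp_v4_early_demux() consult this cache on the
     * receive fast path.
     */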
1737void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1738{
1739        struct dst_entry *dst = skb_dst(skb);
1740
1741        if (dst && dst_hold_safe(dst)) {
1742                sk->sk_rx_dst = dst;
1743                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1744        }
1745}
1746EXPORT_SYMBOL(inet_sk_rx_dst_set);
1747
1748const struct inet_connection_sock_af_ops ipv4_specific = {
1749        .queue_xmit        = ip_queue_xmit,
1750        .send_check        = tcp_v4_send_check,
1751        .rebuild_header    = inet_sk_rebuild_header,
1752        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1753        .conn_request      = tcp_v4_conn_request,
1754        .syn_recv_sock     = tcp_v4_syn_recv_sock,
1755        .net_header_len    = sizeof(struct iphdr),
1756        .setsockopt        = ip_setsockopt,
1757        .getsockopt        = ip_getsockopt,
1758        .addr2sockaddr     = inet_csk_addr2sockaddr,
1759        .sockaddr_len      = sizeof(struct sockaddr_in),
1760        .bind_conflict     = inet_csk_bind_conflict,
1761#ifdef CONFIG_COMPAT
1762        .compat_setsockopt = compat_ip_setsockopt,
1763        .compat_getsockopt = compat_ip_getsockopt,
1764#endif
1765        .mtu_reduced       = tcp_v4_mtu_reduced,
1766};
1767EXPORT_SYMBOL(ipv4_specific);
1768
1769#ifdef CONFIG_TCP_MD5SIG
1770static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1771        .md5_lookup             = tcp_v4_md5_lookup,
1772        .calc_md5_hash          = tcp_v4_md5_hash_skb,
1773        .md5_parse              = tcp_v4_parse_md5_keys,
1774};
1775#endif
1776
1777/* NOTE: A lot of things are set to zero explicitly by the call to
1778 *       sk_alloc(), so they need not be done here.
1779 */
1780static int tcp_v4_init_sock(struct sock *sk)
1781{
1782        struct inet_connection_sock *icsk = inet_csk(sk);
1783
1784        tcp_init_sock(sk);
1785
1786        icsk->icsk_af_ops = &ipv4_specific;
1787
1788#ifdef CONFIG_TCP_MD5SIG
1789        tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1790#endif
1791
1792        return 0;
1793}
1794
1795void tcp_v4_destroy_sock(struct sock *sk)
1796{
1797        struct tcp_sock *tp = tcp_sk(sk);
1798
1799        tcp_clear_xmit_timers(sk);
1800
1801        tcp_cleanup_congestion_control(sk);
1802
1803        /* Clean up the write buffer. */
1804        tcp_write_queue_purge(sk);
1805
1806        /* Cleans up our, hopefully empty, out_of_order_queue. */
1807        __skb_queue_purge(&tp->out_of_order_queue);
1808
1809#ifdef CONFIG_TCP_MD5SIG
1810        /* Clean up the MD5 key list, if any */
1811        if (tp->md5sig_info) {
1812                tcp_clear_md5_list(sk);
1813                kfree_rcu(tp->md5sig_info, rcu);
1814                tp->md5sig_info = NULL;
1815        }
1816#endif
1817
1818        /* Clean up the prequeue; it really should be empty already. */
1819        __skb_queue_purge(&tp->ucopy.prequeue);
1820
1821        /* Clean up a referenced TCP bind bucket. */
1822        if (inet_csk(sk)->icsk_bind_hash)
1823                inet_put_port(sk);
1824
1825        BUG_ON(tp->fastopen_rsk);
1826
1827        /* If the socket was aborted during the connect operation */
1828        tcp_free_fastopen_req(tp);
1829        tcp_saved_syn_free(tp);
1830
1831        sk_sockets_allocated_dec(sk);
1832
1833        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1834                sock_release_memcg(sk);
1835}
1836EXPORT_SYMBOL(tcp_v4_destroy_sock);
1837
1838#ifdef CONFIG_PROC_FS
1839/* Proc filesystem TCP sock list dumping. */
1840
1841/*
1842 * Get the next listener socket following cur.  If cur is NULL, get the
1843 * first socket, starting from the bucket given in st->bucket; when
1844 * st->bucket is zero, the very first socket in the hash table is returned.
1845 */
1846static void *listening_get_next(struct seq_file *seq, void *cur)
1847{
1848        struct inet_connection_sock *icsk;
1849        struct hlist_nulls_node *node;
1850        struct sock *sk = cur;
1851        struct inet_listen_hashbucket *ilb;
1852        struct tcp_iter_state *st = seq->private;
1853        struct net *net = seq_file_net(seq);
1854
1855        if (!sk) {
1856                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1857                spin_lock_bh(&ilb->lock);
1858                sk = sk_nulls_head(&ilb->head);
1859                st->offset = 0;
1860                goto get_sk;
1861        }
1862        ilb = &tcp_hashinfo.listening_hash[st->bucket];
1863        ++st->num;
1864        ++st->offset;
1865
1866        sk = sk_nulls_next(sk);
1867get_sk:
1868        sk_nulls_for_each_from(sk, node) {
1869                if (!net_eq(sock_net(sk), net))
1870                        continue;
1871                if (sk->sk_family == st->family) {
1872                        cur = sk;
1873                        goto out;
1874                }
1875                icsk = inet_csk(sk);
1876        }
1877        spin_unlock_bh(&ilb->lock);
1878        st->offset = 0;
1879        if (++st->bucket < INET_LHTABLE_SIZE) {
1880                ilb = &tcp_hashinfo.listening_hash[st->bucket];
1881                spin_lock_bh(&ilb->lock);
1882                sk = sk_nulls_head(&ilb->head);
1883                goto get_sk;
1884        }
1885        cur = NULL;
1886out:
1887        return cur;
1888}
1889
1890static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1891{
1892        struct tcp_iter_state *st = seq->private;
1893        void *rc;
1894
1895        st->bucket = 0;
1896        st->offset = 0;
1897        rc = listening_get_next(seq, NULL);
1898
1899        while (rc && *pos) {
1900                rc = listening_get_next(seq, rc);
1901                --*pos;
1902        }
1903        return rc;
1904}
1905
1906static inline bool empty_bucket(const struct tcp_iter_state *st)
1907{
1908        return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1909}
1910
1911/*
1912 * Get the first established socket, starting from the bucket given in
1913 * st->bucket.  If st->bucket is zero, the very first socket in the hash is returned.
1914 */
1915static void *established_get_first(struct seq_file *seq)
1916{
1917        struct tcp_iter_state *st = seq->private;
1918        struct net *net = seq_file_net(seq);
1919        void *rc = NULL;
1920
1921        st->offset = 0;
1922        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1923                struct sock *sk;
1924                struct hlist_nulls_node *node;
1925                spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1926
1927                /* Lockless fast path for the common case of empty buckets */
1928                if (empty_bucket(st))
1929                        continue;
1930
1931                spin_lock_bh(lock);
1932                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1933                        if (sk->sk_family != st->family ||
1934                            !net_eq(sock_net(sk), net)) {
1935                                continue;
1936                        }
1937                        rc = sk;
1938                        goto out;
1939                }
1940                spin_unlock_bh(lock);
1941        }
1942out:
1943        return rc;
1944}
1945
1946static void *established_get_next(struct seq_file *seq, void *cur)
1947{
1948        struct sock *sk = cur;
1949        struct hlist_nulls_node *node;
1950        struct tcp_iter_state *st = seq->private;
1951        struct net *net = seq_file_net(seq);
1952
1953        ++st->num;
1954        ++st->offset;
1955
1956        sk = sk_nulls_next(sk);
1957
1958        sk_nulls_for_each_from(sk, node) {
1959                if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1960                        return sk;
1961        }
1962
1963        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1964        ++st->bucket;
1965        return established_get_first(seq);
1966}
1967
1968static void *established_get_idx(struct seq_file *seq, loff_t pos)
1969{
1970        struct tcp_iter_state *st = seq->private;
1971        void *rc;
1972
1973        st->bucket = 0;
1974        rc = established_get_first(seq);
1975
1976        while (rc && pos) {
1977                rc = established_get_next(seq, rc);
1978                --pos;
1979        }
1980        return rc;
1981}
1982
1983static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
1984{
1985        void *rc;
1986        struct tcp_iter_state *st = seq->private;
1987
1988        st->state = TCP_SEQ_STATE_LISTENING;
1989        rc        = listening_get_idx(seq, &pos);
1990
1991        if (!rc) {
1992                st->state = TCP_SEQ_STATE_ESTABLISHED;
1993                rc        = established_get_idx(seq, pos);
1994        }
1995
1996        return rc;
1997}
1998
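    /* Resume iteration at the bucket/offset remembered from the previous read,
     * so a large /proc/net/tcp dump does not rescan the hash tables from the
     * beginning on every read() call.
     */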
1999static void *tcp_seek_last_pos(struct seq_file *seq)
2000{
2001        struct tcp_iter_state *st = seq->private;
2002        int offset = st->offset;
2003        int orig_num = st->num;
2004        void *rc = NULL;
2005
2006        switch (st->state) {
2007        case TCP_SEQ_STATE_LISTENING:
2008                if (st->bucket >= INET_LHTABLE_SIZE)
2009                        break;
2010                st->state = TCP_SEQ_STATE_LISTENING;
2011                rc = listening_get_next(seq, NULL);
2012                while (offset-- && rc)
2013                        rc = listening_get_next(seq, rc);
2014                if (rc)
2015                        break;
2016                st->bucket = 0;
2017                st->state = TCP_SEQ_STATE_ESTABLISHED;
2018                /* Fallthrough */
2019        case TCP_SEQ_STATE_ESTABLISHED:
2020                if (st->bucket > tcp_hashinfo.ehash_mask)
2021                        break;
2022                rc = established_get_first(seq);
2023                while (offset-- && rc)
2024                        rc = established_get_next(seq, rc);
2025        }
2026
2027        st->num = orig_num;
2028
2029        return rc;
2030}
2031
2032static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2033{
2034        struct tcp_iter_state *st = seq->private;
2035        void *rc;
2036
2037        if (*pos && *pos == st->last_pos) {
2038                rc = tcp_seek_last_pos(seq);
2039                if (rc)
2040                        goto out;
2041        }
2042
2043        st->state = TCP_SEQ_STATE_LISTENING;
2044        st->num = 0;
2045        st->bucket = 0;
2046        st->offset = 0;
2047        rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2048
2049out:
2050        st->last_pos = *pos;
2051        return rc;
2052}
2053
2054static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2055{
2056        struct tcp_iter_state *st = seq->private;
2057        void *rc = NULL;
2058
2059        if (v == SEQ_START_TOKEN) {
2060                rc = tcp_get_idx(seq, 0);
2061                goto out;
2062        }
2063
2064        switch (st->state) {
2065        case TCP_SEQ_STATE_LISTENING:
2066                rc = listening_get_next(seq, v);
2067                if (!rc) {
2068                        st->state = TCP_SEQ_STATE_ESTABLISHED;
2069                        st->bucket = 0;
2070                        st->offset = 0;
2071                        rc        = established_get_first(seq);
2072                }
2073                break;
2074        case TCP_SEQ_STATE_ESTABLISHED:
2075                rc = established_get_next(seq, v);
2076                break;
2077        }
2078out:
2079        ++*pos;
2080        st->last_pos = *pos;
2081        return rc;
2082}
2083
2084static void tcp_seq_stop(struct seq_file *seq, void *v)
2085{
2086        struct tcp_iter_state *st = seq->private;
2087
2088        switch (st->state) {
2089        case TCP_SEQ_STATE_LISTENING:
2090                if (v != SEQ_START_TOKEN)
2091                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2092                break;
2093        case TCP_SEQ_STATE_ESTABLISHED:
2094                if (v)
2095                        spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2096                break;
2097        }
2098}
2099
2100int tcp_seq_open(struct inode *inode, struct file *file)
2101{
2102        struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2103        struct tcp_iter_state *s;
2104        int err;
2105
2106        err = seq_open_net(inode, file, &afinfo->seq_ops,
2107                          sizeof(struct tcp_iter_state));
2108        if (err < 0)
2109                return err;
2110
2111        s = ((struct seq_file *)file->private_data)->private;
2112        s->family               = afinfo->family;
2113        s->last_pos             = 0;
2114        return 0;
2115}
2116EXPORT_SYMBOL(tcp_seq_open);
2117
2118int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2119{
2120        int rc = 0;
2121        struct proc_dir_entry *p;
2122
2123        afinfo->seq_ops.start           = tcp_seq_start;
2124        afinfo->seq_ops.next            = tcp_seq_next;
2125        afinfo->seq_ops.stop            = tcp_seq_stop;
2126
2127        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2128                             afinfo->seq_fops, afinfo);
2129        if (!p)
2130                rc = -ENOMEM;
2131        return rc;
2132}
2133EXPORT_SYMBOL(tcp_proc_register);
2134
2135void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2136{
2137        remove_proc_entry(afinfo->name, net->proc_net);
2138}
2139EXPORT_SYMBOL(tcp_proc_unregister);
2140
2141static void get_openreq4(const struct request_sock *req,
2142                         struct seq_file *f, int i)
2143{
2144        const struct inet_request_sock *ireq = inet_rsk(req);
2145        long delta = req->rsk_timer.expires - jiffies;
2146
2147        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2148                " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2149                i,
2150                ireq->ir_loc_addr,
2151                ireq->ir_num,
2152                ireq->ir_rmt_addr,
2153                ntohs(ireq->ir_rmt_port),
2154                TCP_SYN_RECV,
2155                0, 0, /* could print option size, but that is af dependent. */
2156                1,    /* timers active (only the expire timer) */
2157                jiffies_delta_to_clock_t(delta),
2158                req->num_timeout,
2159                from_kuid_munged(seq_user_ns(f),
2160                                 sock_i_uid(req->rsk_listener)),
2161                0,  /* non standard timer */
2162                0, /* open_requests have no inode */
2163                0,
2164                req);
2165}
2166
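    /* Emit one /proc/net/tcp line for a full socket.  The columns follow the
     * header printed by tcp4_seq_show(): addresses and ports, state,
     * tx_queue:rx_queue, timer info, retransmits, uid, probes, inode,
     * refcount, and a few TCP internals (rto, ato, cwnd, ssthresh).
     */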
2167static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2168{
2169        int timer_active;
2170        unsigned long timer_expires;
2171        const struct tcp_sock *tp = tcp_sk(sk);
2172        const struct inet_connection_sock *icsk = inet_csk(sk);
2173        const struct inet_sock *inet = inet_sk(sk);
2174        const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2175        __be32 dest = inet->inet_daddr;
2176        __be32 src = inet->inet_rcv_saddr;
2177        __u16 destp = ntohs(inet->inet_dport);
2178        __u16 srcp = ntohs(inet->inet_sport);
2179        int rx_queue;
2180        int state;
2181
2182        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2183            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2184            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2185                timer_active    = 1;
2186                timer_expires   = icsk->icsk_timeout;
2187        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2188                timer_active    = 4;
2189                timer_expires   = icsk->icsk_timeout;
2190        } else if (timer_pending(&sk->sk_timer)) {
2191                timer_active    = 2;
2192                timer_expires   = sk->sk_timer.expires;
2193        } else {
2194                timer_active    = 0;
2195                timer_expires = jiffies;
2196        }
2197
2198        state = sk_state_load(sk);
2199        if (state == TCP_LISTEN)
2200                rx_queue = sk->sk_ack_backlog;
2201        else
2202                /* Because we don't lock the socket,
2203                 * we might find a transient negative value.
2204                 */
2205                rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2206
2207        seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2208                        "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2209                i, src, srcp, dest, destp, state,
2210                tp->write_seq - tp->snd_una,
2211                rx_queue,
2212                timer_active,
2213                jiffies_delta_to_clock_t(timer_expires - jiffies),
2214                icsk->icsk_retransmits,
2215                from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2216                icsk->icsk_probes_out,
2217                sock_i_ino(sk),
2218                atomic_read(&sk->sk_refcnt), sk,
2219                jiffies_to_clock_t(icsk->icsk_rto),
2220                jiffies_to_clock_t(icsk->icsk_ack.ato),
2221                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2222                tp->snd_cwnd,
2223                state == TCP_LISTEN ?
2224                    fastopenq->max_qlen :
2225                    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2226}
2227
2228static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2229                               struct seq_file *f, int i)
2230{
2231        long delta = tw->tw_timer.expires - jiffies;
2232        __be32 dest, src;
2233        __u16 destp, srcp;
2234
2235        dest  = tw->tw_daddr;
2236        src   = tw->tw_rcv_saddr;
2237        destp = ntohs(tw->tw_dport);
2238        srcp  = ntohs(tw->tw_sport);
2239
2240        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2241                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2242                i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2243                3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2244                atomic_read(&tw->tw_refcnt), tw);
2245}
2246
2247#define TMPSZ 150
2248
2249static int tcp4_seq_show(struct seq_file *seq, void *v)
2250{
2251        struct tcp_iter_state *st;
2252        struct sock *sk = v;
2253
2254        seq_setwidth(seq, TMPSZ - 1);
2255        if (v == SEQ_START_TOKEN) {
2256                seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2257                           "rx_queue tr tm->when retrnsmt   uid  timeout "
2258                           "inode");
2259                goto out;
2260        }
2261        st = seq->private;
2262
2263        if (sk->sk_state == TCP_TIME_WAIT)
2264                get_timewait4_sock(v, seq, st->num);
2265        else if (sk->sk_state == TCP_NEW_SYN_RECV)
2266                get_openreq4(v, seq, st->num);
2267        else
2268                get_tcp4_sock(v, seq, st->num);
2269out:
2270        seq_pad(seq, '\n');
2271        return 0;
2272}
2273
2274static const struct file_operations tcp_afinfo_seq_fops = {
2275        .owner   = THIS_MODULE,
2276        .open    = tcp_seq_open,
2277        .read    = seq_read,
2278        .llseek  = seq_lseek,
2279        .release = seq_release_net
2280};
2281
2282static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2283        .name           = "tcp",
2284        .family         = AF_INET,
2285        .seq_fops       = &tcp_afinfo_seq_fops,
2286        .seq_ops        = {
2287                .show           = tcp4_seq_show,
2288        },
2289};
2290
2291static int __net_init tcp4_proc_init_net(struct net *net)
2292{
2293        return tcp_proc_register(net, &tcp4_seq_afinfo);
2294}
2295
2296static void __net_exit tcp4_proc_exit_net(struct net *net)
2297{
2298        tcp_proc_unregister(net, &tcp4_seq_afinfo);
2299}
2300
2301static struct pernet_operations tcp4_net_ops = {
2302        .init = tcp4_proc_init_net,
2303        .exit = tcp4_proc_exit_net,
2304};
2305
2306int __init tcp4_proc_init(void)
2307{
2308        return register_pernet_subsys(&tcp4_net_ops);
2309}
2310
2311void tcp4_proc_exit(void)
2312{
2313        unregister_pernet_subsys(&tcp4_net_ops);
2314}
2315#endif /* CONFIG_PROC_FS */
2316
2317struct proto tcp_prot = {
2318        .name                   = "TCP",
2319        .owner                  = THIS_MODULE,
2320        .close                  = tcp_close,
2321        .connect                = tcp_v4_connect,
2322        .disconnect             = tcp_disconnect,
2323        .accept                 = inet_csk_accept,
2324        .ioctl                  = tcp_ioctl,
2325        .init                   = tcp_v4_init_sock,
2326        .destroy                = tcp_v4_destroy_sock,
2327        .shutdown               = tcp_shutdown,
2328        .setsockopt             = tcp_setsockopt,
2329        .getsockopt             = tcp_getsockopt,
2330        .recvmsg                = tcp_recvmsg,
2331        .sendmsg                = tcp_sendmsg,
2332        .sendpage               = tcp_sendpage,
2333        .backlog_rcv            = tcp_v4_do_rcv,
2334        .release_cb             = tcp_release_cb,
2335        .hash                   = inet_hash,
2336        .unhash                 = inet_unhash,
2337        .get_port               = inet_csk_get_port,
2338        .enter_memory_pressure  = tcp_enter_memory_pressure,
2339        .stream_memory_free     = tcp_stream_memory_free,
2340        .sockets_allocated      = &tcp_sockets_allocated,
2341        .orphan_count           = &tcp_orphan_count,
2342        .memory_allocated       = &tcp_memory_allocated,
2343        .memory_pressure        = &tcp_memory_pressure,
2344        .sysctl_mem             = sysctl_tcp_mem,
2345        .sysctl_wmem            = sysctl_tcp_wmem,
2346        .sysctl_rmem            = sysctl_tcp_rmem,
2347        .max_header             = MAX_TCP_HEADER,
2348        .obj_size               = sizeof(struct tcp_sock),
2349        .slab_flags             = SLAB_DESTROY_BY_RCU,
2350        .twsk_prot              = &tcp_timewait_sock_ops,
2351        .rsk_prot               = &tcp_request_sock_ops,
2352        .h.hashinfo             = &tcp_hashinfo,
2353        .no_autobind            = true,
2354#ifdef CONFIG_COMPAT
2355        .compat_setsockopt      = compat_tcp_setsockopt,
2356        .compat_getsockopt      = compat_tcp_getsockopt,
2357#endif
2358        .diag_destroy           = tcp_abort,
2359};
2360EXPORT_SYMBOL(tcp_prot);
2361
2362static void __net_exit tcp_sk_exit(struct net *net)
2363{
2364        int cpu;
2365
2366        for_each_possible_cpu(cpu)
2367                inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2368        free_percpu(net->ipv4.tcp_sk);
2369}
2370
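    /* Per-namespace setup: create one raw control socket per possible CPU
     * (used to transmit resets and ACKs that are not tied to a local socket)
     * and initialise the namespace's TCP sysctls to their defaults.
     */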
2371static int __net_init tcp_sk_init(struct net *net)
2372{
2373        int res, cpu;
2374
2375        net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2376        if (!net->ipv4.tcp_sk)
2377                return -ENOMEM;
2378
2379        for_each_possible_cpu(cpu) {
2380                struct sock *sk;
2381
2382                res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2383                                           IPPROTO_TCP, net);
2384                if (res)
2385                        goto fail;
2386                *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2387        }
2388
2389        net->ipv4.sysctl_tcp_ecn = 2;
2390        net->ipv4.sysctl_tcp_ecn_fallback = 1;
2391
2392        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2393        net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2394        net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2395
2396        net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2397        net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2398        net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2399
2400        net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2401        net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2402        net->ipv4.sysctl_tcp_syncookies = 1;
2403        net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2404        net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2405        net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2406        net->ipv4.sysctl_tcp_orphan_retries = 0;
2407        net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2408        net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2409
2410        return 0;
2411fail:
2412        tcp_sk_exit(net);
2413
2414        return res;
2415}
2416
2417static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2418{
2419        inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2420}
2421
2422static struct pernet_operations __net_initdata tcp_sk_ops = {
2423       .init       = tcp_sk_init,
2424       .exit       = tcp_sk_exit,
2425       .exit_batch = tcp_sk_exit_batch,
2426};
2427
2428void __init tcp_v4_init(void)
2429{
2430        inet_hashinfo_init(&tcp_hashinfo);
2431        if (register_pernet_subsys(&tcp_sk_ops))
2432                panic("Failed to create the TCP control socket.\n");
2433}
2434