linux/net/ipv6/tcp_ipv6.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

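/* Cache the validated input route (and its rt6 cookie) on the socket for
 * the receive fast path.  tcp_v6_do_rcv() later revalidates this cached
 * dst against rx_dst_cookie and drops it once it has gone stale.
 */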
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                sk->sk_rx_dst = dst;
                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
                tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
        }
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

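/* Runs before connect() proper: BPF_CGROUP_RUN_PROG_INET6_CONNECT gives an
 * attached cgroup BPF program a chance to inspect, and possibly rewrite,
 * the destination address while the socket lock is held.
 */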
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * out of the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

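/* Active open, roughly: validate the sockaddr and flow label, resolve the
 * scope id for link-local destinations, divert v4-mapped destinations to
 * the IPv4 path, do the IPv6 route and source-address lookup, bind a local
 * port via inet6_hash_connect(), derive the initial sequence number and
 * timestamp offset, then send the SYN with tcp_connect().
 */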
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface is set while binding, the indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

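/* Called for ICMPV6_PKT_TOOBIG, either directly from tcp_v6_err() or
 * deferred via TCP_MTU_REDUCED_DEFERRED when the socket was owned by user
 * context.  Shrinks the MSS to match the new path MTU and retransmits
 * whatever no longer fits.
 */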
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * The check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

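/* ICMPv6 error handler, the v6 counterpart of tcp_v4_err().  After the
 * socket lookup it dispatches on the error type: NDISC_REDIRECT updates
 * the cached route, PKT_TOOBIG feeds PMTU discovery (see above), and hard
 * errors on a SYN_SENT/SYN_RECV socket abort the connection attempt.
 */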
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }


        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check whether this ICMP message allows reverting the
                 * backoff (see RFC 6069).
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}


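/* Build and send a SYN-ACK for a request socket: grab a route if the
 * caller did not supply one, have tcp_make_synack() build the segment,
 * pick the traffic class (optionally reflecting the SYN's TOS, per the
 * tcp_reflect_tos sysctl), and hand the skb to ip6_xmit() under
 * rcu_read_lock() so the tx options can be dereferenced safely.
 */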
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

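/* Feed the fixed-length part of the TCP-MD5 digest input into the hash.
 * Per RFC 2385 the digest covers, in order: the IPv6 pseudo-header
 * (saddr, daddr, segment length, protocol), the TCP header with its
 * checksum zeroed and options excluded, then - added by the callers
 * below - the payload and finally the key itself.
 */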
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC 2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

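/* Validate TCP-MD5 on an incoming segment; returns true if the packet
 * should be dropped.  Four cases: neither a configured key nor an MD5
 * option -> accept; a key without the option, or the option without a
 * key -> drop (counted separately); both present -> recompute the digest
 * and compare all 16 bytes.
 */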
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* If sdif is set, the packet ingressed via a device in an L3
         * domain, and dif is set to the l3mdev.
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

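/* Common helper behind tcp_v6_send_reset() and tcp_v6_send_ack(): build a
 * bare TCP header (plus optional timestamp, MD5 and MPTCP-reset options)
 * in a freshly allocated skb, swap the addresses and ports taken from the
 * offending segment, route the reply and transmit it through the per-netns
 * control socket.
 */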
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;
        buff->csum = 0;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when it is for a RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

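/* Send a RST in reply to a bad segment.  As in RFC 793: if the segment
 * carried an ACK, the RST reuses that ack_seq as its own seq; otherwise
 * seq is 0 and ack_seq acknowledges everything the segment occupied.
 * If the peer signed the segment with TCP-MD5, the RST must be signed
 * with the same key, hence the key lookup dance below.
 */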
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /* The active side is lost. Try to find the listening socket
                 * through the source port, and then find the MD5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked against the MD5 hash of the
                 * found key, and no RST is generated if the hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

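/* ACK sent on behalf of a TIME_WAIT socket, e.g. in reply to a
 * retransmitted FIN.  The advertised window is right-shifted by the
 * negotiated window scale, as RFC 7323 requires for non-SYN segments.
 */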
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move the header back to the beginning if
         * xfrm6_policy_check() and tcp_v6_fill_cb() are going to be
         * called again.  ip6_datagram_recv_specific_ctl() also expects
         * IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

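/* Create the child socket once the handshake completes.  Two paths: for a
 * v4-mapped peer, tcp_v4_syn_recv_sock() builds the child and only the
 * af_ops/pinet6 plumbing is repaired here; for native IPv6, clone the
 * listener via tcp_create_openreq_child(), copy addresses and IPv6
 * options from the request, optionally inherit the MD5 key, and inherit
 * the listener's bound port.
 */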
1231static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1232                                         struct request_sock *req,
1233                                         struct dst_entry *dst,
1234                                         struct request_sock *req_unhash,
1235                                         bool *own_req)
1236{
1237        struct inet_request_sock *ireq;
1238        struct ipv6_pinfo *newnp;
1239        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1240        struct ipv6_txoptions *opt;
1241        struct inet_sock *newinet;
1242        bool found_dup_sk = false;
1243        struct tcp_sock *newtp;
1244        struct sock *newsk;
1245#ifdef CONFIG_TCP_MD5SIG
1246        struct tcp_md5sig_key *key;
1247        int l3index;
1248#endif
1249        struct flowi6 fl6;
1250
1251        if (skb->protocol == htons(ETH_P_IP)) {
1252                /*
1253                 *      v6 mapped
1254                 */
1255
1256                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1257                                             req_unhash, own_req);
1258
1259                if (!newsk)
1260                        return NULL;
1261
1262                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1263
1264                newinet = inet_sk(newsk);
1265                newnp = tcp_inet6_sk(newsk);
1266                newtp = tcp_sk(newsk);
1267
1268                memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1269
1270                newnp->saddr = newsk->sk_v6_rcv_saddr;
1271
1272                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1273                if (sk_is_mptcp(newsk))
1274                        mptcpv6_handle_mapped(newsk, true);
1275                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1276#ifdef CONFIG_TCP_MD5SIG
1277                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1278#endif
1279
1280                newnp->ipv6_mc_list = NULL;
1281                newnp->ipv6_ac_list = NULL;
1282                newnp->ipv6_fl_list = NULL;
1283                newnp->pktoptions  = NULL;
1284                newnp->opt         = NULL;
1285                newnp->mcast_oif   = inet_iif(skb);
1286                newnp->mcast_hops  = ip_hdr(skb)->ttl;
1287                newnp->rcv_flowinfo = 0;
1288                if (np->repflow)
1289                        newnp->flow_label = 0;
1290
1291                /*
1292                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1293                 * here, tcp_create_openreq_child now does this for us, see the comment in
1294                 * that function for the gory details. -acme
1295                 */
1296
1297                /* It is tricky place. Until this moment IPv4 tcp
1298                   worked with IPv6 icsk.icsk_af_ops.
1299                   Sync it now.
1300                 */
1301                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1302
1303                return newsk;
1304        }
1305
1306        ireq = inet_rsk(req);
1307
1308        if (sk_acceptq_is_full(sk))
1309                goto out_overflow;
1310
1311        if (!dst) {
1312                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1313                if (!dst)
1314                        goto out;
1315        }
1316
1317        newsk = tcp_create_openreq_child(sk, req, skb);
1318        if (!newsk)
1319                goto out_nonewsk;
1320
1321        /*
1322         * No need to charge this sock to the relevant IPv6 refcnt debug socks
1323         * count here, tcp_create_openreq_child now does this for us, see the
1324         * comment in that function for the gory details. -acme
1325         */
1326
1327        newsk->sk_gso_type = SKB_GSO_TCPV6;
1328        ip6_dst_store(newsk, dst, NULL, NULL);
1329        inet6_sk_rx_dst_set(newsk, skb);
1330
1331        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1332
1333        newtp = tcp_sk(newsk);
1334        newinet = inet_sk(newsk);
1335        newnp = tcp_inet6_sk(newsk);
1336
1337        memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1338
1339        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1340        newnp->saddr = ireq->ir_v6_loc_addr;
1341        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1342        newsk->sk_bound_dev_if = ireq->ir_iif;
1343
1344        /* Now IPv6 options...
1345
1346           First: no IPv4 options.
1347         */
1348        newinet->inet_opt = NULL;
1349        newnp->ipv6_mc_list = NULL;
1350        newnp->ipv6_ac_list = NULL;
1351        newnp->ipv6_fl_list = NULL;
1352
1353        /* Clone RX bits */
1354        newnp->rxopt.all = np->rxopt.all;
1355
1356        newnp->pktoptions = NULL;
1357        newnp->opt        = NULL;
1358        newnp->mcast_oif  = tcp_v6_iif(skb);
1359        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1360        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1361        if (np->repflow)
1362                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1363
1364        /* Set ToS of the new socket based upon the value of incoming SYN.
1365         * ECT bits are set later in tcp_init_transfer().
1366         */
1367        if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1368                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1369
1370        /* Clone native IPv6 options from listening socket (if any)
1371
1372           Yes, keeping reference count would be much more clever,
1373           but we make one more one thing there: reattach optmem
1374           to newsk.
1375         */
1376        opt = ireq->ipv6_opt;
1377        if (!opt)
1378                opt = rcu_dereference(np->opt);
1379        if (opt) {
1380                opt = ipv6_dup_options(newsk, opt);
1381                RCU_INIT_POINTER(newnp->opt, opt);
1382        }
1383        inet_csk(newsk)->icsk_ext_hdr_len = 0;
1384        if (opt)
1385                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1386                                                    opt->opt_flen;
1387
1388        tcp_ca_openreq_child(newsk, dst);
1389
1390        tcp_sync_mss(newsk, dst_mtu(dst));
1391        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1392
1393        tcp_initialize_rcv_mss(newsk);
1394
1395        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1396        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1397
1398#ifdef CONFIG_TCP_MD5SIG
1399        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1400
1401        /* Copy over the MD5 key from the original socket */
1402        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1403        if (key) {
1404                /* We're using one, so create a matching key
1405                 * on the newsk structure. If we fail to get
1406                 * memory, then we end up not copying the key
1407                 * across. Shucks.
1408                 */
1409                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1410                               AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1411                               sk_gfp_mask(sk, GFP_ATOMIC));
1412        }
1413#endif
1414
1415        if (__inet_inherit_port(sk, newsk) < 0) {
1416                inet_csk_prepare_forced_close(newsk);
1417                tcp_done(newsk);
1418                goto out;
1419        }
1420        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1421                                       &found_dup_sk);
1422        if (*own_req) {
1423                tcp_move_syn(newtp, req);
1424
1425                /* Clone pktoptions received with SYN, if we own the req */
1426                if (ireq->pktopts) {
1427                        newnp->pktoptions = skb_clone(ireq->pktopts,
1428                                                      sk_gfp_mask(sk, GFP_ATOMIC));
1429                        consume_skb(ireq->pktopts);
1430                        ireq->pktopts = NULL;
1431                        if (newnp->pktoptions) {
1432                                tcp_v6_restore_cb(newnp->pktoptions);
1433                                skb_set_owner_r(newnp->pktoptions, newsk);
1434                        }
1435                }
1436        } else {
1437                if (!req_unhash && found_dup_sk) {
1438                        /* This code path should only be executed in the
1439                         * syncookie case only
1440                         */
1441                        bh_unlock_sock(newsk);
1442                        sock_put(newsk);
1443                        newsk = NULL;
1444                }
1445        }
1446
1447        return newsk;
1448
1449out_overflow:
1450        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1451out_nonewsk:
1452        dst_release(dst);
1453out:
1454        tcp_listendrop(sk);
1455        return NULL;
1456}
1457
1458INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1459                                                           u32));
1460/* The socket must have its spinlock held when we get
1461 * here, unless it is a TCP_LISTEN socket.
1462 *
1463 * We have a potential double-lock case here, so even when
1464 * doing backlog processing we use the BH locking scheme.
1465 * This is because we cannot sleep with the original spinlock
1466 * held.
1467 */
1468static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1469{
1470        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1471        struct sk_buff *opt_skb = NULL;
1472        struct tcp_sock *tp;
1473
1474        /* Imagine: the socket is IPv6 but an IPv4 packet arrives,
1475           goes to the IPv4 receive handler and is backlogged.
1476           From the backlog it always ends up here. Kerboom...
1477           Fortunately, tcp_rcv_established and rcv_established
1478           handle it correctly, but that is not the case with
1479           tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1480         */
1481
1482        if (skb->protocol == htons(ETH_P_IP))
1483                return tcp_v4_do_rcv(sk, skb);
1484
1485        /*
1486         *      socket locking is here for SMP purposes as backlog rcv
1487         *      is currently called with bh processing disabled.
1488         */
1489
1490        /* Do Stevens' IPV6_PKTOPTIONS.
1491
1492           Yes, guys, this is the only place in our code where we
1493           can handle it without affecting IPv4.
1494           The rest of the code is protocol independent,
1495           and I do not like the idea of uglifying IPv4.
1496
1497           Actually, the whole idea behind IPV6_PKTOPTIONS
1498           does not look very well thought out. For now we latch
1499           the options received in the last packet enqueued
1500           by tcp. Feel free to propose a better solution.
1501                                               --ANK (980728)
1502         */
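            /* Illustrative userspace sketch (not kernel code): an
             * application typically opts in to these options with, e.g.:
             *
             *      int on = 1;
             *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
             *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on));
             *
             * For TCP they cannot be delivered per segment as ancillary
             * data, so the latest values are latched in np->pktoptions and
             * read back via getsockopt(IPV6_2292PKTOPTIONS).
             */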
1503        if (np->rxopt.all)
1504                opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1505
1506        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1507                struct dst_entry *dst = sk->sk_rx_dst;
1508
1509                sock_rps_save_rxhash(sk, skb);
1510                sk_mark_napi_id(sk, skb);
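                    /* Validate the cached input route: drop it if the
                     * incoming device changed or the route cookie no
                     * longer matches.
                     */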
1511                if (dst) {
1512                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1513                            INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1514                                            dst, np->rx_dst_cookie) == NULL) {
1515                                dst_release(dst);
1516                                sk->sk_rx_dst = NULL;
1517                        }
1518                }
1519
1520                tcp_rcv_established(sk, skb);
1521                if (opt_skb)
1522                        goto ipv6_pktoptions;
1523                return 0;
1524        }
1525
1526        if (tcp_checksum_complete(skb))
1527                goto csum_err;
1528
1529        if (sk->sk_state == TCP_LISTEN) {
1530                struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1531
1532                if (!nsk)
1533                        goto discard;
1534
1535                if (nsk != sk) {
1536                        if (tcp_child_process(sk, nsk, skb))
1537                                goto reset;
1538                        if (opt_skb)
1539                                __kfree_skb(opt_skb);
1540                        return 0;
1541                }
1542        } else
1543                sock_rps_save_rxhash(sk, skb);
1544
1545        if (tcp_rcv_state_process(sk, skb))
1546                goto reset;
1547        if (opt_skb)
1548                goto ipv6_pktoptions;
1549        return 0;
1550
1551reset:
1552        tcp_v6_send_reset(sk, skb);
1553discard:
1554        if (opt_skb)
1555                __kfree_skb(opt_skb);
1556        kfree_skb(skb);
1557        return 0;
1558csum_err:
1559        trace_tcp_bad_csum(skb);
1560        TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1561        TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1562        goto discard;
1563
1564
1565ipv6_pktoptions:
1566        /* You may ask, what is this? We get here when:
1567
1568           1. the skb was enqueued by tcp.
1569           2. the skb was added to the tail of the read queue, not out of order.
1570           3. the socket is not in a passive state.
1571           4. finally, it really contains options the user wants to receive.
1572         */
1573        tp = tcp_sk(sk);
1574        if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1575            !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1576                if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1577                        np->mcast_oif = tcp_v6_iif(opt_skb);
1578                if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1579                        np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1580                if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1581                        np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1582                if (np->repflow)
1583                        np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1584                if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1585                        skb_set_owner_r(opt_skb, sk);
1586                        tcp_v6_restore_cb(opt_skb);
1587                        opt_skb = xchg(&np->pktoptions, opt_skb);
1588                } else {
1589                        __kfree_skb(opt_skb);
1590                        opt_skb = xchg(&np->pktoptions, NULL);
1591                }
1592        }
1593
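            /* opt_skb now holds either the previously latched skb
             * (returned by xchg() above) or an unused clone; release it.
             */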
1594        kfree_skb(opt_skb);
1595        return 0;
1596}
1597
1598static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1599                           const struct tcphdr *th)
1600{
1601        /* This is tricky: we move IP6CB to its correct location inside
1602         * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1603         * _decode_session6() uses IP6CB().
1604         * barrier() makes sure the compiler won't play aliasing games.
1605         */
1606        memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1607                sizeof(struct inet6_skb_parm));
1608        barrier();
1609
1610        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1611        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1612                                    skb->len - th->doff*4);
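            /* For example, a SYN carrying no payload occupies one
             * sequence number, so end_seq == seq + 1, while a pure ACK
             * has end_seq == seq.
             */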
1613        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1614        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1615        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1616        TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1617        TCP_SKB_CB(skb)->sacked = 0;
1618        TCP_SKB_CB(skb)->has_rxtstamp =
1619                        skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1620}
1621
1622INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1623{
1624        struct sk_buff *skb_to_free;
1625        int sdif = inet6_sdif(skb);
1626        int dif = inet6_iif(skb);
1627        const struct tcphdr *th;
1628        const struct ipv6hdr *hdr;
1629        bool refcounted;
1630        struct sock *sk;
1631        int ret;
1632        struct net *net = dev_net(skb->dev);
1633
1634        if (skb->pkt_type != PACKET_HOST)
1635                goto discard_it;
1636
1637        /*
1638         *      Count it even if it's bad.
1639         */
1640        __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1641
1642        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1643                goto discard_it;
1644
1645        th = (const struct tcphdr *)skb->data;
1646
1647        if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1648                goto bad_packet;
1649        if (!pskb_may_pull(skb, th->doff*4))
1650                goto discard_it;
1651
1652        if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1653                goto csum_error;
1654
1655        th = (const struct tcphdr *)skb->data;
1656        hdr = ipv6_hdr(skb);
1657
1658lookup:
1659        sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1660                                th->source, th->dest, inet6_iif(skb), sdif,
1661                                &refcounted);
1662        if (!sk)
1663                goto no_tcp_socket;
1664
1665process:
1666        if (sk->sk_state == TCP_TIME_WAIT)
1667                goto do_time_wait;
1668
1669        if (sk->sk_state == TCP_NEW_SYN_RECV) {
1670                struct request_sock *req = inet_reqsk(sk);
1671                bool req_stolen = false;
1672                struct sock *nsk;
1673
1674                sk = req->rsk_listener;
1675                if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1676                        sk_drops_add(sk, skb);
1677                        reqsk_put(req);
1678                        goto discard_it;
1679                }
1680                if (tcp_checksum_complete(skb)) {
1681                        reqsk_put(req);
1682                        goto csum_error;
1683                }
1684                if (unlikely(sk->sk_state != TCP_LISTEN)) {
1685                        nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1686                        if (!nsk) {
1687                                inet_csk_reqsk_queue_drop_and_put(sk, req);
1688                                goto lookup;
1689                        }
1690                        sk = nsk;
1691                        /* reuseport_migrate_sock() has already taken one sk_refcnt
1692                         * reference before returning.
1693                         */
1694                } else {
1695                        sock_hold(sk);
1696                }
1697                refcounted = true;
1698                nsk = NULL;
1699                if (!tcp_filter(sk, skb)) {
1700                        th = (const struct tcphdr *)skb->data;
1701                        hdr = ipv6_hdr(skb);
1702                        tcp_v6_fill_cb(skb, hdr, th);
1703                        nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1704                }
1705                if (!nsk) {
1706                        reqsk_put(req);
1707                        if (req_stolen) {
1708                                /* Another cpu got exclusive access to req
1709                         * and created a full-blown socket.
1710                                 * Try to feed this packet to this socket
1711                                 * instead of discarding it.
1712                                 */
1713                                tcp_v6_restore_cb(skb);
1714                                sock_put(sk);
1715                                goto lookup;
1716                        }
1717                        goto discard_and_relse;
1718                }
1719                if (nsk == sk) {
1720                        reqsk_put(req);
1721                        tcp_v6_restore_cb(skb);
1722                } else if (tcp_child_process(sk, nsk, skb)) {
1723                        tcp_v6_send_reset(nsk, skb);
1724                        goto discard_and_relse;
1725                } else {
1726                        sock_put(sk);
1727                        return 0;
1728                }
1729        }
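            /* IPV6_MINHOPCOUNT, a generalized-TTL style check in the
             * spirit of RFC 5082: drop segments whose hop limit is
             * below the configured minimum.
             */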
1730        if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1731                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1732                goto discard_and_relse;
1733        }
1734
1735        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1736                goto discard_and_relse;
1737
1738        if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1739                goto discard_and_relse;
1740
1741        if (tcp_filter(sk, skb))
1742                goto discard_and_relse;
1743        th = (const struct tcphdr *)skb->data;
1744        hdr = ipv6_hdr(skb);
1745        tcp_v6_fill_cb(skb, hdr, th);
1746
1747        skb->dev = NULL;
1748
1749        if (sk->sk_state == TCP_LISTEN) {
1750                ret = tcp_v6_do_rcv(sk, skb);
1751                goto put_and_return;
1752        }
1753
1754        sk_incoming_cpu_update(sk);
1755
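            /* If the socket is owned by user context, the segment goes
             * to the backlog and is fed back through tcp_v6_do_rcv()
             * when the owner calls release_sock().
             */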
1756        bh_lock_sock_nested(sk);
1757        tcp_segs_in(tcp_sk(sk), skb);
1758        ret = 0;
1759        if (!sock_owned_by_user(sk)) {
1760                skb_to_free = sk->sk_rx_skb_cache;
1761                sk->sk_rx_skb_cache = NULL;
1762                ret = tcp_v6_do_rcv(sk, skb);
1763        } else {
1764                if (tcp_add_backlog(sk, skb))
1765                        goto discard_and_relse;
1766                skb_to_free = NULL;
1767        }
1768        bh_unlock_sock(sk);
1769        if (skb_to_free)
1770                __kfree_skb(skb_to_free);
1771put_and_return:
1772        if (refcounted)
1773                sock_put(sk);
1774        return ret ? -1 : 0;
1775
1776no_tcp_socket:
1777        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1778                goto discard_it;
1779
1780        tcp_v6_fill_cb(skb, hdr, th);
1781
1782        if (tcp_checksum_complete(skb)) {
1783csum_error:
1784                trace_tcp_bad_csum(skb);
1785                __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1786bad_packet:
1787                __TCP_INC_STATS(net, TCP_MIB_INERRS);
1788        } else {
1789                tcp_v6_send_reset(NULL, skb);
1790        }
1791
1792discard_it:
1793        kfree_skb(skb);
1794        return 0;
1795
1796discard_and_relse:
1797        sk_drops_add(sk, skb);
1798        if (refcounted)
1799                sock_put(sk);
1800        goto discard_it;
1801
1802do_time_wait:
1803        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1804                inet_twsk_put(inet_twsk(sk));
1805                goto discard_it;
1806        }
1807
1808        tcp_v6_fill_cb(skb, hdr, th);
1809
1810        if (tcp_checksum_complete(skb)) {
1811                inet_twsk_put(inet_twsk(sk));
1812                goto csum_error;
1813        }
1814
1815        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1816        case TCP_TW_SYN:
1817        {
1818                struct sock *sk2;
1819
1820                sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1821                                            skb, __tcp_hdrlen(th),
1822                                            &ipv6_hdr(skb)->saddr, th->source,
1823                                            &ipv6_hdr(skb)->daddr,
1824                                            ntohs(th->dest),
1825                                            tcp_v6_iif_l3_slave(skb),
1826                                            sdif);
1827                if (sk2) {
1828                        struct inet_timewait_sock *tw = inet_twsk(sk);
1829                        inet_twsk_deschedule_put(tw);
1830                        sk = sk2;
1831                        tcp_v6_restore_cb(skb);
1832                        refcounted = false;
1833                        goto process;
1834                }
1835        }
1836                /* to ACK */
1837                fallthrough;
1838        case TCP_TW_ACK:
1839                tcp_v6_timewait_ack(sk, skb);
1840                break;
1841        case TCP_TW_RST:
1842                tcp_v6_send_reset(sk, skb);
1843                inet_twsk_deschedule_put(inet_twsk(sk));
1844                goto discard_it;
1845        case TCP_TW_SUCCESS:
1846                ;
1847        }
1848        goto discard_it;
1849}
1850
1851INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1852{
1853        const struct ipv6hdr *hdr;
1854        const struct tcphdr *th;
1855        struct sock *sk;
1856
1857        if (skb->pkt_type != PACKET_HOST)
1858                return;
1859
1860        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1861                return;
1862
1863        hdr = ipv6_hdr(skb);
1864        th = tcp_hdr(skb);
1865
1866        if (th->doff < sizeof(struct tcphdr) / 4)
1867                return;
1868
1869        /* Note: we use inet6_iif() here, not tcp_v6_iif(); TCP_SKB_CB() has not been filled in yet */
1870        sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1871                                        &hdr->saddr, th->source,
1872                                        &hdr->daddr, ntohs(th->dest),
1873                                        inet6_iif(skb), inet6_sdif(skb));
1874        if (sk) {
1875                skb->sk = sk;
1876                skb->destructor = sock_edemux;
1877                if (sk_fullsock(sk)) {
1878                        struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1879
1880                        if (dst)
1881                                dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1882                        if (dst &&
1883                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1884                                skb_dst_set_noref(skb, dst);
1885                }
1886        }
1887}
1888
1889static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1890        .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1891        .twsk_unique    = tcp_twsk_unique,
1892        .twsk_destructor = tcp_twsk_destructor,
1893};
1894
1895INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1896{
1897        struct ipv6_pinfo *np = inet6_sk(sk);
1898
1899        __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1900}
1901
1902const struct inet_connection_sock_af_ops ipv6_specific = {
1903        .queue_xmit        = inet6_csk_xmit,
1904        .send_check        = tcp_v6_send_check,
1905        .rebuild_header    = inet6_sk_rebuild_header,
1906        .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1907        .conn_request      = tcp_v6_conn_request,
1908        .syn_recv_sock     = tcp_v6_syn_recv_sock,
1909        .net_header_len    = sizeof(struct ipv6hdr),
1910        .net_frag_header_len = sizeof(struct frag_hdr),
1911        .setsockopt        = ipv6_setsockopt,
1912        .getsockopt        = ipv6_getsockopt,
1913        .addr2sockaddr     = inet6_csk_addr2sockaddr,
1914        .sockaddr_len      = sizeof(struct sockaddr_in6),
1915        .mtu_reduced       = tcp_v6_mtu_reduced,
1916};
1917
1918#ifdef CONFIG_TCP_MD5SIG
1919static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1920        .md5_lookup     =       tcp_v6_md5_lookup,
1921        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1922        .md5_parse      =       tcp_v6_parse_md5_keys,
1923};
1924#endif
1925
1926/*
1927 *      TCP over IPv4 via INET6 API
1928 */
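    /* tcp_v6_connect() switches an AF_INET6 socket to these ops when the
     * destination is an IPv4-mapped address (::ffff:a.b.c.d).
     */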
1929static const struct inet_connection_sock_af_ops ipv6_mapped = {
1930        .queue_xmit        = ip_queue_xmit,
1931        .send_check        = tcp_v4_send_check,
1932        .rebuild_header    = inet_sk_rebuild_header,
1933        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1934        .conn_request      = tcp_v6_conn_request,
1935        .syn_recv_sock     = tcp_v6_syn_recv_sock,
1936        .net_header_len    = sizeof(struct iphdr),
1937        .setsockopt        = ipv6_setsockopt,
1938        .getsockopt        = ipv6_getsockopt,
1939        .addr2sockaddr     = inet6_csk_addr2sockaddr,
1940        .sockaddr_len      = sizeof(struct sockaddr_in6),
1941        .mtu_reduced       = tcp_v4_mtu_reduced,
1942};
1943
1944#ifdef CONFIG_TCP_MD5SIG
1945static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1946        .md5_lookup     =       tcp_v4_md5_lookup,
1947        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1948        .md5_parse      =       tcp_v6_parse_md5_keys,
1949};
1950#endif
1951
1952/* NOTE: A lot of things are set to zero explicitly by the call to
1953 *       sk_alloc(), so they need not be done here.
1954 */
1955static int tcp_v6_init_sock(struct sock *sk)
1956{
1957        struct inet_connection_sock *icsk = inet_csk(sk);
1958
1959        tcp_init_sock(sk);
1960
1961        icsk->icsk_af_ops = &ipv6_specific;
1962
1963#ifdef CONFIG_TCP_MD5SIG
1964        tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1965#endif
1966
1967        return 0;
1968}
1969
1970static void tcp_v6_destroy_sock(struct sock *sk)
1971{
1972        tcp_v4_destroy_sock(sk);
1973        inet6_destroy_sock(sk);
1974}
1975
1976#ifdef CONFIG_PROC_FS
1977/* Proc filesystem TCPv6 sock list dumping. */
1978static void get_openreq6(struct seq_file *seq,
1979                         const struct request_sock *req, int i)
1980{
1981        long ttd = req->rsk_timer.expires - jiffies;
1982        const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1983        const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1984
1985        if (ttd < 0)
1986                ttd = 0;
1987
1988        seq_printf(seq,
1989                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1990                   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1991                   i,
1992                   src->s6_addr32[0], src->s6_addr32[1],
1993                   src->s6_addr32[2], src->s6_addr32[3],
1994                   inet_rsk(req)->ir_num,
1995                   dest->s6_addr32[0], dest->s6_addr32[1],
1996                   dest->s6_addr32[2], dest->s6_addr32[3],
1997                   ntohs(inet_rsk(req)->ir_rmt_port),
1998                   TCP_SYN_RECV,
1999                   0, 0, /* could print option size, but that is af dependent. */
2000                   1,   /* timers active (only the expire timer) */
2001                   jiffies_to_clock_t(ttd),
2002                   req->num_timeout,
2003                   from_kuid_munged(seq_user_ns(seq),
2004                                    sock_i_uid(req->rsk_listener)),
2005                   0,  /* non standard timer */
2006                   0, /* open_requests have no inode */
2007                   0, req);
2008}
2009
2010static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2011{
2012        const struct in6_addr *dest, *src;
2013        __u16 destp, srcp;
2014        int timer_active;
2015        unsigned long timer_expires;
2016        const struct inet_sock *inet = inet_sk(sp);
2017        const struct tcp_sock *tp = tcp_sk(sp);
2018        const struct inet_connection_sock *icsk = inet_csk(sp);
2019        const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2020        int rx_queue;
2021        int state;
2022
2023        dest  = &sp->sk_v6_daddr;
2024        src   = &sp->sk_v6_rcv_saddr;
2025        destp = ntohs(inet->inet_dport);
2026        srcp  = ntohs(inet->inet_sport);
2027
2028        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2029            icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2030            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2031                timer_active    = 1;
2032                timer_expires   = icsk->icsk_timeout;
2033        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2034                timer_active    = 4;
2035                timer_expires   = icsk->icsk_timeout;
2036        } else if (timer_pending(&sp->sk_timer)) {
2037                timer_active    = 2;
2038                timer_expires   = sp->sk_timer.expires;
2039        } else {
2040                timer_active    = 0;
2041                timer_expires = jiffies;
2042        }
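            /* Timer codes reported in /proc/net/tcp6: 0 none,
             * 1 retransmit/loss probe, 2 keepalive, 4 zero-window probe;
             * 3 is used for time-wait sockets (see get_timewait6_sock()).
             */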
2043
2044        state = inet_sk_state_load(sp);
2045        if (state == TCP_LISTEN)
2046                rx_queue = READ_ONCE(sp->sk_ack_backlog);
2047        else
2048                /* Because we don't lock the socket,
2049                 * we might find a transient negative value.
2050                 */
2051                rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2052                                      READ_ONCE(tp->copied_seq), 0);
2053
2054        seq_printf(seq,
2055                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2056                   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2057                   i,
2058                   src->s6_addr32[0], src->s6_addr32[1],
2059                   src->s6_addr32[2], src->s6_addr32[3], srcp,
2060                   dest->s6_addr32[0], dest->s6_addr32[1],
2061                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2062                   state,
2063                   READ_ONCE(tp->write_seq) - tp->snd_una,
2064                   rx_queue,
2065                   timer_active,
2066                   jiffies_delta_to_clock_t(timer_expires - jiffies),
2067                   icsk->icsk_retransmits,
2068                   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2069                   icsk->icsk_probes_out,
2070                   sock_i_ino(sp),
2071                   refcount_read(&sp->sk_refcnt), sp,
2072                   jiffies_to_clock_t(icsk->icsk_rto),
2073                   jiffies_to_clock_t(icsk->icsk_ack.ato),
2074                   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2075                   tp->snd_cwnd,
2076                   state == TCP_LISTEN ?
2077                        fastopenq->max_qlen :
2078                        (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2079                   );
2080}
2081
2082static void get_timewait6_sock(struct seq_file *seq,
2083                               struct inet_timewait_sock *tw, int i)
2084{
2085        long delta = tw->tw_timer.expires - jiffies;
2086        const struct in6_addr *dest, *src;
2087        __u16 destp, srcp;
2088
2089        dest = &tw->tw_v6_daddr;
2090        src  = &tw->tw_v6_rcv_saddr;
2091        destp = ntohs(tw->tw_dport);
2092        srcp  = ntohs(tw->tw_sport);
2093
2094        seq_printf(seq,
2095                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2096                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2097                   i,
2098                   src->s6_addr32[0], src->s6_addr32[1],
2099                   src->s6_addr32[2], src->s6_addr32[3], srcp,
2100                   dest->s6_addr32[0], dest->s6_addr32[1],
2101                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2102                   tw->tw_substate, 0, 0,
2103                   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2104                   refcount_read(&tw->tw_refcnt), tw);
2105}
2106
2107static int tcp6_seq_show(struct seq_file *seq, void *v)
2108{
2109        struct tcp_iter_state *st;
2110        struct sock *sk = v;
2111
2112        if (v == SEQ_START_TOKEN) {
2113                seq_puts(seq,
2114                         "  sl  "
2115                         "local_address                         "
2116                         "remote_address                        "
2117                         "st tx_queue rx_queue tr tm->when retrnsmt"
2118                         "   uid  timeout inode\n");
2119                goto out;
2120        }
2121        st = seq->private;
2122
2123        if (sk->sk_state == TCP_TIME_WAIT)
2124                get_timewait6_sock(seq, v, st->num);
2125        else if (sk->sk_state == TCP_NEW_SYN_RECV)
2126                get_openreq6(seq, v, st->num);
2127        else
2128                get_tcp6_sock(seq, v, st->num);
2129out:
2130        return 0;
2131}
2132
2133static const struct seq_operations tcp6_seq_ops = {
2134        .show           = tcp6_seq_show,
2135        .start          = tcp_seq_start,
2136        .next           = tcp_seq_next,
2137        .stop           = tcp_seq_stop,
2138};
2139
2140static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2141        .family         = AF_INET6,
2142};
2143
2144int __net_init tcp6_proc_init(struct net *net)
2145{
2146        if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2147                        sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2148                return -ENOMEM;
2149        return 0;
2150}
2151
2152void tcp6_proc_exit(struct net *net)
2153{
2154        remove_proc_entry("tcp6", net->proc_net);
2155}
2156#endif
2157
2158struct proto tcpv6_prot = {
2159        .name                   = "TCPv6",
2160        .owner                  = THIS_MODULE,
2161        .close                  = tcp_close,
2162        .pre_connect            = tcp_v6_pre_connect,
2163        .connect                = tcp_v6_connect,
2164        .disconnect             = tcp_disconnect,
2165        .accept                 = inet_csk_accept,
2166        .ioctl                  = tcp_ioctl,
2167        .init                   = tcp_v6_init_sock,
2168        .destroy                = tcp_v6_destroy_sock,
2169        .shutdown               = tcp_shutdown,
2170        .setsockopt             = tcp_setsockopt,
2171        .getsockopt             = tcp_getsockopt,
2172        .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2173        .keepalive              = tcp_set_keepalive,
2174        .recvmsg                = tcp_recvmsg,
2175        .sendmsg                = tcp_sendmsg,
2176        .sendpage               = tcp_sendpage,
2177        .backlog_rcv            = tcp_v6_do_rcv,
2178        .release_cb             = tcp_release_cb,
2179        .hash                   = inet6_hash,
2180        .unhash                 = inet_unhash,
2181        .get_port               = inet_csk_get_port,
2182#ifdef CONFIG_BPF_SYSCALL
2183        .psock_update_sk_prot   = tcp_bpf_update_proto,
2184#endif
2185        .enter_memory_pressure  = tcp_enter_memory_pressure,
2186        .leave_memory_pressure  = tcp_leave_memory_pressure,
2187        .stream_memory_free     = tcp_stream_memory_free,
2188        .sockets_allocated      = &tcp_sockets_allocated,
2189        .memory_allocated       = &tcp_memory_allocated,
2190        .memory_pressure        = &tcp_memory_pressure,
2191        .orphan_count           = &tcp_orphan_count,
2192        .sysctl_mem             = sysctl_tcp_mem,
2193        .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2194        .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2195        .max_header             = MAX_TCP_HEADER,
2196        .obj_size               = sizeof(struct tcp6_sock),
2197        .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2198        .twsk_prot              = &tcp6_timewait_sock_ops,
2199        .rsk_prot               = &tcp6_request_sock_ops,
2200        .h.hashinfo             = &tcp_hashinfo,
2201        .no_autobind            = true,
2202        .diag_destroy           = tcp_abort,
2203};
2204EXPORT_SYMBOL_GPL(tcpv6_prot);
2205
2206/* Thinking of making this const? Don't.
2207 * early_demux can change based on sysctl.
2208 */
2209static struct inet6_protocol tcpv6_protocol = {
2210        .early_demux    =       tcp_v6_early_demux,
2211        .early_demux_handler =  tcp_v6_early_demux,
2212        .handler        =       tcp_v6_rcv,
2213        .err_handler    =       tcp_v6_err,
2214        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2215};
2216
2217static struct inet_protosw tcpv6_protosw = {
2218        .type           =       SOCK_STREAM,
2219        .protocol       =       IPPROTO_TCP,
2220        .prot           =       &tcpv6_prot,
2221        .ops            =       &inet6_stream_ops,
2222        .flags          =       INET_PROTOSW_PERMANENT |
2223                                INET_PROTOSW_ICSK,
2224};
2225
2226static int __net_init tcpv6_net_init(struct net *net)
2227{
2228        return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2229                                    SOCK_RAW, IPPROTO_TCP, net);
2230}
2231
2232static void __net_exit tcpv6_net_exit(struct net *net)
2233{
2234        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2235}
2236
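    /* On batched netns teardown, purge any remaining IPv6 time-wait
     * sockets so they do not outlive their namespace.
     */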
2237static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2238{
2239        inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2240}
2241
2242static struct pernet_operations tcpv6_net_ops = {
2243        .init       = tcpv6_net_init,
2244        .exit       = tcpv6_net_exit,
2245        .exit_batch = tcpv6_net_exit_batch,
2246};
2247
2248int __init tcpv6_init(void)
2249{
2250        int ret;
2251
2252        ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2253        if (ret)
2254                goto out;
2255
2256        /* register inet6 protocol */
2257        ret = inet6_register_protosw(&tcpv6_protosw);
2258        if (ret)
2259                goto out_tcpv6_protocol;
2260
2261        ret = register_pernet_subsys(&tcpv6_net_ops);
2262        if (ret)
2263                goto out_tcpv6_protosw;
2264
2265        ret = mptcpv6_init();
2266        if (ret)
2267                goto out_tcpv6_pernet_subsys;
2268
2269out:
2270        return ret;
2271
2272out_tcpv6_pernet_subsys:
2273        unregister_pernet_subsys(&tcpv6_net_ops);
2274out_tcpv6_protosw:
2275        inet6_unregister_protosw(&tcpv6_protosw);
2276out_tcpv6_protocol:
2277        inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2278        goto out;
2279}
2280
2281void tcpv6_exit(void)
2282{
2283        unregister_pernet_subsys(&tcpv6_net_ops);
2284        inet6_unregister_protosw(&tcpv6_protosw);
2285        inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2286}
2287