linux/net/ipv6/tcp_ipv6.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support the IPV6_V6ONLY socket option,
 *      Alexey Kuznetsov                which allows both IPv4 and IPv6 sockets
 *                                      to bind to a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif
/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
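/* A sketch of the layout this relies on (mirroring struct tcp6_sock):
 *
 *      struct tcp6_sock {
 *              struct tcp_sock   tcp;
 *              struct ipv6_pinfo inet6;   (must be the last member)
 *      };
 *
 * so the ipv6_pinfo lives at a fixed, compile-time offset from the start
 * of the socket, which is what the pointer arithmetic below relies on.
 */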
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * outside the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface was set while binding, the indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */
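        /* A v4-mapped destination such as ::ffff:192.0.2.1 carries the
         * IPv4 address in s6_addr32[3]; the connection is handed over to
         * the IPv4 code below.
         */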

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
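        /* i.e. 1280 (IPV6_MIN_MTU) - 20 (struct tcphdr) - 40 (struct ipv6hdr)
         * = 1220 bytes of MSS clamp
         */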

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (see tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check if this ICMP message allows reverting the RTO backoff
                 * (see RFC 6069).
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* OK to check set/not set outside of RCU;
                 * right now the device MUST be an L3 master.
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC 2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);
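        /* bp now holds the RFC 2460 section 8.1 pseudo-header: the source
         * and destination addresses, the upper-layer payload length, and
         * the next-header value padded out to 32 bits.
         */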

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);

                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* sdif set means the packet ingressed via a device in an L3 domain,
         * and dif is set to the l3mdev.
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
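        /* doff counts 32-bit words; tot_len is kept 4-byte aligned above */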
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
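                /* The word above is the byte sequence 01 01 08 0a: two NOPs
                 * for alignment, then kind 8 (TIMESTAMP) and length 10,
                 * followed by TSval and TSecr below.
                 */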
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when sending a RST;
         * the underlying function uses it to retrieve the network
         * namespace.
         */
        dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route was correct. prequeue might have dropped our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set means the packet ingressed via a device in an L3
                 * domain, and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is gone. Try to find the listening socket
                 * through the source port, then look up the MD5 key through
                 * it. We do not lose security here: the incoming packet is
                 * verified against the key we find, and no RST is generated
                 * if the MD5 hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set means the packet ingressed via a device in an L3
                 * domain, and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

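        /* Per RFC 793: if the incoming segment carries an ACK, the RST takes
         * its sequence number from that ACK; otherwise it ACKs everything the
         * segment occupied, counting SYN and FIN as one sequence number each.
         */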
        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
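        /* e.g. a 262144-byte receive window with Rcv.Wind.Shift = 7 goes
         * on the wire as 262144 >> 7 = 2048.
         */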
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

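        /* Only the bare ACK that completes the three-way handshake can
         * carry a valid syncookie; a segment with SYN set is a fresh
         * connection attempt.
         */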
        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 code
                   worked with the IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever, but we
           do one more thing here: reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone(ireq->pktopts,
                                                      sk_gfp_mask(sk, GFP_ATOMIC));
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions) {
                                tcp_v6_restore_cb(newnp->pktoptions);
                                skb_set_owner_r(newnp->pktoptions, newsk);
                        }
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
                                                           u32));
1461/* The socket must have its spinlock held when we get
1462 * here, unless it is a TCP_LISTEN socket.
1463 *
1464 * We have a potential double-lock case here, so even when
1465 * doing backlog processing we use the BH locking scheme.
1466 * This is because we cannot sleep with the original spinlock
1467 * held.
1468 */
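/* For reference, the calling pattern used by tcp_v6_rcv() further down:
 *
 *	bh_lock_sock_nested(sk);
 *	if (!sock_owned_by_user(sk))
 *		ret = tcp_v6_do_rcv(sk, skb);
 *	else if (tcp_add_backlog(sk, skb))
 *		goto discard_and_relse;
 *	bh_unlock_sock(sk);
 */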
1469static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1470{
1471        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1472        struct sk_buff *opt_skb = NULL;
1473        struct tcp_sock *tp;
1474
1475        /* Imagine: an IPv6 socket receives an IPv4 packet, which
1476           goes to the IPv4 receive handler and is backlogged.
1477           From the backlog it always ends up here. Kerboom...
1478           Fortunately, tcp_rcv_established and rcv_established
1479           handle it correctly, but that is not the case with
1480           tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1481         */
1482
1483        if (skb->protocol == htons(ETH_P_IP))
1484                return tcp_v4_do_rcv(sk, skb);
1485
1486        /*
1487         *      Socket locking is done here for SMP purposes, as backlog
1488         *      rcv is currently called with bh processing disabled.
1489         */
1490
1491        /* Do Stevens' IPV6_PKTOPTIONS.
1492
1493           Yes, guys, this is the only place in our code where we
1494           can handle it without affecting IPv4.
1495           The rest of the code is protocol independent,
1496           and I do not like the idea of uglifying IPv4.
1497
1498           Actually, the whole idea behind IPV6_PKTOPTIONS
1499           does not look very well thought out. For now we latch
1500           the options received in the last packet enqueued
1501           by tcp. Feel free to propose a better solution.
1502                                               --ANK (980728)
1503         */
1504        if (np->rxopt.all)
1505                opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1506
1507        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1508                struct dst_entry *dst;
1509
1510                dst = rcu_dereference_protected(sk->sk_rx_dst,
1511                                                lockdep_sock_is_held(sk));
1512
1513                sock_rps_save_rxhash(sk, skb);
1514                sk_mark_napi_id(sk, skb);
1515                if (dst) {
1516                        if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1517                            INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1518                                            dst, sk->sk_rx_dst_cookie) == NULL) {
1519                                RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1520                                dst_release(dst);
1521                        }
1522                }
1523
1524                tcp_rcv_established(sk, skb);
1525                if (opt_skb)
1526                        goto ipv6_pktoptions;
1527                return 0;
1528        }
1529
1530        if (tcp_checksum_complete(skb))
1531                goto csum_err;
1532
1533        if (sk->sk_state == TCP_LISTEN) {
1534                struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1535
1536                if (!nsk)
1537                        goto discard;
1538
1539                if (nsk != sk) {
1540                        if (tcp_child_process(sk, nsk, skb))
1541                                goto reset;
1542                        if (opt_skb)
1543                                __kfree_skb(opt_skb);
1544                        return 0;
1545                }
1546        } else
1547                sock_rps_save_rxhash(sk, skb);
1548
1549        if (tcp_rcv_state_process(sk, skb))
1550                goto reset;
1551        if (opt_skb)
1552                goto ipv6_pktoptions;
1553        return 0;
1554
1555reset:
1556        tcp_v6_send_reset(sk, skb);
1557discard:
1558        if (opt_skb)
1559                __kfree_skb(opt_skb);
1560        kfree_skb(skb);
1561        return 0;
1562csum_err:
1563        trace_tcp_bad_csum(skb);
1564        TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1565        TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1566        goto discard;
1567
1569ipv6_pktoptions:
1570        /* You may ask, what is this? The skb is latched only if:
1571
1572           1. the skb was enqueued by tcp;
1573           2. the skb was added to the tail of the read queue, not out of order;
1574           3. the socket is not in a passive state;
1575           4. finally, it really contains options the user wants to receive.
1576         */
1577        tp = tcp_sk(sk);
1578        if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1579            !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1580                if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1581                        np->mcast_oif = tcp_v6_iif(opt_skb);
1582                if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1583                        np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1584                if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1585                        np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1586                if (np->repflow)
1587                        np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1588                if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1589                        skb_set_owner_r(opt_skb, sk);
1590                        tcp_v6_restore_cb(opt_skb);
1591                        opt_skb = xchg(&np->pktoptions, opt_skb);
1592                } else {
1593                        __kfree_skb(opt_skb);
1594                        opt_skb = xchg(&np->pktoptions, NULL);
1595                }
1596        }
1597
1598        consume_skb(opt_skb);
1599        return 0;
1600}
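/* A minimal userspace sketch of consuming the options latched in
 * np->pktoptions above (fd is hypothetical; IPV6_RECVPKTINFO and the
 * Linux-specific getsockopt(IPV6_PKTOPTIONS) are real interfaces, error
 * handling elided):
 *
 *	int on = 1;
 *	char cbuf[256];
 *	socklen_t clen = sizeof(cbuf);
 *
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
 *	... exchange data on the connection ...
 *	getsockopt(fd, IPPROTO_IPV6, IPV6_PKTOPTIONS, cbuf, &clen);
 *	... cbuf now holds cmsg-formatted data built from np->pktoptions ...
 */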
1601
1602static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1603                           const struct tcphdr *th)
1604{
1605        /* This is tricky: we move IP6CB to its correct location inside
1606         * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1607         * _decode_session6() uses IP6CB().
1608         * barrier() makes sure compiler won't play aliasing games.
1609         */
1610        memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1611                sizeof(struct inet6_skb_parm));
1612        barrier();
1613
1614        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1615        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1616                                    skb->len - th->doff*4);
1617        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1618        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1619        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1620        TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1621        TCP_SKB_CB(skb)->sacked = 0;
1622        TCP_SKB_CB(skb)->has_rxtstamp =
1623                        skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1624}
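/* Example: for a pure SYN with no payload (doff == 5, skb->len == 20), the
 * arithmetic above gives end_seq = seq + 1 + 0 + 20 - 20 = seq + 1; SYN and
 * FIN each consume exactly one sequence number.
 */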
1625
1626INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1627{
1628        int sdif = inet6_sdif(skb);
1629        int dif = inet6_iif(skb);
1630        const struct tcphdr *th;
1631        const struct ipv6hdr *hdr;
1632        bool refcounted;
1633        struct sock *sk;
1634        int ret;
1635        struct net *net = dev_net(skb->dev);
1636
1637        if (skb->pkt_type != PACKET_HOST)
1638                goto discard_it;
1639
1640        /*
1641         *      Count it even if it's bad.
1642         */
1643        __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1644
1645        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1646                goto discard_it;
1647
1648        th = (const struct tcphdr *)skb->data;
1649
1650        if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1651                goto bad_packet;
1652        if (!pskb_may_pull(skb, th->doff*4))
1653                goto discard_it;
1654
1655        if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1656                goto csum_error;
1657
1658        th = (const struct tcphdr *)skb->data;
1659        hdr = ipv6_hdr(skb);
1660
1661lookup:
1662        sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1663                                th->source, th->dest, inet6_iif(skb), sdif,
1664                                &refcounted);
1665        if (!sk)
1666                goto no_tcp_socket;
1667
1668process:
1669        if (sk->sk_state == TCP_TIME_WAIT)
1670                goto do_time_wait;
1671
1672        if (sk->sk_state == TCP_NEW_SYN_RECV) {
1673                struct request_sock *req = inet_reqsk(sk);
1674                bool req_stolen = false;
1675                struct sock *nsk;
1676
1677                sk = req->rsk_listener;
1678                if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1679                        sk_drops_add(sk, skb);
1680                        reqsk_put(req);
1681                        goto discard_it;
1682                }
1683                if (tcp_checksum_complete(skb)) {
1684                        reqsk_put(req);
1685                        goto csum_error;
1686                }
1687                if (unlikely(sk->sk_state != TCP_LISTEN)) {
1688                        nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1689                        if (!nsk) {
1690                                inet_csk_reqsk_queue_drop_and_put(sk, req);
1691                                goto lookup;
1692                        }
1693                        sk = nsk;
1694                        /* reuseport_migrate_sock() already holds one
1695                         * sk_refcnt on the returned socket.
1696                         */
1697                } else {
1698                        sock_hold(sk);
1699                }
1700                refcounted = true;
1701                nsk = NULL;
1702                if (!tcp_filter(sk, skb)) {
1703                        th = (const struct tcphdr *)skb->data;
1704                        hdr = ipv6_hdr(skb);
1705                        tcp_v6_fill_cb(skb, hdr, th);
1706                        nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1707                }
1708                if (!nsk) {
1709                        reqsk_put(req);
1710                        if (req_stolen) {
1711                                /* Another cpu got exclusive access to req
1712                                 * and created a full-blown socket.
1713                                 * Try to feed this packet to that socket
1714                                 * instead of discarding it.
1715                                 */
1716                                tcp_v6_restore_cb(skb);
1717                                sock_put(sk);
1718                                goto lookup;
1719                        }
1720                        goto discard_and_relse;
1721                }
1722                if (nsk == sk) {
1723                        reqsk_put(req);
1724                        tcp_v6_restore_cb(skb);
1725                } else if (tcp_child_process(sk, nsk, skb)) {
1726                        tcp_v6_send_reset(nsk, skb);
1727                        goto discard_and_relse;
1728                } else {
1729                        sock_put(sk);
1730                        return 0;
1731                }
1732        }
1733
1734        if (static_branch_unlikely(&ip6_min_hopcount)) {
1735                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1736                if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1737                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1738                        goto discard_and_relse;
1739                }
1740        }
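        /* The per-socket limit checked above is set from userspace, e.g.
         * (hypothetical fd; the classic RFC 5082 GTSM setting):
         *
         *      int minhops = 255;
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
         *                 &minhops, sizeof(minhops));
         */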
1741
1742        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1743                goto discard_and_relse;
1744
1745        if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1746                goto discard_and_relse;
1747
1748        if (tcp_filter(sk, skb))
1749                goto discard_and_relse;
1750        th = (const struct tcphdr *)skb->data;
1751        hdr = ipv6_hdr(skb);
1752        tcp_v6_fill_cb(skb, hdr, th);
1753
1754        skb->dev = NULL;
1755
1756        if (sk->sk_state == TCP_LISTEN) {
1757                ret = tcp_v6_do_rcv(sk, skb);
1758                goto put_and_return;
1759        }
1760
1761        sk_incoming_cpu_update(sk);
1762
1763        bh_lock_sock_nested(sk);
1764        tcp_segs_in(tcp_sk(sk), skb);
1765        ret = 0;
1766        if (!sock_owned_by_user(sk)) {
1767                ret = tcp_v6_do_rcv(sk, skb);
1768        } else {
1769                if (tcp_add_backlog(sk, skb))
1770                        goto discard_and_relse;
1771        }
1772        bh_unlock_sock(sk);
1773put_and_return:
1774        if (refcounted)
1775                sock_put(sk);
1776        return ret ? -1 : 0;
1777
1778no_tcp_socket:
1779        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1780                goto discard_it;
1781
1782        tcp_v6_fill_cb(skb, hdr, th);
1783
1784        if (tcp_checksum_complete(skb)) {
1785csum_error:
1786                trace_tcp_bad_csum(skb);
1787                __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1788bad_packet:
1789                __TCP_INC_STATS(net, TCP_MIB_INERRS);
1790        } else {
1791                tcp_v6_send_reset(NULL, skb);
1792        }
1793
1794discard_it:
1795        kfree_skb(skb);
1796        return 0;
1797
1798discard_and_relse:
1799        sk_drops_add(sk, skb);
1800        if (refcounted)
1801                sock_put(sk);
1802        goto discard_it;
1803
1804do_time_wait:
1805        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1806                inet_twsk_put(inet_twsk(sk));
1807                goto discard_it;
1808        }
1809
1810        tcp_v6_fill_cb(skb, hdr, th);
1811
1812        if (tcp_checksum_complete(skb)) {
1813                inet_twsk_put(inet_twsk(sk));
1814                goto csum_error;
1815        }
1816
1817        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1818        case TCP_TW_SYN:
1819        {
1820                struct sock *sk2;
1821
1822                sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1823                                            skb, __tcp_hdrlen(th),
1824                                            &ipv6_hdr(skb)->saddr, th->source,
1825                                            &ipv6_hdr(skb)->daddr,
1826                                            ntohs(th->dest),
1827                                            tcp_v6_iif_l3_slave(skb),
1828                                            sdif);
1829                if (sk2) {
1830                        struct inet_timewait_sock *tw = inet_twsk(sk);
1831                        inet_twsk_deschedule_put(tw);
1832                        sk = sk2;
1833                        tcp_v6_restore_cb(skb);
1834                        refcounted = false;
1835                        goto process;
1836                }
1837        }
1838                /* to ACK */
1839                fallthrough;
1840        case TCP_TW_ACK:
1841                tcp_v6_timewait_ack(sk, skb);
1842                break;
1843        case TCP_TW_RST:
1844                tcp_v6_send_reset(sk, skb);
1845                inet_twsk_deschedule_put(inet_twsk(sk));
1846                goto discard_it;
1847        case TCP_TW_SUCCESS:
1848                ;
1849        }
1850        goto discard_it;
1851}
1852
1853INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1854{
1855        const struct ipv6hdr *hdr;
1856        const struct tcphdr *th;
1857        struct sock *sk;
1858
1859        if (skb->pkt_type != PACKET_HOST)
1860                return;
1861
1862        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1863                return;
1864
1865        hdr = ipv6_hdr(skb);
1866        th = tcp_hdr(skb);
1867
1868        if (th->doff < sizeof(struct tcphdr) / 4)
1869                return;
1870
1871        /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1872        sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1873                                        &hdr->saddr, th->source,
1874                                        &hdr->daddr, ntohs(th->dest),
1875                                        inet6_iif(skb), inet6_sdif(skb));
1876        if (sk) {
1877                skb->sk = sk;
1878                skb->destructor = sock_edemux;
1879                if (sk_fullsock(sk)) {
1880                        struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1881
1882                        if (dst)
1883                                dst = dst_check(dst, sk->sk_rx_dst_cookie);
1884                        if (dst &&
1885                            sk->sk_rx_dst_ifindex == skb->skb_iif)
1886                                skb_dst_set_noref(skb, dst);
1887                }
1888        }
1889}
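/* The dst cached in sk->sk_rx_dst by early demux is what the
 * TCP_ESTABLISHED fast path in tcp_v6_do_rcv() revalidates via
 * dst->ops->check, letting established flows skip a full routing lookup
 * for every packet.
 */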
1890
1891static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1892        .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1893        .twsk_unique    = tcp_twsk_unique,
1894        .twsk_destructor = tcp_twsk_destructor,
1895};
1896
1897INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1898{
1899        struct ipv6_pinfo *np = inet6_sk(sk);
1900
1901        __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1902}
1903
1904const struct inet_connection_sock_af_ops ipv6_specific = {
1905        .queue_xmit        = inet6_csk_xmit,
1906        .send_check        = tcp_v6_send_check,
1907        .rebuild_header    = inet6_sk_rebuild_header,
1908        .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1909        .conn_request      = tcp_v6_conn_request,
1910        .syn_recv_sock     = tcp_v6_syn_recv_sock,
1911        .net_header_len    = sizeof(struct ipv6hdr),
1912        .net_frag_header_len = sizeof(struct frag_hdr),
1913        .setsockopt        = ipv6_setsockopt,
1914        .getsockopt        = ipv6_getsockopt,
1915        .addr2sockaddr     = inet6_csk_addr2sockaddr,
1916        .sockaddr_len      = sizeof(struct sockaddr_in6),
1917        .mtu_reduced       = tcp_v6_mtu_reduced,
1918};
1919
1920#ifdef CONFIG_TCP_MD5SIG
1921static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1922        .md5_lookup     =       tcp_v6_md5_lookup,
1923        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1924        .md5_parse      =       tcp_v6_parse_md5_keys,
1925};
1926#endif
1927
1928/*
1929 *      TCP over IPv4 via INET6 API
1930 */
1931static const struct inet_connection_sock_af_ops ipv6_mapped = {
1932        .queue_xmit        = ip_queue_xmit,
1933        .send_check        = tcp_v4_send_check,
1934        .rebuild_header    = inet_sk_rebuild_header,
1935        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1936        .conn_request      = tcp_v6_conn_request,
1937        .syn_recv_sock     = tcp_v6_syn_recv_sock,
1938        .net_header_len    = sizeof(struct iphdr),
1939        .setsockopt        = ipv6_setsockopt,
1940        .getsockopt        = ipv6_getsockopt,
1941        .addr2sockaddr     = inet6_csk_addr2sockaddr,
1942        .sockaddr_len      = sizeof(struct sockaddr_in6),
1943        .mtu_reduced       = tcp_v4_mtu_reduced,
1944};
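/* ipv6_mapped is installed as icsk_af_ops by tcp_v6_connect() (earlier in
 * this file) when an AF_INET6 socket connects to a v4-mapped address, so
 * such flows use the IPv4 transmit and header-rebuild paths above.
 */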
1945
1946#ifdef CONFIG_TCP_MD5SIG
1947static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1948        .md5_lookup     =       tcp_v4_md5_lookup,
1949        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1950        .md5_parse      =       tcp_v6_parse_md5_keys,
1951};
1952#endif
1953
1954/* NOTE: A lot of things are set to zero explicitly by the call to
1955 *       sk_alloc(), so they need not be done here.
1956 */
1957static int tcp_v6_init_sock(struct sock *sk)
1958{
1959        struct inet_connection_sock *icsk = inet_csk(sk);
1960
1961        tcp_init_sock(sk);
1962
1963        icsk->icsk_af_ops = &ipv6_specific;
1964
1965#ifdef CONFIG_TCP_MD5SIG
1966        tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1967#endif
1968
1969        return 0;
1970}
1971
1972static void tcp_v6_destroy_sock(struct sock *sk)
1973{
1974        tcp_v4_destroy_sock(sk);
1975        inet6_destroy_sock(sk);
1976}
1977
1978#ifdef CONFIG_PROC_FS
1979/* Proc filesystem TCPv6 sock list dumping. */
1980static void get_openreq6(struct seq_file *seq,
1981                         const struct request_sock *req, int i)
1982{
1983        long ttd = req->rsk_timer.expires - jiffies;
1984        const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1985        const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1986
1987        if (ttd < 0)
1988                ttd = 0;
1989
1990        seq_printf(seq,
1991                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1992                   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1993                   i,
1994                   src->s6_addr32[0], src->s6_addr32[1],
1995                   src->s6_addr32[2], src->s6_addr32[3],
1996                   inet_rsk(req)->ir_num,
1997                   dest->s6_addr32[0], dest->s6_addr32[1],
1998                   dest->s6_addr32[2], dest->s6_addr32[3],
1999                   ntohs(inet_rsk(req)->ir_rmt_port),
2000                   TCP_SYN_RECV,
2001                   0, 0, /* could print option size, but that is af dependent. */
2002                   1,   /* timers active (only the expire timer) */
2003                   jiffies_to_clock_t(ttd),
2004                   req->num_timeout,
2005                   from_kuid_munged(seq_user_ns(seq),
2006                                    sock_i_uid(req->rsk_listener)),
2007                   0,  /* non standard timer */
2008                   0, /* open_requests have no inode */
2009                   0, req);
2010}
2011
2012static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2013{
2014        const struct in6_addr *dest, *src;
2015        __u16 destp, srcp;
2016        int timer_active;
2017        unsigned long timer_expires;
2018        const struct inet_sock *inet = inet_sk(sp);
2019        const struct tcp_sock *tp = tcp_sk(sp);
2020        const struct inet_connection_sock *icsk = inet_csk(sp);
2021        const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2022        int rx_queue;
2023        int state;
2024
2025        dest  = &sp->sk_v6_daddr;
2026        src   = &sp->sk_v6_rcv_saddr;
2027        destp = ntohs(inet->inet_dport);
2028        srcp  = ntohs(inet->inet_sport);
2029
2030        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2031            icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2032            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2033                timer_active    = 1;
2034                timer_expires   = icsk->icsk_timeout;
2035        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2036                timer_active    = 4;
2037                timer_expires   = icsk->icsk_timeout;
2038        } else if (timer_pending(&sp->sk_timer)) {
2039                timer_active    = 2;
2040                timer_expires   = sp->sk_timer.expires;
2041        } else {
2042                timer_active    = 0;
2043                timer_expires = jiffies;
2044        }
2045
2046        state = inet_sk_state_load(sp);
2047        if (state == TCP_LISTEN)
2048                rx_queue = READ_ONCE(sp->sk_ack_backlog);
2049        else
2050                /* Because we don't lock the socket,
2051                 * we might find a transient negative value.
2052                 */
2053                rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2054                                      READ_ONCE(tp->copied_seq), 0);
2055
2056        seq_printf(seq,
2057                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2058                   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2059                   i,
2060                   src->s6_addr32[0], src->s6_addr32[1],
2061                   src->s6_addr32[2], src->s6_addr32[3], srcp,
2062                   dest->s6_addr32[0], dest->s6_addr32[1],
2063                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2064                   state,
2065                   READ_ONCE(tp->write_seq) - tp->snd_una,
2066                   rx_queue,
2067                   timer_active,
2068                   jiffies_delta_to_clock_t(timer_expires - jiffies),
2069                   icsk->icsk_retransmits,
2070                   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2071                   icsk->icsk_probes_out,
2072                   sock_i_ino(sp),
2073                   refcount_read(&sp->sk_refcnt), sp,
2074                   jiffies_to_clock_t(icsk->icsk_rto),
2075                   jiffies_to_clock_t(icsk->icsk_ack.ato),
2076                   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2077                   tp->snd_cwnd,
2078                   state == TCP_LISTEN ?
2079                        fastopenq->max_qlen :
2080                        (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2081                   );
2082}
2083
2084static void get_timewait6_sock(struct seq_file *seq,
2085                               struct inet_timewait_sock *tw, int i)
2086{
2087        long delta = tw->tw_timer.expires - jiffies;
2088        const struct in6_addr *dest, *src;
2089        __u16 destp, srcp;
2090
2091        dest = &tw->tw_v6_daddr;
2092        src  = &tw->tw_v6_rcv_saddr;
2093        destp = ntohs(tw->tw_dport);
2094        srcp  = ntohs(tw->tw_sport);
2095
2096        seq_printf(seq,
2097                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2098                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2099                   i,
2100                   src->s6_addr32[0], src->s6_addr32[1],
2101                   src->s6_addr32[2], src->s6_addr32[3], srcp,
2102                   dest->s6_addr32[0], dest->s6_addr32[1],
2103                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2104                   tw->tw_substate, 0, 0,
2105                   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2106                   refcount_read(&tw->tw_refcnt), tw);
2107}
2108
2109static int tcp6_seq_show(struct seq_file *seq, void *v)
2110{
2111        struct tcp_iter_state *st;
2112        struct sock *sk = v;
2113
2114        if (v == SEQ_START_TOKEN) {
2115                seq_puts(seq,
2116                         "  sl  "
2117                         "local_address                         "
2118                         "remote_address                        "
2119                         "st tx_queue rx_queue tr tm->when retrnsmt"
2120                         "   uid  timeout inode\n");
2121                goto out;
2122        }
2123        st = seq->private;
2124
2125        if (sk->sk_state == TCP_TIME_WAIT)
2126                get_timewait6_sock(seq, v, st->num);
2127        else if (sk->sk_state == TCP_NEW_SYN_RECV)
2128                get_openreq6(seq, v, st->num);
2129        else
2130                get_tcp6_sock(seq, v, st->num);
2131out:
2132        return 0;
2133}
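/* A sample (hypothetical) listener line as emitted by the formats above:
 *
 *	0: 00000000000000000000000001000000:0016 00000000000000000000000000000000:0000 0A ...
 *
 * Addresses are four native-endian 32-bit words printed with %08X, so on a
 * little-endian host ::1 renders with 01000000 as its final word; ports
 * are plain hex (0016 == 22) and 0A is the TCP_LISTEN state value.
 */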
2134
2135static const struct seq_operations tcp6_seq_ops = {
2136        .show           = tcp6_seq_show,
2137        .start          = tcp_seq_start,
2138        .next           = tcp_seq_next,
2139        .stop           = tcp_seq_stop,
2140};
2141
2142static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2143        .family         = AF_INET6,
2144};
2145
2146int __net_init tcp6_proc_init(struct net *net)
2147{
2148        if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2149                        sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2150                return -ENOMEM;
2151        return 0;
2152}
2153
2154void tcp6_proc_exit(struct net *net)
2155{
2156        remove_proc_entry("tcp6", net->proc_net);
2157}
2158#endif
2159
2160struct proto tcpv6_prot = {
2161        .name                   = "TCPv6",
2162        .owner                  = THIS_MODULE,
2163        .close                  = tcp_close,
2164        .pre_connect            = tcp_v6_pre_connect,
2165        .connect                = tcp_v6_connect,
2166        .disconnect             = tcp_disconnect,
2167        .accept                 = inet_csk_accept,
2168        .ioctl                  = tcp_ioctl,
2169        .init                   = tcp_v6_init_sock,
2170        .destroy                = tcp_v6_destroy_sock,
2171        .shutdown               = tcp_shutdown,
2172        .setsockopt             = tcp_setsockopt,
2173        .getsockopt             = tcp_getsockopt,
2174        .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2175        .keepalive              = tcp_set_keepalive,
2176        .recvmsg                = tcp_recvmsg,
2177        .sendmsg                = tcp_sendmsg,
2178        .sendpage               = tcp_sendpage,
2179        .backlog_rcv            = tcp_v6_do_rcv,
2180        .release_cb             = tcp_release_cb,
2181        .hash                   = inet6_hash,
2182        .unhash                 = inet_unhash,
2183        .get_port               = inet_csk_get_port,
2184#ifdef CONFIG_BPF_SYSCALL
2185        .psock_update_sk_prot   = tcp_bpf_update_proto,
2186#endif
2187        .enter_memory_pressure  = tcp_enter_memory_pressure,
2188        .leave_memory_pressure  = tcp_leave_memory_pressure,
2189        .stream_memory_free     = tcp_stream_memory_free,
2190        .sockets_allocated      = &tcp_sockets_allocated,
2191        .memory_allocated       = &tcp_memory_allocated,
2192        .memory_pressure        = &tcp_memory_pressure,
2193        .orphan_count           = &tcp_orphan_count,
2194        .sysctl_mem             = sysctl_tcp_mem,
2195        .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2196        .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2197        .max_header             = MAX_TCP_HEADER,
2198        .obj_size               = sizeof(struct tcp6_sock),
2199        .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2200        .twsk_prot              = &tcp6_timewait_sock_ops,
2201        .rsk_prot               = &tcp6_request_sock_ops,
2202        .h.hashinfo             = &tcp_hashinfo,
2203        .no_autobind            = true,
2204        .diag_destroy           = tcp_abort,
2205};
2206EXPORT_SYMBOL_GPL(tcpv6_prot);
2207
2208/* thinking of making this const? Don't.
2209 * early_demux can change based on sysctl.
2210 */
2211static struct inet6_protocol tcpv6_protocol = {
2212        .early_demux    =       tcp_v6_early_demux,
2213        .early_demux_handler =  tcp_v6_early_demux,
2214        .handler        =       tcp_v6_rcv,
2215        .err_handler    =       tcp_v6_err,
2216        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2217};
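/* The run-time switch is the tcp_early_demux sysctl
 * (net.ipv4.tcp_early_demux, which also governs IPv6): its handler
 * rewrites the .early_demux members above, which is why this structure
 * must stay writable.
 */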
2218
2219static struct inet_protosw tcpv6_protosw = {
2220        .type           =       SOCK_STREAM,
2221        .protocol       =       IPPROTO_TCP,
2222        .prot           =       &tcpv6_prot,
2223        .ops            =       &inet6_stream_ops,
2224        .flags          =       INET_PROTOSW_PERMANENT |
2225                                INET_PROTOSW_ICSK,
2226};
2227
2228static int __net_init tcpv6_net_init(struct net *net)
2229{
2230        return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2231                                    SOCK_RAW, IPPROTO_TCP, net);
2232}
2233
2234static void __net_exit tcpv6_net_exit(struct net *net)
2235{
2236        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2237}
2238
2239static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2240{
2241        inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2242}
2243
2244static struct pernet_operations tcpv6_net_ops = {
2245        .init       = tcpv6_net_init,
2246        .exit       = tcpv6_net_exit,
2247        .exit_batch = tcpv6_net_exit_batch,
2248};
2249
2250int __init tcpv6_init(void)
2251{
2252        int ret;
2253
2254        ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2255        if (ret)
2256                goto out;
2257
2258        /* register inet6 protocol */
2259        ret = inet6_register_protosw(&tcpv6_protosw);
2260        if (ret)
2261                goto out_tcpv6_protocol;
2262
2263        ret = register_pernet_subsys(&tcpv6_net_ops);
2264        if (ret)
2265                goto out_tcpv6_protosw;
2266
2267        ret = mptcpv6_init();
2268        if (ret)
2269                goto out_tcpv6_pernet_subsys;
2270
2271out:
2272        return ret;
2273
2274out_tcpv6_pernet_subsys:
2275        unregister_pernet_subsys(&tcpv6_net_ops);
2276out_tcpv6_protosw:
2277        inet6_unregister_protosw(&tcpv6_protosw);
2278out_tcpv6_protocol:
2279        inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2280        goto out;
2281}
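/* Note the unwind order above: each failure label undoes, in reverse, the
 * registrations that succeeded before it, and tcpv6_exit() below tears the
 * same three down in that order.
 */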
2282
2283void tcpv6_exit(void)
2284{
2285        unregister_pernet_subsys(&tcpv6_net_ops);
2286        inet6_unregister_protosw(&tcpv6_protosw);
2287        inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2288}
2289