linux/net/ipv6/tcp_ipv6.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      to a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

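/* Cache the incoming route on the socket for the established fast path:
 * store the dst itself, the ingress ifindex it was learned on, and a
 * route cookie used later to detect a stale dst.
 */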
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                sk->sk_rx_dst = dst;
                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
                tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
        }
}

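/* The initial sequence number is derived from the connection 4-tuple
 * and the timestamp offset from the address pair, both via keyed
 * hashes (in the spirit of RFC 6528), so off-path attackers cannot
 * predict them.
 */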
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * beyond the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface was set while binding, the indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface. */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

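/* PMTU reduction handler; runs either directly from tcp_v6_err() or,
 * if the socket was owned by user context at that time, deferred via
 * the TCP_MTU_REDUCED_DEFERRED flag until the socket lock is released.
 */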
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * The check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

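/* ICMPv6 error handler for TCP: look up the socket the offending
 * segment belongs to, then act on the error type: apply redirects,
 * handle Packet Too Big for PMTU discovery, abort connections in the
 * SYN states on hard errors, or record a soft error otherwise.
 */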
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always <576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }


        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check if this ICMP message allows a revert of the backoff.
                 * (see RFC 6069)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}


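/* Transmit a SYN-ACK in answer to a request sock: route the reply if
 * no dst was supplied, build the segment with tcp_make_synack(), pick
 * a traffic class (optionally reflecting the SYN's ToS), and send it
 * with ip6_xmit() under the listener's IPv6 options.
 */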
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

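/* setsockopt(TCP_MD5SIG/TCP_MD5SIG_EXT) handler: validate the address
 * family, optional prefix length and ifindex, then add, replace or
 * delete the key. v4-mapped addresses are stored as AF_INET keys.
 */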
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* It is OK to test whether the device was found outside of
                 * the RCU section; right now the device MUST be an L3 master.
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC 2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

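/* Check the TCP-MD5 signature of an incoming segment against the key
 * configured for its source address. A missing option where a key
 * exists, an unexpected option, or a signature mismatch all cause the
 * segment to be dropped (return true).
 */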
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* If sdif is set, the packet ingressed via a device in an L3
         * domain and dif holds the l3mdev ifindex.
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

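/* Fill the IPv6-specific parts of a freshly minted request sock from
 * the incoming SYN: addresses, the ingress interface for link-local
 * peers, and a reference to the SYN itself when the listener asked
 * for packet options to be reported.
 */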
static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

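/* Build and send a stateless control segment (RST or bare ACK) on the
 * per-netns control socket, swapping the addresses and ports of the
 * segment being answered. Used for resets, timewait ACKs and
 * request-sock ACKs below.
 */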
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;
        buff->csum = 0;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when it is for an RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our
         * dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* If sdif is set, the packet ingressed via a device in an L3
                 * domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /* The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked with the md5 hash of the
                 * found key; no RST is generated if the md5 hash doesn't
                 * match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* If sdif is set, the packet ingressed via a device in an L3
                 * domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

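/* Entry point for incoming connection requests (SYNs): divert packets
 * from v4-mapped sockets to the IPv4 handler, drop non-unicast
 * destinations without a reset, and hand the rest to the generic
 * tcp_conn_request() with the IPv6 ops.
 */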
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

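/* Create the child socket once a request completes the handshake (or
 * immediately, for Fast Open). The ETH_P_IP branch builds a v4-mapped
 * child for IPv4 packets arriving on an IPv6 listener; the normal
 * path clones the listener, installs the route and IPv6 options, and
 * inherits any MD5 key before the child is hashed.
 */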
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newinet = inet_sk(newsk);
                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 tcp code
                   worked with the IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now the IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever,
           but we do one more thing here: we reattach optmem
           to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone(ireq->pktopts,
                                                      sk_gfp_mask(sk, GFP_ATOMIC));
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions) {
                                tcp_v6_restore_cb(newnp->pktoptions);
                                skb_set_owner_r(newnp->pktoptions, newsk);
                        }
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}

1454
1455INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1456                                                           u32));
1457/* The socket must have it's spinlock held when we get
1458 * here, unless it is a TCP_LISTEN socket.
1459 *
1460 * We have a potential double-lock case here, so even when
1461 * doing backlog processing we use the BH locking scheme.
1462 * This is because we cannot sleep with the original spinlock
1463 * held.
1464 */
1465static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1466{
1467        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1468        struct sk_buff *opt_skb = NULL;
1469        struct tcp_sock *tp;
1470
1471        /* Imagine: socket is IPv6. IPv4 packet arrives,
1472           goes to the IPv4 receive handler and is backlogged.
1473           From the backlog it always goes here. Kerboom...
1474           Fortunately, tcp_rcv_established and rcv_established
1475           handle them correctly, but it is not the case with
1476           tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1477         */
1478
1479        if (skb->protocol == htons(ETH_P_IP))
1480                return tcp_v4_do_rcv(sk, skb);
1481
1482        /*
1483         *      socket locking is here for SMP purposes as backlog rcv
1484         *      is currently called with bh processing disabled.
1485         */
1486
1487        /* Do Stevens' IPV6_PKTOPTIONS.
1488
1489           Yes, guys, it is the only place in our code where we
1490           may make it without affecting IPv4.
1491           The rest of the code is protocol independent,
1492           and I do not like the idea of uglifying IPv4.
1493
1494           Actually, the whole idea behind IPV6_PKTOPTIONS
1495           does not look very well thought out. For now we latch
1496           the options received in the last packet enqueued
1497           by tcp. Feel free to propose a better solution.
1498                                               --ANK (980728)
1499         */
1500        if (np->rxopt.all)
1501                opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1502
1503        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1504                struct dst_entry *dst = sk->sk_rx_dst;
1505
1506                sock_rps_save_rxhash(sk, skb);
1507                sk_mark_napi_id(sk, skb);
1508                if (dst) {
1509                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1510                            INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1511                                            dst, np->rx_dst_cookie) == NULL) {
1512                                dst_release(dst);
1513                                sk->sk_rx_dst = NULL;
1514                        }
1515                }
1516
1517                tcp_rcv_established(sk, skb);
1518                if (opt_skb)
1519                        goto ipv6_pktoptions;
1520                return 0;
1521        }
1522
1523        if (tcp_checksum_complete(skb))
1524                goto csum_err;
1525
1526        if (sk->sk_state == TCP_LISTEN) {
1527                struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1528
1529                if (!nsk)
1530                        goto discard;
1531
1532                if (nsk != sk) {
1533                        if (tcp_child_process(sk, nsk, skb))
1534                                goto reset;
1535                        if (opt_skb)
1536                                __kfree_skb(opt_skb);
1537                        return 0;
1538                }
1539        } else
1540                sock_rps_save_rxhash(sk, skb);
1541
1542        if (tcp_rcv_state_process(sk, skb))
1543                goto reset;
1544        if (opt_skb)
1545                goto ipv6_pktoptions;
1546        return 0;
1547
1548reset:
1549        tcp_v6_send_reset(sk, skb);
1550discard:
1551        if (opt_skb)
1552                __kfree_skb(opt_skb);
1553        kfree_skb(skb);
1554        return 0;
1555csum_err:
1556        trace_tcp_bad_csum(skb);
1557        TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1558        TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1559        goto discard;
1560
1561
1562ipv6_pktoptions:
1563        /* When do we latch these options?
1564
1565           1. The skb was enqueued by tcp.
1566           2. The skb was added to the tail of the read queue, not out of order.
1567           3. The socket is not in a passive state.
1568           4. Finally, it really contains options the user wants to receive.
1569         */
1570        tp = tcp_sk(sk);
1571        if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1572            !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1573                if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1574                        np->mcast_oif = tcp_v6_iif(opt_skb);
1575                if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1576                        np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1577                if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1578                        np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1579                if (np->repflow)
1580                        np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1581                if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1582                        skb_set_owner_r(opt_skb, sk);
1583                        tcp_v6_restore_cb(opt_skb);
1584                        opt_skb = xchg(&np->pktoptions, opt_skb);
1585                } else {
1586                        __kfree_skb(opt_skb);
1587                        opt_skb = xchg(&np->pktoptions, NULL);
1588                }
1589        }
1590
1591        kfree_skb(opt_skb);
1592        return 0;
1593}
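
    /* Illustrative userspace sketch (not part of this file or the kernel
     * build): the pktoptions latched above can be read back with the
     * Stevens-style getsockopt(IPV6_2292PKTOPTIONS), which returns them as
     * ancillary data. Assumes the glibc definitions of IPV6_2292PKTOPTIONS
     * and struct in6_pktinfo (_GNU_SOURCE); error handling is elided.
     */
    #if 0
    #define _GNU_SOURCE             /* for struct in6_pktinfo */
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <string.h>

    static void read_latched_pktinfo(int fd)
    {
            unsigned char cbuf[256];
            socklen_t cbuf_len = sizeof(cbuf);
            struct msghdr msg;
            struct cmsghdr *cmsg;
            int on = 1;

            /* Ask the stack to latch per-packet info on this TCP socket. */
            setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));

            /* Fetch the options latched from the last enqueued segment. */
            getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &cbuf_len);

            memset(&msg, 0, sizeof(msg));
            msg.msg_control = cbuf;
            msg.msg_controllen = cbuf_len;
            for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
                    if (cmsg->cmsg_level == IPPROTO_IPV6 &&
                        cmsg->cmsg_type == IPV6_PKTINFO) {
                            struct in6_pktinfo pi;

                            memcpy(&pi, CMSG_DATA(cmsg), sizeof(pi));
                            /* pi.ipi6_ifindex is the arrival interface. */
                    }
    }
    #endif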
1594
1595static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1596                           const struct tcphdr *th)
1597{
1598        /* This is tricky: we move IP6CB to its correct location inside
1599         * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1600         * _decode_session6() uses IP6CB().
1601         * barrier() makes sure the compiler won't play aliasing games.
1602         */
1603        memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1604                sizeof(struct inet6_skb_parm));
1605        barrier();
1606
1607        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1608        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1609                                    skb->len - th->doff*4);
1610        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1611        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1612        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1613        TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1614        TCP_SKB_CB(skb)->sacked = 0;
1615        TCP_SKB_CB(skb)->has_rxtstamp =
1616                        skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1617}
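
    /* Worked example of the sequence-space arithmetic above (hypothetical
     * values): SYN and FIN each occupy one unit of sequence space, so for a
     * segment with seq = 1000, doff = 5 (20-byte header), 100 bytes of
     * payload (skb->len = 120) and FIN set:
     *
     *          end_seq = 1000 + 0 (syn) + 1 (fin) + 120 - 5 * 4 = 1101
     */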
1618
1619INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1620{
1621        struct sk_buff *skb_to_free;
1622        int sdif = inet6_sdif(skb);
1623        int dif = inet6_iif(skb);
1624        const struct tcphdr *th;
1625        const struct ipv6hdr *hdr;
1626        bool refcounted;
1627        struct sock *sk;
1628        int ret;
1629        struct net *net = dev_net(skb->dev);
1630
1631        if (skb->pkt_type != PACKET_HOST)
1632                goto discard_it;
1633
1634        /*
1635         *      Count it even if it's bad.
1636         */
1637        __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1638
1639        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1640                goto discard_it;
1641
1642        th = (const struct tcphdr *)skb->data;
1643
1644        if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1645                goto bad_packet;
1646        if (!pskb_may_pull(skb, th->doff*4))
1647                goto discard_it;
1648
1649        if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1650                goto csum_error;
1651
1652        th = (const struct tcphdr *)skb->data;
1653        hdr = ipv6_hdr(skb);
1654
1655lookup:
1656        sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1657                                th->source, th->dest, inet6_iif(skb), sdif,
1658                                &refcounted);
1659        if (!sk)
1660                goto no_tcp_socket;
1661
1662process:
1663        if (sk->sk_state == TCP_TIME_WAIT)
1664                goto do_time_wait;
1665
1666        if (sk->sk_state == TCP_NEW_SYN_RECV) {
1667                struct request_sock *req = inet_reqsk(sk);
1668                bool req_stolen = false;
1669                struct sock *nsk;
1670
1671                sk = req->rsk_listener;
1672                if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1673                        sk_drops_add(sk, skb);
1674                        reqsk_put(req);
1675                        goto discard_it;
1676                }
1677                if (tcp_checksum_complete(skb)) {
1678                        reqsk_put(req);
1679                        goto csum_error;
1680                }
1681                if (unlikely(sk->sk_state != TCP_LISTEN)) {
1682                        nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1683                        if (!nsk) {
1684                                inet_csk_reqsk_queue_drop_and_put(sk, req);
1685                                goto lookup;
1686                        }
1687                        sk = nsk;
1688                        /* reuseport_migrate_sock() already holds one sk_refcnt
1689                         * before returning.
1690                         */
1691                } else {
1692                        sock_hold(sk);
1693                }
1694                refcounted = true;
1695                nsk = NULL;
1696                if (!tcp_filter(sk, skb)) {
1697                        th = (const struct tcphdr *)skb->data;
1698                        hdr = ipv6_hdr(skb);
1699                        tcp_v6_fill_cb(skb, hdr, th);
1700                        nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1701                }
1702                if (!nsk) {
1703                        reqsk_put(req);
1704                        if (req_stolen) {
1705                                /* Another CPU got exclusive access to req
1706                                 * and created a full-blown socket.
1707                                 * Try to feed this packet to that socket
1708                                 * instead of discarding it.
1709                                 */
1710                                tcp_v6_restore_cb(skb);
1711                                sock_put(sk);
1712                                goto lookup;
1713                        }
1714                        goto discard_and_relse;
1715                }
1716                if (nsk == sk) {
1717                        reqsk_put(req);
1718                        tcp_v6_restore_cb(skb);
1719                } else if (tcp_child_process(sk, nsk, skb)) {
1720                        tcp_v6_send_reset(nsk, skb);
1721                        goto discard_and_relse;
1722                } else {
1723                        sock_put(sk);
1724                        return 0;
1725                }
1726        }
1727        if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1728                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1729                goto discard_and_relse;
1730        }
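
            /* Aside (illustrative, not kernel code): min_hopcount comes from
             * the IPV6_MINHOPCOUNT socket option, the IPv6 analogue of
             * IP_MINTTL (cf. RFC 5082 GTSM). For example,
             *
             *          int v = 255;
             *          setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT, &v, sizeof(v));
             *
             * makes the check above accept only segments that arrive with an
             * undecremented hop limit, i.e. from on-link peers.
             */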
1731
1732        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1733                goto discard_and_relse;
1734
1735        if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1736                goto discard_and_relse;
1737
1738        if (tcp_filter(sk, skb))
1739                goto discard_and_relse;
1740        th = (const struct tcphdr *)skb->data;
1741        hdr = ipv6_hdr(skb);
1742        tcp_v6_fill_cb(skb, hdr, th);
1743
1744        skb->dev = NULL;
1745
1746        if (sk->sk_state == TCP_LISTEN) {
1747                ret = tcp_v6_do_rcv(sk, skb);
1748                goto put_and_return;
1749        }
1750
1751        sk_incoming_cpu_update(sk);
1752
1753        bh_lock_sock_nested(sk);
1754        tcp_segs_in(tcp_sk(sk), skb);
1755        ret = 0;
1756        if (!sock_owned_by_user(sk)) {
1757                skb_to_free = sk->sk_rx_skb_cache;
1758                sk->sk_rx_skb_cache = NULL;
1759                ret = tcp_v6_do_rcv(sk, skb);
1760        } else {
1761                if (tcp_add_backlog(sk, skb))
1762                        goto discard_and_relse;
1763                skb_to_free = NULL;
1764        }
1765        bh_unlock_sock(sk);
1766        if (skb_to_free)
1767                __kfree_skb(skb_to_free);
1768put_and_return:
1769        if (refcounted)
1770                sock_put(sk);
1771        return ret ? -1 : 0;
1772
1773no_tcp_socket:
1774        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1775                goto discard_it;
1776
1777        tcp_v6_fill_cb(skb, hdr, th);
1778
1779        if (tcp_checksum_complete(skb)) {
1780csum_error:
1781                trace_tcp_bad_csum(skb);
1782                __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1783bad_packet:
1784                __TCP_INC_STATS(net, TCP_MIB_INERRS);
1785        } else {
1786                tcp_v6_send_reset(NULL, skb);
1787        }
1788
1789discard_it:
1790        kfree_skb(skb);
1791        return 0;
1792
1793discard_and_relse:
1794        sk_drops_add(sk, skb);
1795        if (refcounted)
1796                sock_put(sk);
1797        goto discard_it;
1798
1799do_time_wait:
1800        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801                inet_twsk_put(inet_twsk(sk));
1802                goto discard_it;
1803        }
1804
1805        tcp_v6_fill_cb(skb, hdr, th);
1806
1807        if (tcp_checksum_complete(skb)) {
1808                inet_twsk_put(inet_twsk(sk));
1809                goto csum_error;
1810        }
1811
1812        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1813        case TCP_TW_SYN:
1814        {
1815                struct sock *sk2;
1816
1817                sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1818                                            skb, __tcp_hdrlen(th),
1819                                            &ipv6_hdr(skb)->saddr, th->source,
1820                                            &ipv6_hdr(skb)->daddr,
1821                                            ntohs(th->dest),
1822                                            tcp_v6_iif_l3_slave(skb),
1823                                            sdif);
1824                if (sk2) {
1825                        struct inet_timewait_sock *tw = inet_twsk(sk);
1826                        inet_twsk_deschedule_put(tw);
1827                        sk = sk2;
1828                        tcp_v6_restore_cb(skb);
1829                        refcounted = false;
1830                        goto process;
1831                }
1832        }
1833                /* to ACK */
1834                fallthrough;
1835        case TCP_TW_ACK:
1836                tcp_v6_timewait_ack(sk, skb);
1837                break;
1838        case TCP_TW_RST:
1839                tcp_v6_send_reset(sk, skb);
1840                inet_twsk_deschedule_put(inet_twsk(sk));
1841                goto discard_it;
1842        case TCP_TW_SUCCESS:
1843                ;
1844        }
1845        goto discard_it;
1846}
1847
1848INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1849{
1850        const struct ipv6hdr *hdr;
1851        const struct tcphdr *th;
1852        struct sock *sk;
1853
1854        if (skb->pkt_type != PACKET_HOST)
1855                return;
1856
1857        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1858                return;
1859
1860        hdr = ipv6_hdr(skb);
1861        th = tcp_hdr(skb);
1862
1863        if (th->doff < sizeof(struct tcphdr) / 4)
1864                return;
1865
1866        /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1867        sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1868                                        &hdr->saddr, th->source,
1869                                        &hdr->daddr, ntohs(th->dest),
1870                                        inet6_iif(skb), inet6_sdif(skb));
1871        if (sk) {
1872                skb->sk = sk;
1873                skb->destructor = sock_edemux;
1874                if (sk_fullsock(sk)) {
1875                        struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1876
1877                        if (dst)
1878                                dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1879                        if (dst &&
1880                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1881                                skb_dst_set_noref(skb, dst);
1882                }
1883        }
1884}
1885
1886static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1887        .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1888        .twsk_unique    = tcp_twsk_unique,
1889        .twsk_destructor = tcp_twsk_destructor,
1890};
1891
1892INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1893{
1894        struct ipv6_pinfo *np = inet6_sk(sk);
1895
1896        __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1897}
1898
1899const struct inet_connection_sock_af_ops ipv6_specific = {
1900        .queue_xmit        = inet6_csk_xmit,
1901        .send_check        = tcp_v6_send_check,
1902        .rebuild_header    = inet6_sk_rebuild_header,
1903        .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1904        .conn_request      = tcp_v6_conn_request,
1905        .syn_recv_sock     = tcp_v6_syn_recv_sock,
1906        .net_header_len    = sizeof(struct ipv6hdr),
1907        .net_frag_header_len = sizeof(struct frag_hdr),
1908        .setsockopt        = ipv6_setsockopt,
1909        .getsockopt        = ipv6_getsockopt,
1910        .addr2sockaddr     = inet6_csk_addr2sockaddr,
1911        .sockaddr_len      = sizeof(struct sockaddr_in6),
1912        .mtu_reduced       = tcp_v6_mtu_reduced,
1913};
1914
1915#ifdef CONFIG_TCP_MD5SIG
1916static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1917        .md5_lookup     =       tcp_v6_md5_lookup,
1918        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1919        .md5_parse      =       tcp_v6_parse_md5_keys,
1920};
1921#endif
1922
1923/*
1924 *      TCP over IPv4 via INET6 API
1925 */
1926static const struct inet_connection_sock_af_ops ipv6_mapped = {
1927        .queue_xmit        = ip_queue_xmit,
1928        .send_check        = tcp_v4_send_check,
1929        .rebuild_header    = inet_sk_rebuild_header,
1930        .sk_rx_dst_set     = inet_sk_rx_dst_set,
1931        .conn_request      = tcp_v6_conn_request,
1932        .syn_recv_sock     = tcp_v6_syn_recv_sock,
1933        .net_header_len    = sizeof(struct iphdr),
1934        .setsockopt        = ipv6_setsockopt,
1935        .getsockopt        = ipv6_getsockopt,
1936        .addr2sockaddr     = inet6_csk_addr2sockaddr,
1937        .sockaddr_len      = sizeof(struct sockaddr_in6),
1938        .mtu_reduced       = tcp_v4_mtu_reduced,
1939};
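
    /* Illustrative userspace sketch (not part of this file or the kernel
     * build): the ipv6_mapped ops above take over when an AF_INET6 TCP
     * socket connects to a v4-mapped peer, so the connection is carried by
     * the IPv4 code paths. Address and port are arbitrary examples; error
     * handling is elided.
     */
    #if 0
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    static int connect_v4_mapped(int fd)
    {
            struct sockaddr_in6 a = {
                    .sin6_family = AF_INET6,
                    .sin6_port = htons(80),
            };

            inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
            return connect(fd, (struct sockaddr *)&a, sizeof(a));
    }
    #endif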
1940
1941#ifdef CONFIG_TCP_MD5SIG
1942static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1943        .md5_lookup     =       tcp_v4_md5_lookup,
1944        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1945        .md5_parse      =       tcp_v6_parse_md5_keys,
1946};
1947#endif
1948
1949/* NOTE: A lot of things are set to zero explicitly by the call to
1950 *       sk_alloc(), so they need not be done here.
1951 */
1952static int tcp_v6_init_sock(struct sock *sk)
1953{
1954        struct inet_connection_sock *icsk = inet_csk(sk);
1955
1956        tcp_init_sock(sk);
1957
1958        icsk->icsk_af_ops = &ipv6_specific;
1959
1960#ifdef CONFIG_TCP_MD5SIG
1961        tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1962#endif
1963
1964        return 0;
1965}
1966
1967static void tcp_v6_destroy_sock(struct sock *sk)
1968{
1969        tcp_v4_destroy_sock(sk);
1970        inet6_destroy_sock(sk);
1971}
1972
1973#ifdef CONFIG_PROC_FS
1974/* Proc filesystem TCPv6 sock list dumping. */
1975static void get_openreq6(struct seq_file *seq,
1976                         const struct request_sock *req, int i)
1977{
1978        long ttd = req->rsk_timer.expires - jiffies;
1979        const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1980        const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1981
1982        if (ttd < 0)
1983                ttd = 0;
1984
1985        seq_printf(seq,
1986                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1987                   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1988                   i,
1989                   src->s6_addr32[0], src->s6_addr32[1],
1990                   src->s6_addr32[2], src->s6_addr32[3],
1991                   inet_rsk(req)->ir_num,
1992                   dest->s6_addr32[0], dest->s6_addr32[1],
1993                   dest->s6_addr32[2], dest->s6_addr32[3],
1994                   ntohs(inet_rsk(req)->ir_rmt_port),
1995                   TCP_SYN_RECV,
1996                   0, 0, /* could print option size, but that is af dependent. */
1997                   1,   /* timers active (only the expire timer) */
1998                   jiffies_to_clock_t(ttd),
1999                   req->num_timeout,
2000                   from_kuid_munged(seq_user_ns(seq),
2001                                    sock_i_uid(req->rsk_listener)),
2002                   0,  /* non standard timer */
2003                   0, /* open_requests have no inode */
2004                   0, req);
2005}
2006
2007static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2008{
2009        const struct in6_addr *dest, *src;
2010        __u16 destp, srcp;
2011        int timer_active;
2012        unsigned long timer_expires;
2013        const struct inet_sock *inet = inet_sk(sp);
2014        const struct tcp_sock *tp = tcp_sk(sp);
2015        const struct inet_connection_sock *icsk = inet_csk(sp);
2016        const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2017        int rx_queue;
2018        int state;
2019
2020        dest  = &sp->sk_v6_daddr;
2021        src   = &sp->sk_v6_rcv_saddr;
2022        destp = ntohs(inet->inet_dport);
2023        srcp  = ntohs(inet->inet_sport);
2024
2025        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2026            icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2027            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2028                timer_active    = 1;
2029                timer_expires   = icsk->icsk_timeout;
2030        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2031                timer_active    = 4;
2032                timer_expires   = icsk->icsk_timeout;
2033        } else if (timer_pending(&sp->sk_timer)) {
2034                timer_active    = 2;
2035                timer_expires   = sp->sk_timer.expires;
2036        } else {
2037                timer_active    = 0;
2038                timer_expires = jiffies;
2039        }
2040
2041        state = inet_sk_state_load(sp);
2042        if (state == TCP_LISTEN)
2043                rx_queue = READ_ONCE(sp->sk_ack_backlog);
2044        else
2045                /* Because we don't lock the socket,
2046                 * we might find a transient negative value.
2047                 */
2048                rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2049                                      READ_ONCE(tp->copied_seq), 0);
2050
2051        seq_printf(seq,
2052                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2053                   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2054                   i,
2055                   src->s6_addr32[0], src->s6_addr32[1],
2056                   src->s6_addr32[2], src->s6_addr32[3], srcp,
2057                   dest->s6_addr32[0], dest->s6_addr32[1],
2058                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2059                   state,
2060                   READ_ONCE(tp->write_seq) - tp->snd_una,
2061                   rx_queue,
2062                   timer_active,
2063                   jiffies_delta_to_clock_t(timer_expires - jiffies),
2064                   icsk->icsk_retransmits,
2065                   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2066                   icsk->icsk_probes_out,
2067                   sock_i_ino(sp),
2068                   refcount_read(&sp->sk_refcnt), sp,
2069                   jiffies_to_clock_t(icsk->icsk_rto),
2070                   jiffies_to_clock_t(icsk->icsk_ack.ato),
2071                   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2072                   tp->snd_cwnd,
2073                   state == TCP_LISTEN ?
2074                        fastopenq->max_qlen :
2075                        (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2076                   );
2077}
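
    /* Illustrative /proc/net/tcp6 row produced by the format above for a
     * socket listening on [::1]:8080 (hypothetical values, little-endian
     * host, trailing fields abbreviated; a single row, wrapped here for
     * readability). Each address prints as four host-endian 32-bit words,
     * which is why ::1 appears as ...01000000:
     *
     *   0: 00000000000000000000000001000000:1F90
     *      00000000000000000000000000000000:0000
     *      0A 00000000:00000000 00:00000000 00000000  1000 0 12345 ...
     */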
2078
2079static void get_timewait6_sock(struct seq_file *seq,
2080                               struct inet_timewait_sock *tw, int i)
2081{
2082        long delta = tw->tw_timer.expires - jiffies;
2083        const struct in6_addr *dest, *src;
2084        __u16 destp, srcp;
2085
2086        dest = &tw->tw_v6_daddr;
2087        src  = &tw->tw_v6_rcv_saddr;
2088        destp = ntohs(tw->tw_dport);
2089        srcp  = ntohs(tw->tw_sport);
2090
2091        seq_printf(seq,
2092                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2093                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2094                   i,
2095                   src->s6_addr32[0], src->s6_addr32[1],
2096                   src->s6_addr32[2], src->s6_addr32[3], srcp,
2097                   dest->s6_addr32[0], dest->s6_addr32[1],
2098                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2099                   tw->tw_substate, 0, 0,
2100                   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2101                   refcount_read(&tw->tw_refcnt), tw);
2102}
2103
2104static int tcp6_seq_show(struct seq_file *seq, void *v)
2105{
2106        struct tcp_iter_state *st;
2107        struct sock *sk = v;
2108
2109        if (v == SEQ_START_TOKEN) {
2110                seq_puts(seq,
2111                         "  sl  "
2112                         "local_address                         "
2113                         "remote_address                        "
2114                         "st tx_queue rx_queue tr tm->when retrnsmt"
2115                         "   uid  timeout inode\n");
2116                goto out;
2117        }
2118        st = seq->private;
2119
2120        if (sk->sk_state == TCP_TIME_WAIT)
2121                get_timewait6_sock(seq, v, st->num);
2122        else if (sk->sk_state == TCP_NEW_SYN_RECV)
2123                get_openreq6(seq, v, st->num);
2124        else
2125                get_tcp6_sock(seq, v, st->num);
2126out:
2127        return 0;
2128}
2129
2130static const struct seq_operations tcp6_seq_ops = {
2131        .show           = tcp6_seq_show,
2132        .start          = tcp_seq_start,
2133        .next           = tcp_seq_next,
2134        .stop           = tcp_seq_stop,
2135};
2136
2137static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2138        .family         = AF_INET6,
2139};
2140
2141int __net_init tcp6_proc_init(struct net *net)
2142{
2143        if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2144                        sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2145                return -ENOMEM;
2146        return 0;
2147}
2148
2149void tcp6_proc_exit(struct net *net)
2150{
2151        remove_proc_entry("tcp6", net->proc_net);
2152}
2153#endif
2154
2155struct proto tcpv6_prot = {
2156        .name                   = "TCPv6",
2157        .owner                  = THIS_MODULE,
2158        .close                  = tcp_close,
2159        .pre_connect            = tcp_v6_pre_connect,
2160        .connect                = tcp_v6_connect,
2161        .disconnect             = tcp_disconnect,
2162        .accept                 = inet_csk_accept,
2163        .ioctl                  = tcp_ioctl,
2164        .init                   = tcp_v6_init_sock,
2165        .destroy                = tcp_v6_destroy_sock,
2166        .shutdown               = tcp_shutdown,
2167        .setsockopt             = tcp_setsockopt,
2168        .getsockopt             = tcp_getsockopt,
2169        .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2170        .keepalive              = tcp_set_keepalive,
2171        .recvmsg                = tcp_recvmsg,
2172        .sendmsg                = tcp_sendmsg,
2173        .sendpage               = tcp_sendpage,
2174        .backlog_rcv            = tcp_v6_do_rcv,
2175        .release_cb             = tcp_release_cb,
2176        .hash                   = inet6_hash,
2177        .unhash                 = inet_unhash,
2178        .get_port               = inet_csk_get_port,
2179#ifdef CONFIG_BPF_SYSCALL
2180        .psock_update_sk_prot   = tcp_bpf_update_proto,
2181#endif
2182        .enter_memory_pressure  = tcp_enter_memory_pressure,
2183        .leave_memory_pressure  = tcp_leave_memory_pressure,
2184        .stream_memory_free     = tcp_stream_memory_free,
2185        .sockets_allocated      = &tcp_sockets_allocated,
2186        .memory_allocated       = &tcp_memory_allocated,
2187        .memory_pressure        = &tcp_memory_pressure,
2188        .orphan_count           = &tcp_orphan_count,
2189        .sysctl_mem             = sysctl_tcp_mem,
2190        .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2191        .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2192        .max_header             = MAX_TCP_HEADER,
2193        .obj_size               = sizeof(struct tcp6_sock),
2194        .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2195        .twsk_prot              = &tcp6_timewait_sock_ops,
2196        .rsk_prot               = &tcp6_request_sock_ops,
2197        .h.hashinfo             = &tcp_hashinfo,
2198        .no_autobind            = true,
2199        .diag_destroy           = tcp_abort,
2200};
2201EXPORT_SYMBOL_GPL(tcpv6_prot);
2202
2203/* Thinking of making this const? Don't:
2204 * early_demux can change based on a sysctl.
2205 */
2206static struct inet6_protocol tcpv6_protocol = {
2207        .early_demux    =       tcp_v6_early_demux,
2208        .early_demux_handler =  tcp_v6_early_demux,
2209        .handler        =       tcp_v6_rcv,
2210        .err_handler    =       tcp_v6_err,
2211        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2212};
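
    /* The sysctl in question is net.ipv4.tcp_early_demux (it covers IPv6
     * too); e.g. "sysctl -w net.ipv4.tcp_early_demux=0" disables the
     * tcp_v6_early_demux() fast path above at runtime.
     */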
2213
2214static struct inet_protosw tcpv6_protosw = {
2215        .type           =       SOCK_STREAM,
2216        .protocol       =       IPPROTO_TCP,
2217        .prot           =       &tcpv6_prot,
2218        .ops            =       &inet6_stream_ops,
2219        .flags          =       INET_PROTOSW_PERMANENT |
2220                                INET_PROTOSW_ICSK,
2221};
2222
2223static int __net_init tcpv6_net_init(struct net *net)
2224{
2225        return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2226                                    SOCK_RAW, IPPROTO_TCP, net);
2227}
2228
2229static void __net_exit tcpv6_net_exit(struct net *net)
2230{
2231        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2232}
2233
2234static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2235{
2236        inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2237}
2238
2239static struct pernet_operations tcpv6_net_ops = {
2240        .init       = tcpv6_net_init,
2241        .exit       = tcpv6_net_exit,
2242        .exit_batch = tcpv6_net_exit_batch,
2243};
2244
2245int __init tcpv6_init(void)
2246{
2247        int ret;
2248
2249        ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2250        if (ret)
2251                goto out;
2252
2253        /* register inet6 protocol */
2254        ret = inet6_register_protosw(&tcpv6_protosw);
2255        if (ret)
2256                goto out_tcpv6_protocol;
2257
2258        ret = register_pernet_subsys(&tcpv6_net_ops);
2259        if (ret)
2260                goto out_tcpv6_protosw;
2261
2262        ret = mptcpv6_init();
2263        if (ret)
2264                goto out_tcpv6_pernet_subsys;
2265
2266out:
2267        return ret;
2268
2269out_tcpv6_pernet_subsys:
2270        unregister_pernet_subsys(&tcpv6_net_ops);
2271out_tcpv6_protosw:
2272        inet6_unregister_protosw(&tcpv6_protosw);
2273out_tcpv6_protocol:
2274        inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2275        goto out;
2276}
2277
2278void tcpv6_exit(void)
2279{
2280        unregister_pernet_subsys(&tcpv6_net_ops);
2281        inet6_unregister_protosw(&tcpv6_protosw);
2282        inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2283}
2284