linux/net/mptcp/subflow.c
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
                                  enum linux_mptcp_mib_field field)
{
        MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

        pr_debug("subflow_req=%p", subflow_req);

        if (subflow_req->msk)
                sock_put((struct sock *)subflow_req->msk);

        mptcp_token_destroy_request(req);
        tcp_request_sock_ops.destructor(req);
}

static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
                                  void *hmac)
{
        u8 msg[8];

        put_unaligned_be32(nonce1, &msg[0]);
        put_unaligned_be32(nonce2, &msg[4]);

        mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
        return mptcp_is_fully_established((void *)msk) &&
               READ_ONCE(msk->pm.accept_subflow);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
                                                     const struct sk_buff *skb)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        u8 hmac[SHA256_DIGEST_SIZE];
        struct mptcp_sock *msk;
        int local_id;

        msk = mptcp_token_get_sock(subflow_req->token);
        if (!msk) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
                return NULL;
        }

        local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
        if (local_id < 0) {
                sock_put((struct sock *)msk);
                return NULL;
        }
        subflow_req->local_id = local_id;

        get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

        subflow_generate_hmac(msk->local_key, msk->remote_key,
                              subflow_req->local_nonce,
                              subflow_req->remote_nonce, hmac);

        subflow_req->thmac = get_unaligned_be64(hmac);
        return msk;
}
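
/* Illustrative sketch (not part of the original file): the truncated HMAC
 * ("thmac") echoed in the MP_JOIN SYN-ACK is simply the first 64 bits of
 * the full SHA-256 HMAC computed above, read in network byte order. A
 * hypothetical helper making the truncation explicit:
 */
static u64 __maybe_unused subflow_thmac_example(u64 key1, u64 key2,
                                                u32 nonce1, u32 nonce2)
{
        u8 hmac[SHA256_DIGEST_SIZE];

        subflow_generate_hmac(key1, key2, nonce1, nonce2, hmac);

        /* bytes 0..7 of the 32-byte digest become the on-wire thmac */
        return get_unaligned_be64(hmac);
}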

static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

        subflow_req->mp_capable = 0;
        subflow_req->mp_join = 0;
        subflow_req->msk = NULL;
        mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
        /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
         * TCP option space.
         */
        if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
                return -EINVAL;
#endif

        return 0;
}

static void subflow_init_req(struct request_sock *req,
                             const struct sock *sk_listener,
                             struct sk_buff *skb)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_options_received mp_opt;
        int ret;

        pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

        ret = __subflow_init_req(req, sk_listener);
        if (ret)
                return;

        mptcp_get_options(skb, &mp_opt);

        if (mp_opt.mp_capable) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

                if (mp_opt.mp_join)
                        return;
        } else if (mp_opt.mp_join) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
        }

        if (mp_opt.mp_capable && listener->request_mptcp) {
                int err, retries = 4;

                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
                do {
                        get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
                } while (subflow_req->local_key == 0);

                if (unlikely(req->syncookie)) {
                        mptcp_crypto_key_sha(subflow_req->local_key,
                                             &subflow_req->token,
                                             &subflow_req->idsn);
                        if (mptcp_token_exists(subflow_req->token)) {
                                if (retries-- > 0)
                                        goto again;
                        } else {
                                subflow_req->mp_capable = 1;
                        }
                        return;
                }

                err = mptcp_token_new_request(req);
                if (err == 0)
                        subflow_req->mp_capable = 1;
                else if (retries-- > 0)
                        goto again;

        } else if (mp_opt.mp_join && listener->request_mptcp) {
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
                subflow_req->mp_join = 1;
                subflow_req->backup = mp_opt.backup;
                subflow_req->remote_id = mp_opt.join_id;
                subflow_req->token = mp_opt.token;
                subflow_req->remote_nonce = mp_opt.nonce;
                subflow_req->msk = subflow_token_join_request(req, skb);

                if (unlikely(req->syncookie) && subflow_req->msk) {
                        if (mptcp_can_accept_new_subflow(subflow_req->msk))
                                subflow_init_req_cookie_join_save(subflow_req, skb);
                }

                pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
                         subflow_req->remote_nonce, subflow_req->msk);
        }
}
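
/* Illustrative sketch (not part of the original file): per RFC 8684 the
 * token is the most significant 32 bits of SHA-256 of the local key, and
 * the initial data sequence number (idsn) the least significant 64 bits,
 * which is what mptcp_crypto_key_sha() extracts above. A hypothetical
 * standalone use, with a made-up key:
 */
static void __maybe_unused subflow_token_example(void)
{
        u64 key = 0x0123456789abcdefULL;        /* made-up local key */
        u32 token;
        u64 idsn;

        mptcp_crypto_key_sha(key, &token, &idsn);
        /* token == sha256(key) bytes 0..3, idsn == bytes 24..31 */
}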

int mptcp_subflow_init_cookie_req(struct request_sock *req,
                                  const struct sock *sk_listener,
                                  struct sk_buff *skb)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_options_received mp_opt;
        int err;

        err = __subflow_init_req(req, sk_listener);
        if (err)
                return err;

        mptcp_get_options(skb, &mp_opt);

        if (mp_opt.mp_capable && mp_opt.mp_join)
                return -EINVAL;

        if (mp_opt.mp_capable && listener->request_mptcp) {
                if (mp_opt.sndr_key == 0)
                        return -EINVAL;

                subflow_req->local_key = mp_opt.rcvr_key;
                err = mptcp_token_new_request(req);
                if (err)
                        return err;

                subflow_req->mp_capable = 1;
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
        } else if (mp_opt.mp_join && listener->request_mptcp) {
                if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
                        return -EINVAL;

                if (mptcp_can_accept_new_subflow(subflow_req->msk))
                        subflow_req->mp_join = 1;

                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

static void subflow_v4_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
        u8 hmac[SHA256_DIGEST_SIZE];
        u64 thmac;

        subflow_generate_hmac(subflow->remote_key, subflow->local_key,
                              subflow->remote_nonce, subflow->local_nonce,
                              hmac);

        thmac = get_unaligned_be64(hmac);
        pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
                 subflow, subflow->token,
                 (unsigned long long)thmac,
                 (unsigned long long)subflow->thmac);

        return thmac == subflow->thmac;
}

void mptcp_subflow_reset(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct sock *sk = subflow->conn;

        tcp_set_state(ssk, TCP_CLOSE);
        tcp_send_active_reset(ssk, GFP_ATOMIC);
        tcp_done(ssk);
        if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
            schedule_work(&mptcp_sk(sk)->work))
                sock_hold(sk);
}
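
/* Illustrative sketch (not part of the original file): the sock_hold()
 * above pairs with a sock_put() once the MPTCP worker has run, so the msk
 * cannot go away while the close-subflow work is pending. Assuming a
 * worker shaped like mptcp_worker(), the release side would look roughly
 * like this (hypothetical, simplified):
 *
 *        static void hypothetical_worker(struct work_struct *work)
 *        {
 *                struct mptcp_sock *msk = container_of(work,
 *                                                      struct mptcp_sock, work);
 *
 *                if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 *                        ;  // close the flagged subflows here
 *
 *                sock_put((struct sock *)msk);  // drops the hold taken above
 *        }
 */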

static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_options_received mp_opt;
        struct sock *parent = subflow->conn;

        subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

        if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
                inet_sk_state_store(parent, TCP_ESTABLISHED);
                parent->sk_state_change(parent);
        }

        /* make sure no special action is taken on any packet other than the
         * SYN-ACK
         */
        if (subflow->conn_finished)
                return;

        subflow->rel_write_seq = 1;
        subflow->conn_finished = 1;
        subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
        pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

        mptcp_get_options(skb, &mp_opt);
        if (subflow->request_mptcp) {
                if (!mp_opt.mp_capable) {
                        MPTCP_INC_STATS(sock_net(sk),
                                        MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
                        mptcp_do_fallback(sk);
                        pr_fallback(mptcp_sk(subflow->conn));
                        goto fallback;
                }

                subflow->mp_capable = 1;
                subflow->can_ack = 1;
                subflow->remote_key = mp_opt.sndr_key;
                pr_debug("subflow=%p, remote_key=%llu", subflow,
                         subflow->remote_key);
                mptcp_finish_connect(sk);
        } else if (subflow->request_join) {
                u8 hmac[SHA256_DIGEST_SIZE];

                if (!mp_opt.mp_join)
                        goto do_reset;

                subflow->thmac = mp_opt.thmac;
                subflow->remote_nonce = mp_opt.nonce;
                pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
                         subflow->thmac, subflow->remote_nonce);

                if (!subflow_thmac_valid(subflow)) {
                        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
                        goto do_reset;
                }

                subflow_generate_hmac(subflow->local_key, subflow->remote_key,
                                      subflow->local_nonce,
                                      subflow->remote_nonce,
                                      hmac);
                memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

                if (!mptcp_finish_join(sk))
                        goto do_reset;

                subflow->mp_join = 1;
                MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
        } else if (mptcp_check_fallback(sk)) {
fallback:
                mptcp_rcv_space_init(mptcp_sk(parent), sk);
        }
        return;

do_reset:
        mptcp_subflow_reset(sk);
}

struct request_sock_ops mptcp_subflow_request_sock_ops;
EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        /* Never answer SYNs sent to broadcast or multicast addresses */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        return tcp_conn_request(&mptcp_subflow_request_sock_ops,
                                &subflow_request_sock_ipv4_ops,
                                sk, skb);
drop:
        tcp_listendrop(sk);
        return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        if (skb->protocol == htons(ETH_P_IP))
                return subflow_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        return tcp_conn_request(&mptcp_subflow_request_sock_ops,
                                &subflow_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
                               const struct mptcp_options_received *mp_opt)
{
        const struct mptcp_subflow_request_sock *subflow_req;
        u8 hmac[SHA256_DIGEST_SIZE];
        struct mptcp_sock *msk;

        subflow_req = mptcp_subflow_rsk(req);
        msk = subflow_req->msk;
        if (!msk)
                return false;

        subflow_generate_hmac(msk->remote_key, msk->local_key,
                              subflow_req->remote_nonce,
                              subflow_req->local_nonce, hmac);

        return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
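
/* Illustrative note (not part of the original file): crypto_memneq() is
 * used instead of memcmp() so the comparison takes the same time whether
 * the HMACs differ in the first byte or in the last one, avoiding a timing
 * side channel on the MP_JOIN handshake. A hypothetical wrapper spelling
 * this out:
 */
static bool __maybe_unused hmac_matches_example(const u8 *a, const u8 *b)
{
        /* constant-time compare: returns zero only when all bytes match */
        return crypto_memneq(a, b, MPTCPOPT_HMAC_LEN) == 0;
}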

static void mptcp_sock_destruct(struct sock *sk)
{
        /* if the new mptcp socket isn't accepted, it is freed from the
         * TCP listener socket's request queue, linked from req->sk. The
         * TCP socket is then released. This calls the ULP release
         * function, which will also remove the mptcp socket, via
         * sock_put(ctx->conn).
         *
         * Problem is that the mptcp socket will be in ESTABLISHED state
         * and will not have the SOCK_DEAD flag set. Both result in
         * warnings from inet_sock_destruct.
         */

        if (sk->sk_state == TCP_ESTABLISHED) {
                sk->sk_state = TCP_CLOSE;
                WARN_ON_ONCE(sk->sk_socket);
                sock_orphan(sk);
        }

        mptcp_destroy_common(mptcp_sk(sk));
        inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
        inet_sk_state_store(sk, TCP_CLOSE);
        sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
                                 struct mptcp_subflow_context *old_ctx)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        mptcp_subflow_tcp_fallback(sk, old_ctx);
        icsk->icsk_ulp_ops = NULL;
        rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
        tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
        struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

        if (!ctx)
                return;

        subflow_ulp_fallback(ssk, ctx);
        if (ctx->conn)
                sock_put(ctx->conn);

        kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
                                     struct mptcp_options_received *mp_opt)
{
        struct mptcp_sock *msk = mptcp_sk(subflow->conn);

        subflow->remote_key = mp_opt->sndr_key;
        subflow->fully_established = 1;
        subflow->can_ack = 1;
        WRITE_ONCE(msk->fully_established, true);
}

static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst,
                                          struct request_sock *req_unhash,
                                          bool *own_req)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
        struct mptcp_subflow_request_sock *subflow_req;
        struct mptcp_options_received mp_opt;
        bool fallback, fallback_is_fatal;
        struct sock *new_msk = NULL;
        struct sock *child;

        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

        /* After child creation we must look for 'mp_capable' even when options
         * are not parsed
         */
        mp_opt.mp_capable = 0;

        /* hopefully temporary handling for MP_JOIN+syncookie */
        subflow_req = mptcp_subflow_rsk(req);
        fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
        fallback = !tcp_rsk(req)->is_mptcp;
        if (fallback)
                goto create_child;

        /* if the sk is MP_CAPABLE, we try to fetch the client key */
        if (subflow_req->mp_capable) {
                if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
                        /* here we can receive and accept an in-window,
                         * out-of-order pkt, which will not carry the MP_CAPABLE
                         * opt even on mptcp enabled paths
                         */
                        goto create_msk;
                }

                mptcp_get_options(skb, &mp_opt);
                if (!mp_opt.mp_capable) {
                        fallback = true;
                        goto create_child;
                }

create_msk:
                new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
                if (!new_msk)
                        fallback = true;
        } else if (subflow_req->mp_join) {
                mptcp_get_options(skb, &mp_opt);
                if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
                    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
                        fallback = true;
                }
        }

create_child:
        child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
                                                     req_unhash, own_req);

        if (child && *own_req) {
                struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

                tcp_rsk(req)->drop_req = false;

                /* we need to fallback on ctx allocation failure and on pre-reqs
                 * checking above. In the latter scenario we additionally need
                 * to reset the context to non MPTCP status.
                 */
                if (!ctx || fallback) {
                        if (fallback_is_fatal)
                                goto dispose_child;

                        subflow_drop_ctx(child);
                        goto out;
                }

                if (ctx->mp_capable) {
                        /* this can't race with mptcp_close(), as the msk is
                         * not yet exposed to user-space
                         */
                        inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

                        /* the new mpc subflow takes ownership of the newly
                         * created mptcp socket
                         */
                        new_msk->sk_destruct = mptcp_sock_destruct;
                        mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
                        mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
                        ctx->conn = new_msk;
                        new_msk = NULL;

                        /* with OoO packets we can reach here without the
                         * ingress mpc option
                         */
                        if (mp_opt.mp_capable)
                                mptcp_subflow_fully_established(ctx, &mp_opt);
                } else if (ctx->mp_join) {
                        struct mptcp_sock *owner;

                        owner = subflow_req->msk;
                        if (!owner)
                                goto dispose_child;

                        /* move the msk reference ownership to the subflow */
                        subflow_req->msk = NULL;
                        ctx->conn = (struct sock *)owner;
                        if (!mptcp_finish_join(child))
                                goto dispose_child;

                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
                        tcp_rsk(req)->drop_req = true;
                }
        }

out:
        /* dispose of the leftover mptcp master, if any */
        if (unlikely(new_msk))
                mptcp_force_close(new_msk);

        /* check for expected invariant - should never trigger, just helps
         * catching earlier subtle bugs
         */
        WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
                     (!mptcp_subflow_ctx(child) ||
                      !mptcp_subflow_ctx(child)->conn));
        return child;

dispose_child:
        subflow_drop_ctx(child);
        tcp_rsk(req)->drop_req = true;
        inet_csk_prepare_for_destroy_sock(child);
        tcp_done(child);
        req->rsk_ops->send_reset(sk, skb);

        /* The last child reference will be released by the caller */
        return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
        MAPPING_OK,
        MAPPING_INVALID,
        MAPPING_EMPTY,
        MAPPING_DATA_FIN,
        MAPPING_DUMMY
};

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
        if ((u32)seq == (u32)old_seq)
                return old_seq;

        /* Assume map covers data not mapped yet. */
        return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}
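
/* Illustrative sketch (not part of the original file): when the peer sends
 * 32-bit DSNs, the upper 32 bits of the 64-bit sequence are inferred from
 * the end of the previous mapping. E.g. if the old map ended just below a
 * 32-bit wrap (seq 0x1fffffff0, len 0x10), a new 32-bit seq of 0 expands
 * into the next 4GB window:
 */
static void __maybe_unused expand_seq_example(void)
{
        u64 expanded = expand_seq(0x1fffffff0ULL, 0x10, 0x0);

        /* 0x1fffffff0 + 0x10 wraps the low 32 bits, so the upper half
         * becomes 0x2 and the expanded sequence is 0x200000000
         */
        WARN_ON_ONCE(expanded != 0x200000000ULL);
}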

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
        WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
                  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        unsigned int skb_consumed;

        skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
        if (WARN_ON_ONCE(skb_consumed >= skb->len))
                return true;

        return skb->len - skb_consumed <= subflow->map_data_len -
                                          mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

        if (unlikely(before(ssn, subflow->map_subflow_seq))) {
                /* Mapping covers data later in the subflow stream,
                 * currently unsupported.
                 */
                warn_bad_map(subflow, ssn);
                return false;
        }
        if (unlikely(!before(ssn, subflow->map_subflow_seq +
                                  subflow->map_data_len))) {
                /* Mapping covers only past subflow data, invalid */
                warn_bad_map(subflow, ssn + skb->len);
                return false;
        }
        return true;
}

static enum mapping_status get_mapping_status(struct sock *ssk,
                                              struct mptcp_sock *msk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_ext *mpext;
        struct sk_buff *skb;
        u16 data_len;
        u64 map_seq;

        skb = skb_peek(&ssk->sk_receive_queue);
        if (!skb)
                return MAPPING_EMPTY;

        if (mptcp_check_fallback(ssk))
                return MAPPING_DUMMY;

        mpext = mptcp_get_ext(skb);
        if (!mpext || !mpext->use_map) {
                if (!subflow->map_valid && !skb->len) {
                        /* the TCP stack delivers 0-len FIN packets to the
                         * receive queue; those are the only 0-len packets
                         * ever expected here, and the only ones we can
                         * admit with no mapping
                         */
                        if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
                                WARN_ONCE(1, "0len seq %d:%d flags %x",
                                          TCP_SKB_CB(skb)->seq,
                                          TCP_SKB_CB(skb)->end_seq,
                                          TCP_SKB_CB(skb)->tcp_flags);
                        sk_eat_skb(ssk, skb);
                        return MAPPING_EMPTY;
                }

                if (!subflow->map_valid)
                        return MAPPING_INVALID;

                goto validate_seq;
        }

        pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
                 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
                 mpext->data_len, mpext->data_fin);

        data_len = mpext->data_len;
        if (data_len == 0) {
                pr_err("Infinite mapping not handled");
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
                return MAPPING_INVALID;
        }

        if (mpext->data_fin == 1) {
                if (data_len == 1) {
                        bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
                                                                 mpext->dsn64);
                        pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
                        if (subflow->map_valid) {
                                /* A DATA_FIN might arrive in a DSS
                                 * option before the previous mapping
                                 * has been fully consumed. Continue
                                 * handling the existing mapping.
                                 */
                                skb_ext_del(skb, SKB_EXT_MPTCP);
                                return MAPPING_OK;
                        } else {
                                if (updated && schedule_work(&msk->work))
                                        sock_hold((struct sock *)msk);

                                return MAPPING_DATA_FIN;
                        }
                } else {
                        u64 data_fin_seq = mpext->data_seq + data_len - 1;

                        /* If mpext->data_seq is a 32-bit value, data_fin_seq
                         * must also be limited to 32 bits.
                         */
                        if (!mpext->dsn64)
                                data_fin_seq &= GENMASK_ULL(31, 0);

                        mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
                        pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
                                 data_fin_seq, mpext->dsn64);
                }

                /* Adjust for DATA_FIN using 1 byte of sequence space */
                data_len--;
        }

        if (!mpext->dsn64) {
                map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
                                     mpext->data_seq);
                pr_debug("expanded seq=%llu", map_seq);
        } else {
                map_seq = mpext->data_seq;
        }
        WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);

        if (subflow->map_valid) {
                /* Allow replacing only with an identical map */
                if (subflow->map_seq == map_seq &&
                    subflow->map_subflow_seq == mpext->subflow_seq &&
                    subflow->map_data_len == data_len) {
                        skb_ext_del(skb, SKB_EXT_MPTCP);
                        return MAPPING_OK;
                }

                /* If this skb's data is fully covered by the current mapping,
                 * the new map would need caching, which is not supported
                 */
                if (skb_is_fully_mapped(ssk, skb)) {
                        MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
                        return MAPPING_INVALID;
                }

                /* will validate the next map after consuming the current one */
                return MAPPING_OK;
        }

        subflow->map_seq = map_seq;
        subflow->map_subflow_seq = mpext->subflow_seq;
        subflow->map_data_len = data_len;
        subflow->map_valid = 1;
        subflow->mpc_map = mpext->mpc_map;
        pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
                 subflow->map_seq, subflow->map_subflow_seq,
                 subflow->map_data_len);

validate_seq:
        /* we revalidate the valid mapping on each new skb, because we must
         * ensure the current skb is completely covered by the available
         * mapping
         */
        if (!validate_mapping(ssk, skb))
                return MAPPING_INVALID;

        skb_ext_del(skb, SKB_EXT_MPTCP);
        return MAPPING_OK;
}

static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
                                       u64 limit)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
        u32 incr;

        incr = limit >= skb->len ? skb->len + fin : limit;

        pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
                 subflow->map_subflow_seq);
        MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
        tcp_sk(ssk)->copied_seq += incr;
        if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
                sk_eat_skb(ssk, skb);
        if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
                subflow->map_valid = 0;
        if (incr)
                tcp_cleanup_rbuf(ssk, incr);
}
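
/* Illustrative worked example (not part of the original file): the "+ fin"
 * above accounts for the FIN flag consuming one unit of TCP sequence
 * space. For a fully duplicate 100-byte skb carrying a FIN:
 *
 *        incr = limit >= skb->len ? skb->len + fin : limit;
 *             = 100 + 1 = 101     (limit >= 100, fin == 1)
 *
 * so copied_seq also steps over the FIN; with limit < skb->len only the
 * duplicate prefix is discarded and the skb stays on the queue.
 */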

static bool subflow_check_data_avail(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        enum mapping_status status;
        struct mptcp_sock *msk;
        struct sk_buff *skb;

        pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
                 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
        if (!skb_peek(&ssk->sk_receive_queue))
                subflow->data_avail = 0;
        if (subflow->data_avail)
                return true;

        msk = mptcp_sk(subflow->conn);
        for (;;) {
                u64 ack_seq;
                u64 old_ack;

                status = get_mapping_status(ssk, msk);
                pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
                if (status == MAPPING_INVALID) {
                        ssk->sk_err = EBADMSG;
                        goto fatal;
                }
                if (status == MAPPING_DUMMY) {
                        __mptcp_do_fallback(msk);
                        skb = skb_peek(&ssk->sk_receive_queue);
                        subflow->map_valid = 1;
                        subflow->map_seq = READ_ONCE(msk->ack_seq);
                        subflow->map_data_len = skb->len;
                        subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
                                                   subflow->ssn_offset;
                        subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
                        return true;
                }

                if (status != MAPPING_OK)
                        return false;

                skb = skb_peek(&ssk->sk_receive_queue);
                if (WARN_ON_ONCE(!skb))
                        return false;

                /* if msk lacks the remote key, this subflow must provide an
                 * MP_CAPABLE-based mapping
                 */
                if (unlikely(!READ_ONCE(msk->can_ack))) {
                        if (!subflow->mpc_map) {
                                ssk->sk_err = EBADMSG;
                                goto fatal;
                        }
                        WRITE_ONCE(msk->remote_key, subflow->remote_key);
                        WRITE_ONCE(msk->ack_seq, subflow->map_seq);
                        WRITE_ONCE(msk->can_ack, true);
                }

                old_ack = READ_ONCE(msk->ack_seq);
                ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
                if (ack_seq == old_ack) {
                        subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
                        break;
                } else if (after64(ack_seq, old_ack)) {
                        subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
                        break;
                }

                /* only accept in-sequence mappings. Old values are spurious
                 * retransmissions
                 */
                mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
        }
        return true;

fatal:
        /* fatal protocol error, close the socket */
        /* This barrier is coupled with smp_rmb() in tcp_poll() */
        smp_wmb();
        ssk->sk_error_report(ssk);
        tcp_set_state(ssk, TCP_CLOSE);
        tcp_send_active_reset(ssk, GFP_ATOMIC);
        subflow->data_avail = 0;
        return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        /* check if the current mapping is still valid */
        if (subflow->map_valid &&
            mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
                subflow->map_valid = 0;
                subflow->data_avail = 0;

                pr_debug("Done with mapping: seq=%u data_len=%u",
                         subflow->map_subflow_seq,
                         subflow->map_data_len);
        }

        return subflow_check_data_avail(sk);
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored:
 * as far as the mptcp peer is concerned, that data is still inflight.
 * The DSS ACK is updated when the skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
        const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        const struct sock *sk = subflow->conn;

        *space = tcp_space(sk);
        *full_space = tcp_full_space(sk);
}
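
/* Illustrative sketch (not part of the original file): the TCP stack is
 * assumed to consult this hook while computing the window to advertise,
 * roughly along the lines of the simplified snippet below, so each subflow
 * advertises space based on the MPTCP-level receive buffer rather than on
 * its own:
 *
 *        if (sk_is_mptcp(sk))
 *                mptcp_space(sk, &free_space, &allowed_space);
 */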

static void subflow_data_ready(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        u16 state = 1 << inet_sk_state_load(sk);
        struct sock *parent = subflow->conn;
        struct mptcp_sock *msk;

        msk = mptcp_sk(parent);
        if (state & TCPF_LISTEN) {
                set_bit(MPTCP_DATA_READY, &msk->flags);
                parent->sk_data_ready(parent);
                return;
        }

        WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
                     !subflow->mp_join && !(state & TCPF_CLOSE));

        if (mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        if (!sk_stream_is_writeable(sk))
                return;

        if (sk_stream_is_writeable(parent)) {
                set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
                smp_mb__after_atomic();
                /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
                sk_stream_write_space(parent);
        }
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (sk->sk_family == AF_INET6)
                return &subflow_v6_specific;
#endif
        return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_connection_sock_af_ops *target;

        target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

        pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
                 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

        if (likely(icsk->icsk_af_ops == target))
                return;

        subflow->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
                                struct sockaddr_storage *addr)
{
        memset(addr, 0, sizeof(*addr));
        addr->ss_family = info->family;
        if (addr->ss_family == AF_INET) {
                struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

                in_addr->sin_addr = info->addr;
                in_addr->sin_port = info->port;
        }
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        else if (addr->ss_family == AF_INET6) {
                struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

                in6_addr->sin6_addr = info->addr6;
                in6_addr->sin6_port = info->port;
        }
#endif
}
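
/* Illustrative sketch (not part of the original file): a minimal IPv4 use
 * of the helper above, with a made-up address. Both the port and the
 * address are already stored in network byte order in mptcp_addr_info:
 */
static void __maybe_unused info2sockaddr_example(void)
{
        struct mptcp_addr_info info = {
                .family = AF_INET,
                .port = htons(8080),            /* made-up port */
        };
        struct sockaddr_storage ss;

        info.addr.s_addr = htonl(INADDR_LOOPBACK);
        mptcp_info2sockaddr(&info, &ss);
        /* ss now holds a sockaddr_in for 127.0.0.1:8080 */
}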

int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
                            const struct mptcp_addr_info *remote)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_subflow_context *subflow;
        struct sockaddr_storage addr;
        int remote_id = remote->id;
        int local_id = loc->id;
        struct socket *sf;
        struct sock *ssk;
        u32 remote_token;
        int addrlen;
        int err;

        if (!mptcp_is_fully_established(sk))
                return -ENOTCONN;

        err = mptcp_subflow_create_socket(sk, &sf);
        if (err)
                return err;

        ssk = sf->sk;
        subflow = mptcp_subflow_ctx(ssk);
        do {
                get_random_bytes(&subflow->local_nonce, sizeof(u32));
        } while (!subflow->local_nonce);

        if (!local_id) {
                err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
                if (err < 0)
                        goto failed;

                local_id = err;
        }

        subflow->remote_key = msk->remote_key;
        subflow->local_key = msk->local_key;
        subflow->token = msk->token;
        mptcp_info2sockaddr(loc, &addr);

        addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (loc->family == AF_INET6)
                addrlen = sizeof(struct sockaddr_in6);
#endif
        ssk->sk_bound_dev_if = loc->ifindex;
        err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
        if (err)
                goto failed;

        mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
        pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
                 remote_token, local_id, remote_id);
        subflow->remote_token = remote_token;
        subflow->local_id = local_id;
        subflow->remote_id = remote_id;
        subflow->request_join = 1;
        subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
        mptcp_info2sockaddr(remote, &addr);

        err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
        if (err && err != -EINPROGRESS)
                goto failed;

        spin_lock_bh(&msk->join_list_lock);
        list_add_tail(&subflow->node, &msk->join_list);
        spin_unlock_bh(&msk->join_list_lock);

        return err;

failed:
        sock_release(sf);
        return err;
}
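
/* Illustrative sketch (not part of the original file): the in-kernel path
 * manager is assumed to call the helper above roughly like this once a new
 * local/remote address pair is known (hypothetical, simplified; the real
 * caller lives in the PM netlink code and runs with the msk socket lock
 * held):
 *
 *        struct mptcp_addr_info local = { .family = remote->family };
 *
 *        lock_sock(sk);
 *        __mptcp_subflow_connect(sk, &local, remote);
 *        release_sock(sk);
 */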

int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
        struct mptcp_subflow_context *subflow;
        struct net *net = sock_net(sk);
        struct socket *sf;
        int err;

        /* un-accepted server sockets can reach here - on bad configuration
         * bail early to avoid greater trouble later
         */
        if (unlikely(!sk->sk_socket))
                return -EINVAL;

        err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
                               &sf);
        if (err)
                return err;

        lock_sock(sf->sk);

        /* kernel sockets do not by default acquire the net ref, but the TCP
         * timer needs it.
         */
        sf->sk->sk_net_refcnt = 1;
        get_net(net);
#ifdef CONFIG_PROC_FS
        this_cpu_add(*net->core.sock_inuse, 1);
#endif
        err = tcp_set_ulp(sf->sk, "mptcp");
        release_sock(sf->sk);

        if (err) {
                sock_release(sf);
                return err;
        }

        /* the newly created socket really belongs to the owning MPTCP master
         * socket, even if for additional subflows the allocation is performed
         * by a kernel workqueue. Adjust inode references, so that the
         * procfs/diag interfaces really show this one belonging to the correct
         * user.
         */
        SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
        SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
        SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

        subflow = mptcp_subflow_ctx(sf->sk);
        pr_debug("subflow=%p", subflow);

        *new_sock = sf;
        sock_hold(sk);
        subflow->conn = sk;

        return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
                                                        gfp_t priority)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;

        ctx = kzalloc(sizeof(*ctx), priority);
        if (!ctx)
                return NULL;

        rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
        INIT_LIST_HEAD(&ctx->node);

        pr_debug("subflow=%p", ctx);

        ctx->tcp_sock = sk;

        return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        if (skwq_has_sleeper(wq))
                wake_up_interruptible_all(&wq->wait);
        rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
        return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        __subflow_state_change(sk);

        if (subflow_simultaneous_connect(sk)) {
                mptcp_do_fallback(sk);
                mptcp_rcv_space_init(mptcp_sk(parent), sk);
                pr_fallback(mptcp_sk(parent));
                subflow->conn_finished = 1;
                if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
                        inet_sk_state_store(parent, TCP_ESTABLISHED);
                        parent->sk_state_change(parent);
                }
        }

        /* as recvmsg() does not acquire the subflow socket for ssk selection,
         * a FIN packet carrying a DSS mapping can go unnoticed if we don't
         * trigger the data-available machinery here.
         */
        if (mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);

        if (__mptcp_check_fallback(mptcp_sk(parent)) &&
            !(parent->sk_shutdown & RCV_SHUTDOWN) &&
            !subflow->rx_eof && subflow_is_done(sk)) {
                subflow->rx_eof = 1;
                mptcp_subflow_eof(parent);
        }
}

static int subflow_ulp_init(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;
        struct tcp_sock *tp = tcp_sk(sk);
        int err = 0;

        /* disallow attaching ULP to a socket unless it has been
         * created with sock_create_kern()
         */
        if (!sk->sk_kern_sock) {
                err = -EOPNOTSUPP;
                goto out;
        }

        ctx = subflow_create_ctx(sk, GFP_KERNEL);
        if (!ctx) {
                err = -ENOMEM;
                goto out;
        }

        pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

        tp->is_mptcp = 1;
        ctx->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = subflow_default_af_ops(sk);
        ctx->tcp_data_ready = sk->sk_data_ready;
        ctx->tcp_state_change = sk->sk_state_change;
        ctx->tcp_write_space = sk->sk_write_space;
        sk->sk_data_ready = subflow_data_ready;
        sk->sk_write_space = subflow_write_space;
        sk->sk_state_change = subflow_state_change;
out:
        return err;
}

static void subflow_ulp_release(struct sock *sk)
{
        struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

        if (!ctx)
                return;

        if (ctx->conn)
                sock_put(ctx->conn);

        kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
                              struct sock *newsk,
                              const gfp_t priority)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
        struct mptcp_subflow_context *new_ctx;

        if (!tcp_rsk(req)->is_mptcp ||
            (!subflow_req->mp_capable && !subflow_req->mp_join)) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        new_ctx = subflow_create_ctx(newsk, priority);
        if (!new_ctx) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        new_ctx->conn_finished = 1;
        new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
        new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
        new_ctx->tcp_state_change = old_ctx->tcp_state_change;
        new_ctx->tcp_write_space = old_ctx->tcp_write_space;
        new_ctx->rel_write_seq = 1;
        new_ctx->tcp_sock = newsk;

        if (subflow_req->mp_capable) {
                /* see comments in subflow_syn_recv_sock(), MPTCP connection
                 * is fully established only after we receive the remote key
                 */
                new_ctx->mp_capable = 1;
                new_ctx->local_key = subflow_req->local_key;
                new_ctx->token = subflow_req->token;
                new_ctx->ssn_offset = subflow_req->ssn_offset;
                new_ctx->idsn = subflow_req->idsn;
        } else if (subflow_req->mp_join) {
                new_ctx->ssn_offset = subflow_req->ssn_offset;
                new_ctx->mp_join = 1;
                new_ctx->fully_established = 1;
                new_ctx->backup = subflow_req->backup;
                new_ctx->local_id = subflow_req->local_id;
                new_ctx->remote_id = subflow_req->remote_id;
                new_ctx->token = subflow_req->token;
                new_ctx->thmac = subflow_req->thmac;
        }
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
        .name           = "mptcp",
        .owner          = THIS_MODULE,
        .init           = subflow_ulp_init,
        .release        = subflow_ulp_release,
        .clone          = subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
        subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
        subflow_ops->slab_name = "request_sock_subflow";

        subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
                                              subflow_ops->obj_size, 0,
                                              SLAB_ACCOUNT |
                                              SLAB_TYPESAFE_BY_RCU,
                                              NULL);
        if (!subflow_ops->slab)
                return -ENOMEM;

        subflow_ops->destructor = subflow_req_destructor;

        return 0;
}

void __init mptcp_subflow_init(void)
{
        mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
        if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
                panic("MPTCP: failed to init subflow request sock ops\n");

        subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
        subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

        subflow_specific = ipv4_specific;
        subflow_specific.conn_request = subflow_v4_conn_request;
        subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_specific.sk_rx_dst_set = subflow_finish_connect;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
        subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

        subflow_v6_specific = ipv6_specific;
        subflow_v6_specific.conn_request = subflow_v6_conn_request;
        subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

        subflow_v6m_specific = subflow_v6_specific;
        subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
        subflow_v6m_specific.send_check = ipv4_specific.send_check;
        subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
        subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
        subflow_v6m_specific.net_frag_header_len = 0;
#endif

        mptcp_diag_subflow_init(&subflow_ulp_ops);

        if (tcp_register_ulp(&subflow_ulp_ops) != 0)
                panic("MPTCP: failed to register subflows to ULP\n");
}