linux/net/mptcp/subflow.c
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
                                  enum linux_mptcp_mib_field field)
{
        MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static int subflow_rebuild_header(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        int local_id, err = 0;

        if (subflow->request_mptcp && !subflow->token) {
                pr_debug("subflow=%p", sk);
                err = mptcp_token_new_connect(sk);
        } else if (subflow->request_join && !subflow->local_nonce) {
                struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;

                pr_debug("subflow=%p", sk);

                do {
                        get_random_bytes(&subflow->local_nonce, sizeof(u32));
                } while (!subflow->local_nonce);

                if (subflow->local_id)
                        goto out;

                local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
                if (local_id < 0)
                        return -EINVAL;

                subflow->local_id = local_id;
        }

out:
        if (err)
                return err;

        return subflow->icsk_af_ops->rebuild_header(sk);
}
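/* Note: subflow_rebuild_header() acts as a late initialization hook on the
 * connect path, running before the SYN options are written: an MP_CAPABLE
 * subflow owns a token and an MP_JOIN subflow a non-zero nonce and a local
 * address id by the time they are needed.
 */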

static void subflow_req_destructor(struct request_sock *req)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

        pr_debug("subflow_req=%p", subflow_req);

        if (subflow_req->msk)
                sock_put((struct sock *)subflow_req->msk);

        if (subflow_req->mp_capable)
                mptcp_token_destroy_request(subflow_req->token);
        tcp_request_sock_ops.destructor(req);
}

static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
                                  void *hmac)
{
        u8 msg[8];

        put_unaligned_be32(nonce1, &msg[0]);
        put_unaligned_be32(nonce2, &msg[4]);

        mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}
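/* Illustration: the HMAC message is just the two handshake nonces,
 * be32(nonce1) || be32(nonce2) (8 bytes), authenticated with the connection
 * keys key1/key2; mptcp_crypto_hmac_sha() is expected to yield an HMAC-SHA256
 * digest, of which MP_JOIN uses either the leading 64 bits (the 'thmac' in
 * the SYN-ACK) or the leading MPTCPOPT_HMAC_LEN bytes (the hmac carried by
 * the third ACK).
 */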

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
                                                     const struct sk_buff *skb)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        u8 hmac[SHA256_DIGEST_SIZE];
        struct mptcp_sock *msk;
        int local_id;

        msk = mptcp_token_get_sock(subflow_req->token);
        if (!msk) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
                return NULL;
        }

        local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
        if (local_id < 0) {
                sock_put((struct sock *)msk);
                return NULL;
        }
        subflow_req->local_id = local_id;

        get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

        subflow_generate_hmac(msk->local_key, msk->remote_key,
                              subflow_req->local_nonce,
                              subflow_req->remote_nonce, hmac);

        subflow_req->thmac = get_unaligned_be64(hmac);
        return msk;
}
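/* Note: the msk reference acquired above via mptcp_token_get_sock() is
 * either dropped in subflow_req_destructor() or handed over to the new
 * subflow in subflow_syn_recv_sock().
 */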

static void subflow_init_req(struct request_sock *req,
                             const struct sock *sk_listener,
                             struct sk_buff *skb)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_options_received mp_opt;

        pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

        mptcp_get_options(skb, &mp_opt);

        subflow_req->mp_capable = 0;
        subflow_req->mp_join = 0;
        subflow_req->msk = NULL;

#ifdef CONFIG_TCP_MD5SIG
        /* no MPTCP if MD5SIG is enabled on this socket, as we may otherwise
         * run out of TCP option space.
         */
        if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
                return;
#endif

        if (mp_opt.mp_capable) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

                if (mp_opt.mp_join)
                        return;
        } else if (mp_opt.mp_join) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
        }

        if (mp_opt.mp_capable && listener->request_mptcp) {
                int err;

                err = mptcp_token_new_request(req);
                if (err == 0)
                        subflow_req->mp_capable = 1;

                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
        } else if (mp_opt.mp_join && listener->request_mptcp) {
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
                subflow_req->mp_join = 1;
                subflow_req->backup = mp_opt.backup;
                subflow_req->remote_id = mp_opt.join_id;
                subflow_req->token = mp_opt.token;
                subflow_req->remote_nonce = mp_opt.nonce;
                subflow_req->msk = subflow_token_join_request(req, skb);
                pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
                         subflow_req->remote_nonce, subflow_req->msk);
        }
}
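/* To summarize the SYN handling above: MP_CAPABLE and MP_JOIN in the same
 * SYN is invalid and leaves the request as plain TCP; a valid MP_CAPABLE
 * allocates a token for the new connection; a valid MP_JOIN copies the
 * echoed token and nonce and precomputes the truncated hmac for the SYN-ACK
 * via subflow_token_join_request().
 */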

static void subflow_v4_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
        u8 hmac[SHA256_DIGEST_SIZE];
        u64 thmac;

        subflow_generate_hmac(subflow->remote_key, subflow->local_key,
                              subflow->remote_nonce, subflow->local_nonce,
                              hmac);

        thmac = get_unaligned_be64(hmac);
        pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
                 subflow, subflow->token,
                 (unsigned long long)thmac,
                 (unsigned long long)subflow->thmac);

        return thmac == subflow->thmac;
}
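/* Note the argument order above: the peer computes its thmac keying the
 * hmac with its own key and nonce first - our remote ones - while the hmac
 * we send back in the third ACK (see subflow_finish_connect()) puts our
 * local key and nonce first.
 */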

static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_options_received mp_opt;
        struct sock *parent = subflow->conn;

        subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

        if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
                inet_sk_state_store(parent, TCP_ESTABLISHED);
                parent->sk_state_change(parent);
        }

        /* make sure we take no special action on any packet other than the
         * SYN-ACK
         */
        if (subflow->conn_finished)
                return;

        subflow->conn_finished = 1;
        subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
        pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

        mptcp_get_options(skb, &mp_opt);
        if (subflow->request_mptcp && mp_opt.mp_capable) {
                subflow->mp_capable = 1;
                subflow->can_ack = 1;
                subflow->remote_key = mp_opt.sndr_key;
                pr_debug("subflow=%p, remote_key=%llu", subflow,
                         subflow->remote_key);
        } else if (subflow->request_join && mp_opt.mp_join) {
                subflow->mp_join = 1;
                subflow->thmac = mp_opt.thmac;
                subflow->remote_nonce = mp_opt.nonce;
                pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
                         subflow->thmac, subflow->remote_nonce);
        } else {
                if (subflow->request_mptcp)
                        MPTCP_INC_STATS(sock_net(sk),
                                        MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
                mptcp_do_fallback(sk);
                pr_fallback(mptcp_sk(subflow->conn));
        }

        if (mptcp_check_fallback(sk)) {
                mptcp_rcv_space_init(mptcp_sk(parent), sk);
                return;
        }

        if (subflow->mp_capable) {
                pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
                         subflow->remote_key);
                mptcp_finish_connect(sk);
        } else if (subflow->mp_join) {
                u8 hmac[SHA256_DIGEST_SIZE];

                pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
                         subflow, subflow->thmac,
                         subflow->remote_nonce);
                if (!subflow_thmac_valid(subflow)) {
                        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
                        subflow->mp_join = 0;
                        goto do_reset;
                }

                subflow_generate_hmac(subflow->local_key, subflow->remote_key,
                                      subflow->local_nonce,
                                      subflow->remote_nonce,
                                      hmac);

                memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

                if (!mptcp_finish_join(sk))
                        goto do_reset;

                MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
        } else {
do_reset:
                tcp_send_active_reset(sk, GFP_ATOMIC);
                tcp_done(sk);
        }
}

static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        /* Never answer SYNs sent to broadcast or multicast addresses */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        return tcp_conn_request(&subflow_request_sock_ops,
                                &subflow_request_sock_ipv4_ops,
                                sk, skb);
drop:
        tcp_listendrop(sk);
        return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        if (skb->protocol == htons(ETH_P_IP))
                return subflow_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        return tcp_conn_request(&subflow_request_sock_ops,
                                &subflow_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
                               const struct mptcp_options_received *mp_opt)
{
        const struct mptcp_subflow_request_sock *subflow_req;
        u8 hmac[SHA256_DIGEST_SIZE];
        struct mptcp_sock *msk;

        subflow_req = mptcp_subflow_rsk(req);
        msk = subflow_req->msk;
        if (!msk)
                return false;

        subflow_generate_hmac(msk->remote_key, msk->local_key,
                              subflow_req->remote_nonce,
                              subflow_req->local_nonce, hmac);

        return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
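/* crypto_memneq() compares the digests in constant time, so a peer cannot
 * derive the expected hmac byte-by-byte from response timing.
 */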

static void mptcp_sock_destruct(struct sock *sk)
{
        /* if the new mptcp socket isn't accepted, it is freed
         * from the tcp listener socket's request queue, linked
         * from req->sk.  The tcp socket is released.
         * This calls the ULP release function, which will
         * also remove the mptcp socket, via
         * sock_put(ctx->conn).
         *
         * Problem is that the mptcp socket will not be in
         * SYN_RECV state and doesn't have the SOCK_DEAD flag.
         * Both result in warnings from inet_sock_destruct.
         */

        if (sk->sk_state == TCP_SYN_RECV) {
                sk->sk_state = TCP_CLOSE;
                WARN_ON_ONCE(sk->sk_socket);
                sock_orphan(sk);
        }

        mptcp_token_destroy(mptcp_sk(sk)->token);
        inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
        inet_sk_state_store(sk, TCP_CLOSE);
        sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
                                 struct mptcp_subflow_context *old_ctx)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        mptcp_subflow_tcp_fallback(sk, old_ctx);
        icsk->icsk_ulp_ops = NULL;
        rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
        tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
        struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

        if (!ctx)
                return;

        subflow_ulp_fallback(ssk, ctx);
        if (ctx->conn)
                sock_put(ctx->conn);

        kfree_rcu(ctx, rcu);
}

static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst,
                                          struct request_sock *req_unhash,
                                          bool *own_req)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
        struct mptcp_subflow_request_sock *subflow_req;
        struct mptcp_options_received mp_opt;
        bool fallback, fallback_is_fatal;
        struct sock *new_msk = NULL;
        struct sock *child;

        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

        /* After child creation we must look for 'mp_capable' even when options
         * are not parsed
         */
        mp_opt.mp_capable = 0;

        /* hopefully temporary handling for MP_JOIN+syncookie */
        subflow_req = mptcp_subflow_rsk(req);
        fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
        fallback = !tcp_rsk(req)->is_mptcp;
        if (fallback)
                goto create_child;

        /* if the sk is MP_CAPABLE, we try to fetch the client key */
        if (subflow_req->mp_capable) {
                if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
                        /* here we can receive and accept an in-window,
                         * out-of-order pkt, which will not carry the MP_CAPABLE
                         * opt even on mptcp enabled paths
                         */
                        goto create_msk;
                }

                mptcp_get_options(skb, &mp_opt);
                if (!mp_opt.mp_capable) {
                        fallback = true;
                        goto create_child;
                }

create_msk:
                new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
                if (!new_msk)
                        fallback = true;
        } else if (subflow_req->mp_join) {
                mptcp_get_options(skb, &mp_opt);
                if (!mp_opt.mp_join ||
                    !subflow_hmac_valid(req, &mp_opt)) {
                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
                        fallback = true;
                }
        }

create_child:
        child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
                                                     req_unhash, own_req);

        if (child && *own_req) {
                struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

                tcp_rsk(req)->drop_req = false;

                /* we need to fall back on ctx allocation failure and on the
                 * prerequisite checks above. In the latter scenario we
                 * additionally need to reset the context to a non-MPTCP
                 * status.
                 */
                if (!ctx || fallback) {
                        if (fallback_is_fatal)
                                goto dispose_child;

                        subflow_drop_ctx(child);
                        goto out;
                }

                if (ctx->mp_capable) {
                        /* the new mpc subflow takes ownership of the newly
                         * created mptcp socket
                         */
                        new_msk->sk_destruct = mptcp_sock_destruct;
                        mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
                        ctx->conn = new_msk;
                        new_msk = NULL;

                        /* with OoO packets we can reach here without the
                         * ingress mpc option
                         */
                        ctx->remote_key = mp_opt.sndr_key;
                        ctx->fully_established = mp_opt.mp_capable;
                        ctx->can_ack = mp_opt.mp_capable;
                } else if (ctx->mp_join) {
                        struct mptcp_sock *owner;

                        owner = subflow_req->msk;
                        if (!owner)
                                goto dispose_child;

                        /* move the msk reference ownership to the subflow */
                        subflow_req->msk = NULL;
                        ctx->conn = (struct sock *)owner;
                        if (!mptcp_finish_join(child))
                                goto dispose_child;

                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
                        tcp_rsk(req)->drop_req = true;
                }
        }

out:
        /* dispose of the left over mptcp master, if any */
        if (unlikely(new_msk))
                mptcp_force_close(new_msk);

        /* check for expected invariant - should never trigger, just helps
         * catching earlier subtle bugs
         */
        WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
                     (!mptcp_subflow_ctx(child) ||
                      !mptcp_subflow_ctx(child)->conn));
        return child;

dispose_child:
        subflow_drop_ctx(child);
        tcp_rsk(req)->drop_req = true;
        tcp_send_active_reset(child, GFP_ATOMIC);
        inet_csk_prepare_for_destroy_sock(child);
        tcp_done(child);

        /* The last child reference will be released by the caller */
        return child;
}
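/* Possible outcomes above: a plain TCP child on fallback (the subflow ctx is
 * dropped), an MP_CAPABLE child owning the freshly cloned msk, an MP_JOIN
 * child attached to an existing msk, or - when a join fails fatally - a
 * reset child whose last reference the caller releases.
 */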

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
        MAPPING_OK,
        MAPPING_INVALID,
        MAPPING_EMPTY,
        MAPPING_DATA_FIN,
        MAPPING_DUMMY
};
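/* MAPPING_OK: the skb at the receive queue head is covered by a valid DSS
 * mapping; MAPPING_INVALID: protocol violation, the subflow must be reset;
 * MAPPING_EMPTY: nothing queued; MAPPING_DATA_FIN: DATA_FIN with no payload;
 * MAPPING_DUMMY: the subflow fell back to plain TCP, data carries no real
 * mapping.
 */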

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
        if ((u32)seq == (u32)old_seq)
                return old_seq;

        /* Assume map covers data not mapped yet. */
        return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}
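/* Worked example (illustrative numbers): with the previous map at
 * old_seq 0x1fffffe00 and old_data_len 0x300, a 32-bit data_seq of 0x100
 * expands to 0x200000100: the upper half of old_seq + old_data_len + 1
 * (0x200000101) is kept, placing the new mapping right at the end of the
 * previous one.
 */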

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
        WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
                  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        unsigned int skb_consumed;

        skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
        if (WARN_ON_ONCE(skb_consumed >= skb->len))
                return true;

        return skb->len - skb_consumed <= subflow->map_data_len -
                                          mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

        if (unlikely(before(ssn, subflow->map_subflow_seq))) {
                /* Mapping covers data later in the subflow stream,
                 * currently unsupported.
                 */
                warn_bad_map(subflow, ssn);
                return false;
        }
        if (unlikely(!before(ssn, subflow->map_subflow_seq +
                                  subflow->map_data_len))) {
                /* Mapping covers only already-consumed subflow data, invalid */
                warn_bad_map(subflow, ssn + skb->len);
                return false;
        }
        return true;
}
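/* i.e. the skb is acceptable only when
 * map_subflow_seq <= ssn < map_subflow_seq + map_data_len; e.g. a map with
 * subflow_seq 1000 and data_len 500 admits ssn 1000..1499 only.
 */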

static enum mapping_status get_mapping_status(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_ext *mpext;
        struct sk_buff *skb;
        u16 data_len;
        u64 map_seq;

        skb = skb_peek(&ssk->sk_receive_queue);
        if (!skb)
                return MAPPING_EMPTY;

        if (mptcp_check_fallback(ssk))
                return MAPPING_DUMMY;

        mpext = mptcp_get_ext(skb);
        if (!mpext || !mpext->use_map) {
                if (!subflow->map_valid && !skb->len) {
                        /* the TCP stack delivers 0-len FIN pkts to the
                         * receive queue; those are the only 0-len pkts ever
                         * expected here, and the only pkts we can admit
                         * without a mapping
                         */
                        if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
                                WARN_ONCE(1, "0len seq %d:%d flags %x",
                                          TCP_SKB_CB(skb)->seq,
                                          TCP_SKB_CB(skb)->end_seq,
                                          TCP_SKB_CB(skb)->tcp_flags);
                        sk_eat_skb(ssk, skb);
                        return MAPPING_EMPTY;
                }

                if (!subflow->map_valid)
                        return MAPPING_INVALID;

                goto validate_seq;
        }

        pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
                 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
                 mpext->data_len, mpext->data_fin);

        data_len = mpext->data_len;
        if (data_len == 0) {
                pr_err("Infinite mapping not handled");
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
                return MAPPING_INVALID;
        }

        if (mpext->data_fin == 1) {
                if (data_len == 1) {
                        pr_debug("DATA_FIN with no payload");
                        if (subflow->map_valid) {
                                /* A DATA_FIN might arrive in a DSS
                                 * option before the previous mapping
                                 * has been fully consumed. Continue
                                 * handling the existing mapping.
                                 */
                                skb_ext_del(skb, SKB_EXT_MPTCP);
                                return MAPPING_OK;
                        } else {
                                return MAPPING_DATA_FIN;
                        }
                }

                /* Adjust for DATA_FIN using 1 byte of sequence space */
                data_len--;
        }

        if (!mpext->dsn64) {
                map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
                                     mpext->data_seq);
                pr_debug("expanded seq=%llu", map_seq);
        } else {
                map_seq = mpext->data_seq;
        }

        if (subflow->map_valid) {
                /* Allow replacing only with an identical map */
                if (subflow->map_seq == map_seq &&
                    subflow->map_subflow_seq == mpext->subflow_seq &&
                    subflow->map_data_len == data_len) {
                        skb_ext_del(skb, SKB_EXT_MPTCP);
                        return MAPPING_OK;
                }

                /* If this skb data are fully covered by the current mapping,
                 * the new map would need caching, which is not supported
                 */
                if (skb_is_fully_mapped(ssk, skb)) {
                        MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
                        return MAPPING_INVALID;
                }

                /* will validate the next map after consuming the current one */
                return MAPPING_OK;
        }

        subflow->map_seq = map_seq;
        subflow->map_subflow_seq = mpext->subflow_seq;
        subflow->map_data_len = data_len;
        subflow->map_valid = 1;
        subflow->mpc_map = mpext->mpc_map;
        pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
                 subflow->map_seq, subflow->map_subflow_seq,
                 subflow->map_data_len);

validate_seq:
        /* we revalidate the existing mapping on each new skb, because we must
         * ensure the current skb is completely covered by the available
         * mapping
         */
        if (!validate_mapping(ssk, skb))
                return MAPPING_INVALID;

        skb_ext_del(skb, SKB_EXT_MPTCP);
        return MAPPING_OK;
}

static int subflow_read_actor(read_descriptor_t *desc,
                              struct sk_buff *skb,
                              unsigned int offset, size_t len)
{
        size_t copy_len = min(desc->count, len);

        desc->count -= copy_len;

        pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
        return copy_len;
}
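/* The actor above never copies anything: returning copy_len makes
 * tcp_read_sock() advance copied_seq, so the pair acts as a "discard up to
 * desc->count bytes" helper for data already received at the MPTCP level.
 */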

static bool subflow_check_data_avail(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        enum mapping_status status;
        struct mptcp_sock *msk;
        struct sk_buff *skb;

        pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
                 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
        if (subflow->data_avail)
                return true;

        msk = mptcp_sk(subflow->conn);
        for (;;) {
                u32 map_remaining;
                size_t delta;
                u64 ack_seq;
                u64 old_ack;

                status = get_mapping_status(ssk);
                pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
                if (status == MAPPING_INVALID) {
                        ssk->sk_err = EBADMSG;
                        goto fatal;
                }
                if (status == MAPPING_DUMMY) {
                        __mptcp_do_fallback(msk);
                        skb = skb_peek(&ssk->sk_receive_queue);
                        subflow->map_valid = 1;
                        subflow->map_seq = READ_ONCE(msk->ack_seq);
                        subflow->map_data_len = skb->len;
                        subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
                                                   subflow->ssn_offset;
                        return true;
                }

                if (status != MAPPING_OK)
                        return false;

                skb = skb_peek(&ssk->sk_receive_queue);
                if (WARN_ON_ONCE(!skb))
                        return false;

                /* if msk lacks the remote key, this subflow must provide an
                 * MP_CAPABLE-based mapping
                 */
                if (unlikely(!READ_ONCE(msk->can_ack))) {
                        if (!subflow->mpc_map) {
                                ssk->sk_err = EBADMSG;
                                goto fatal;
                        }
                        WRITE_ONCE(msk->remote_key, subflow->remote_key);
                        WRITE_ONCE(msk->ack_seq, subflow->map_seq);
                        WRITE_ONCE(msk->can_ack, true);
                }

                old_ack = READ_ONCE(msk->ack_seq);
                ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
                if (ack_seq == old_ack)
                        break;

                /* only accept in-sequence mappings. Old values are spurious
                 * retransmissions; we can hit "future" values on an active
                 * backup subflow switch, and we rely on retransmissions to
                 * get in-sequence data.
                 * Concurrent subflow support will require subflow data
                 * reordering
                 */
                map_remaining = subflow->map_data_len -
                                mptcp_subflow_get_map_offset(subflow);
                if (before64(ack_seq, old_ack))
                        delta = min_t(size_t, old_ack - ack_seq, map_remaining);
                else
                        delta = min_t(size_t, ack_seq - old_ack, map_remaining);

                /* discard mapped data */
                pr_debug("discarding %zu bytes, current map len=%d", delta,
                         map_remaining);
                if (delta) {
                        read_descriptor_t desc = {
                                .count = delta,
                        };
                        int ret;

                        ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
                        if (ret < 0) {
                                ssk->sk_err = -ret;
                                goto fatal;
                        }
                        if (ret < delta)
                                return false;
                        if (delta == map_remaining)
                                subflow->map_valid = 0;
                }
        }
        return true;

fatal:
        /* fatal protocol error, close the socket */
        /* This barrier is coupled with smp_rmb() in tcp_poll() */
        smp_wmb();
        ssk->sk_error_report(ssk);
        tcp_set_state(ssk, TCP_CLOSE);
        tcp_send_active_reset(ssk, GFP_ATOMIC);
        return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sk_buff *skb;

        /* check if current mapping is still valid */
        if (subflow->map_valid &&
            mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
                subflow->map_valid = 0;
                subflow->data_avail = 0;

                pr_debug("Done with mapping: seq=%u data_len=%u",
                         subflow->map_subflow_seq,
                         subflow->map_data_len);
        }

        if (!subflow_check_data_avail(sk)) {
                subflow->data_avail = 0;
                return false;
        }

        skb = skb_peek(&sk->sk_receive_queue);
        subflow->data_avail = skb &&
                       before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
        return subflow->data_avail;
}

/* If the ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored:
 * as far as the mptcp peer is concerned, that data is still in flight.
 * The DSS ACK is updated when the skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
        const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        const struct sock *sk = subflow->conn;

        *space = tcp_space(sk);
        *full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        u16 state = 1 << inet_sk_state_load(sk);
        struct sock *parent = subflow->conn;
        struct mptcp_sock *msk;

        msk = mptcp_sk(parent);
        if (state & TCPF_LISTEN) {
                set_bit(MPTCP_DATA_READY, &msk->flags);
                parent->sk_data_ready(parent);
                return;
        }

        WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
                     !subflow->mp_join && !(state & TCPF_CLOSE));

        if (mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        sk_stream_write_space(sk);
        if (sk_stream_is_writeable(sk)) {
                set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
                smp_mb__after_atomic();
                /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
                sk_stream_write_space(parent);
        }
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (sk->sk_family == AF_INET6)
                return &subflow_v6_specific;
#endif
        return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_connection_sock_af_ops *target;

        target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

        pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
                 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

        if (likely(icsk->icsk_af_ops == target))
                return;

        subflow->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
                                struct sockaddr_storage *addr)
{
        memset(addr, 0, sizeof(*addr));
        addr->ss_family = info->family;
        if (addr->ss_family == AF_INET) {
                struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

                in_addr->sin_addr = info->addr;
                in_addr->sin_port = info->port;
        }
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        else if (addr->ss_family == AF_INET6) {
                struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

                in6_addr->sin6_addr = info->addr6;
                in6_addr->sin6_port = info->port;
        }
#endif
}

int __mptcp_subflow_connect(struct sock *sk, int ifindex,
                            const struct mptcp_addr_info *loc,
                            const struct mptcp_addr_info *remote)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_subflow_context *subflow;
        struct sockaddr_storage addr;
        struct socket *sf;
        u32 remote_token;
        int addrlen;
        int err;

        if (sk->sk_state != TCP_ESTABLISHED)
                return -ENOTCONN;

        err = mptcp_subflow_create_socket(sk, &sf);
        if (err)
                return err;

        subflow = mptcp_subflow_ctx(sf->sk);
        subflow->remote_key = msk->remote_key;
        subflow->local_key = msk->local_key;
        subflow->token = msk->token;
        mptcp_info2sockaddr(loc, &addr);

        addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (loc->family == AF_INET6)
                addrlen = sizeof(struct sockaddr_in6);
#endif
        sf->sk->sk_bound_dev_if = ifindex;
        err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
        if (err)
                goto failed;

        mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
        pr_debug("msk=%p remote_token=%u", msk, remote_token);
        subflow->remote_token = remote_token;
        subflow->local_id = loc->id;
        subflow->request_join = 1;
        subflow->request_bkup = 1;
        mptcp_info2sockaddr(remote, &addr);

        err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
        if (err && err != -EINPROGRESS)
                goto failed;

        spin_lock_bh(&msk->join_list_lock);
        list_add_tail(&subflow->node, &msk->join_list);
        spin_unlock_bh(&msk->join_list_lock);

        return err;

failed:
        sock_release(sf);
        return err;
}
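/* The sequence above mirrors a userspace connect(): create a kernel TCP
 * socket with the mptcp ULP attached, bind it to the requested local address
 * and ifindex, mark it as an MP_JOIN initiator and issue a non-blocking
 * connect; the new subflow is tracked on the msk join_list until the
 * handshake completes.
 */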

int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
        struct mptcp_subflow_context *subflow;
        struct net *net = sock_net(sk);
        struct socket *sf;
        int err;

        /* un-accepted server sockets can reach here - on bad configuration
         * bail early to avoid greater trouble later
         */
        if (unlikely(!sk->sk_socket))
                return -EINVAL;

        err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
                               &sf);
        if (err)
                return err;

        lock_sock(sf->sk);

        /* kernel sockets do not by default acquire a net ref, but the TCP
         * timer needs it.
         */
        sf->sk->sk_net_refcnt = 1;
        get_net(net);
#ifdef CONFIG_PROC_FS
        this_cpu_add(*net->core.sock_inuse, 1);
#endif
        err = tcp_set_ulp(sf->sk, "mptcp");
        release_sock(sf->sk);

        if (err) {
                sock_release(sf);
                return err;
        }

        /* the newly created socket really belongs to the owning MPTCP master
         * socket, even if for additional subflows the allocation is performed
         * by a kernel workqueue. Adjust inode references, so that the
         * procfs/diag interfaces really show this one belonging to the correct
         * user.
         */
        SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
        SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
        SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

        subflow = mptcp_subflow_ctx(sf->sk);
        pr_debug("subflow=%p", subflow);

        *new_sock = sf;
        sock_hold(sk);
        subflow->conn = sk;

        return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
                                                        gfp_t priority)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;

        ctx = kzalloc(sizeof(*ctx), priority);
        if (!ctx)
                return NULL;

        rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
        INIT_LIST_HEAD(&ctx->node);

        pr_debug("subflow=%p", ctx);

        ctx->tcp_sock = sk;

        return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        if (skwq_has_sleeper(wq))
                wake_up_interruptible_all(&wq->wait);
        rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
        return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        __subflow_state_change(sk);

        if (subflow_simultaneous_connect(sk)) {
                mptcp_do_fallback(sk);
                mptcp_rcv_space_init(mptcp_sk(parent), sk);
                pr_fallback(mptcp_sk(parent));
                subflow->conn_finished = 1;
                if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
                        inet_sk_state_store(parent, TCP_ESTABLISHED);
                        parent->sk_state_change(parent);
                }
        }

        /* as recvmsg() does not acquire the subflow socket for ssk selection,
         * a FIN packet carrying a DSS mapping can go unnoticed if we don't
         * trigger the data-available machinery here.
         */
        if (mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);

        if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
            !subflow->rx_eof && subflow_is_done(sk)) {
                subflow->rx_eof = 1;
                mptcp_subflow_eof(parent);
        }
}

static int subflow_ulp_init(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;
        struct tcp_sock *tp = tcp_sk(sk);
        int err = 0;

        /* disallow attaching ULP to a socket unless it has been
         * created with sock_create_kern()
         */
        if (!sk->sk_kern_sock) {
                err = -EOPNOTSUPP;
                goto out;
        }

        ctx = subflow_create_ctx(sk, GFP_KERNEL);
        if (!ctx) {
                err = -ENOMEM;
                goto out;
        }

        pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

        tp->is_mptcp = 1;
        ctx->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = subflow_default_af_ops(sk);
        ctx->tcp_data_ready = sk->sk_data_ready;
        ctx->tcp_state_change = sk->sk_state_change;
        ctx->tcp_write_space = sk->sk_write_space;
        sk->sk_data_ready = subflow_data_ready;
        sk->sk_write_space = subflow_write_space;
        sk->sk_state_change = subflow_state_change;
out:
        return err;
}

static void subflow_ulp_release(struct sock *sk)
{
        struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

        if (!ctx)
                return;

        if (ctx->conn)
                sock_put(ctx->conn);

        kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
                              struct sock *newsk,
                              const gfp_t priority)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
        struct mptcp_subflow_context *new_ctx;

        if (!tcp_rsk(req)->is_mptcp ||
            (!subflow_req->mp_capable && !subflow_req->mp_join)) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        new_ctx = subflow_create_ctx(newsk, priority);
        if (!new_ctx) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        new_ctx->conn_finished = 1;
        new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
        new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
        new_ctx->tcp_state_change = old_ctx->tcp_state_change;
        new_ctx->tcp_write_space = old_ctx->tcp_write_space;
        new_ctx->rel_write_seq = 1;
        new_ctx->tcp_sock = newsk;

        if (subflow_req->mp_capable) {
                /* see comments in subflow_syn_recv_sock(), MPTCP connection
                 * is fully established only after we receive the remote key
                 */
                new_ctx->mp_capable = 1;
                new_ctx->local_key = subflow_req->local_key;
                new_ctx->token = subflow_req->token;
                new_ctx->ssn_offset = subflow_req->ssn_offset;
                new_ctx->idsn = subflow_req->idsn;
        } else if (subflow_req->mp_join) {
                new_ctx->ssn_offset = subflow_req->ssn_offset;
                new_ctx->mp_join = 1;
                new_ctx->fully_established = 1;
                new_ctx->backup = subflow_req->backup;
                new_ctx->local_id = subflow_req->local_id;
                new_ctx->token = subflow_req->token;
                new_ctx->thmac = subflow_req->thmac;
        }
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
        .name           = "mptcp",
        .owner          = THIS_MODULE,
        .init           = subflow_ulp_init,
        .release        = subflow_ulp_release,
        .clone          = subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
        subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
        subflow_ops->slab_name = "request_sock_subflow";

        subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
                                              subflow_ops->obj_size, 0,
                                              SLAB_ACCOUNT |
                                              SLAB_TYPESAFE_BY_RCU,
                                              NULL);
        if (!subflow_ops->slab)
                return -ENOMEM;

        subflow_ops->destructor = subflow_req_destructor;

        return 0;
}
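/* SLAB_TYPESAFE_BY_RCU matches the flags used for the plain TCP request sock
 * cache: request socks are looked up locklessly, so their memory may be
 * reused only for objects of the same type within an RCU grace period.
 */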

void mptcp_subflow_init(void)
{
        subflow_request_sock_ops = tcp_request_sock_ops;
        if (subflow_ops_init(&subflow_request_sock_ops) != 0)
                panic("MPTCP: failed to init subflow request sock ops\n");

        subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
        subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

        subflow_specific = ipv4_specific;
        subflow_specific.conn_request = subflow_v4_conn_request;
        subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_specific.sk_rx_dst_set = subflow_finish_connect;
        subflow_specific.rebuild_header = subflow_rebuild_header;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
        subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

        subflow_v6_specific = ipv6_specific;
        subflow_v6_specific.conn_request = subflow_v6_conn_request;
        subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
        subflow_v6_specific.rebuild_header = subflow_rebuild_header;

        subflow_v6m_specific = subflow_v6_specific;
        subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
        subflow_v6m_specific.send_check = ipv4_specific.send_check;
        subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
        subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
        subflow_v6m_specific.net_frag_header_len = 0;
#endif

        mptcp_diag_subflow_init(&subflow_ulp_ops);

        if (tcp_register_ulp(&subflow_ulp_ops) != 0)
                panic("MPTCP: failed to register subflows to ULP\n");
}