linux/net/mptcp/options.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* Multipath TCP
   3 *
   4 * Copyright (c) 2017 - 2019, Intel Corporation.
   5 */
   6
   7#define pr_fmt(fmt) "MPTCP: " fmt
   8
   9#include <linux/kernel.h>
  10#include <crypto/sha2.h>
  11#include <net/tcp.h>
  12#include <net/mptcp.h>
  13#include "protocol.h"
  14#include "mib.h"
  15
  16static bool mptcp_cap_flag_sha256(u8 flags)
  17{
  18        return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
  19}
  20
  21static void mptcp_parse_option(const struct sk_buff *skb,
  22                               const unsigned char *ptr, int opsize,
  23                               struct mptcp_options_received *mp_opt)
  24{
  25        u8 subtype = *ptr >> 4;
  26        int expected_opsize;
  27        u8 version;
  28        u8 flags;
  29
  30        switch (subtype) {
  31        case MPTCPOPT_MP_CAPABLE:
  32                /* strict size checking */
  33                if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
  34                        if (skb->len > tcp_hdr(skb)->doff << 2)
  35                                expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
  36                        else
  37                                expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
  38                } else {
  39                        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
  40                                expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
  41                        else
  42                                expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
  43                }
  44                if (opsize != expected_opsize)
  45                        break;
  46
  47                /* try to be gentle vs future versions on the initial syn */
  48                version = *ptr++ & MPTCP_VERSION_MASK;
  49                if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
  50                        if (version != MPTCP_SUPPORTED_VERSION)
  51                                break;
  52                } else if (version < MPTCP_SUPPORTED_VERSION) {
  53                        break;
  54                }
  55
  56                flags = *ptr++;
  57                if (!mptcp_cap_flag_sha256(flags) ||
  58                    (flags & MPTCP_CAP_EXTENSIBILITY))
  59                        break;
  60
  61                /* RFC 6824, Section 3.1:
  62                 * "For the Checksum Required bit (labeled "A"), if either
  63                 * host requires the use of checksums, checksums MUST be used.
  64                 * In other words, the only way for checksums not to be used
  65                 * is if both hosts in their SYNs set A=0."
  66                 *
  67                 * Section 3.3.0:
  68                 * "If a checksum is not present when its use has been
  69                 * negotiated, the receiver MUST close the subflow with a RST as
  70                 * it is considered broken."
  71                 *
  72                 * We don't implement DSS checksum - fall back to TCP.
  73                 */
  74                if (flags & MPTCP_CAP_CHECKSUM_REQD)
  75                        break;
  76
  77                mp_opt->mp_capable = 1;
  78                if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
  79                        mp_opt->sndr_key = get_unaligned_be64(ptr);
  80                        ptr += 8;
  81                }
  82                if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
  83                        mp_opt->rcvr_key = get_unaligned_be64(ptr);
  84                        ptr += 8;
  85                }
  86                if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
  87                        /* Section 3.1.:
  88                         * "the data parameters in a MP_CAPABLE are semantically
  89                         * equivalent to those in a DSS option and can be used
  90                         * interchangeably."
  91                         */
  92                        mp_opt->dss = 1;
  93                        mp_opt->use_map = 1;
  94                        mp_opt->mpc_map = 1;
  95                        mp_opt->data_len = get_unaligned_be16(ptr);
  96                        ptr += 2;
  97                }
  98                pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
  99                         version, flags, opsize, mp_opt->sndr_key,
 100                         mp_opt->rcvr_key, mp_opt->data_len);
 101                break;
 102
 103        case MPTCPOPT_MP_JOIN:
 104                mp_opt->mp_join = 1;
 105                if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
 106                        mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
 107                        mp_opt->join_id = *ptr++;
 108                        mp_opt->token = get_unaligned_be32(ptr);
 109                        ptr += 4;
 110                        mp_opt->nonce = get_unaligned_be32(ptr);
 111                        ptr += 4;
 112                        pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
 113                                 mp_opt->backup, mp_opt->join_id,
 114                                 mp_opt->token, mp_opt->nonce);
 115                } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
 116                        mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
 117                        mp_opt->join_id = *ptr++;
 118                        mp_opt->thmac = get_unaligned_be64(ptr);
 119                        ptr += 8;
 120                        mp_opt->nonce = get_unaligned_be32(ptr);
 121                        ptr += 4;
 122                        pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
 123                                 mp_opt->backup, mp_opt->join_id,
 124                                 mp_opt->thmac, mp_opt->nonce);
 125                } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
 126                        ptr += 2;
 127                        memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
 128                        pr_debug("MP_JOIN hmac");
 129                } else {
 130                        pr_warn("MP_JOIN bad option size");
 131                        mp_opt->mp_join = 0;
 132                }
 133                break;
 134
 135        case MPTCPOPT_DSS:
 136                pr_debug("DSS");
 137                ptr++;
 138
 139                /* we must clear 'mpc_map' be able to detect MP_CAPABLE
 140                 * map vs DSS map in mptcp_incoming_options(), and reconstruct
 141                 * map info accordingly
 142                 */
 143                mp_opt->mpc_map = 0;
 144                flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
 145                mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
 146                mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
 147                mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
 148                mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
 149                mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
 150
 151                pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
 152                         mp_opt->data_fin, mp_opt->dsn64,
 153                         mp_opt->use_map, mp_opt->ack64,
 154                         mp_opt->use_ack);
 155
 156                expected_opsize = TCPOLEN_MPTCP_DSS_BASE;
 157
 158                if (mp_opt->use_ack) {
 159                        if (mp_opt->ack64)
 160                                expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
 161                        else
 162                                expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
 163                }
 164
 165                if (mp_opt->use_map) {
 166                        if (mp_opt->dsn64)
 167                                expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
 168                        else
 169                                expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
 170                }
 171
 172                /* RFC 6824, Section 3.3:
 173                 * If a checksum is present, but its use had
 174                 * not been negotiated in the MP_CAPABLE handshake,
 175                 * the checksum field MUST be ignored.
 176                 */
 177                if (opsize != expected_opsize &&
 178                    opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
 179                        break;
 180
 181                mp_opt->dss = 1;
 182
 183                if (mp_opt->use_ack) {
 184                        if (mp_opt->ack64) {
 185                                mp_opt->data_ack = get_unaligned_be64(ptr);
 186                                ptr += 8;
 187                        } else {
 188                                mp_opt->data_ack = get_unaligned_be32(ptr);
 189                                ptr += 4;
 190                        }
 191
 192                        pr_debug("data_ack=%llu", mp_opt->data_ack);
 193                }
 194
 195                if (mp_opt->use_map) {
 196                        if (mp_opt->dsn64) {
 197                                mp_opt->data_seq = get_unaligned_be64(ptr);
 198                                ptr += 8;
 199                        } else {
 200                                mp_opt->data_seq = get_unaligned_be32(ptr);
 201                                ptr += 4;
 202                        }
 203
 204                        mp_opt->subflow_seq = get_unaligned_be32(ptr);
 205                        ptr += 4;
 206
 207                        mp_opt->data_len = get_unaligned_be16(ptr);
 208                        ptr += 2;
 209
 210                        pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
 211                                 mp_opt->data_seq, mp_opt->subflow_seq,
 212                                 mp_opt->data_len);
 213                }
 214
 215                break;
 216
 217        case MPTCPOPT_ADD_ADDR:
 218                mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO;
 219                if (!mp_opt->echo) {
 220                        if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
 221                            opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
 222                                mp_opt->family = MPTCP_ADDR_IPVERSION_4;
 223#if IS_ENABLED(CONFIG_MPTCP_IPV6)
 224                        else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
 225                                 opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
 226                                mp_opt->family = MPTCP_ADDR_IPVERSION_6;
 227#endif
 228                        else
 229                                break;
 230                } else {
 231                        if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
 232                            opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
 233                                mp_opt->family = MPTCP_ADDR_IPVERSION_4;
 234#if IS_ENABLED(CONFIG_MPTCP_IPV6)
 235                        else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
 236                                 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
 237                                mp_opt->family = MPTCP_ADDR_IPVERSION_6;
 238#endif
 239                        else
 240                                break;
 241                }
 242
 243                mp_opt->add_addr = 1;
 244                mp_opt->addr_id = *ptr++;
 245                if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
 246                        memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
 247                        ptr += 4;
 248                        if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
 249                            opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
 250                                mp_opt->port = get_unaligned_be16(ptr);
 251                                ptr += 2;
 252                        }
 253                }
 254#if IS_ENABLED(CONFIG_MPTCP_IPV6)
 255                else {
 256                        memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16);
 257                        ptr += 16;
 258                        if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
 259                            opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
 260                                mp_opt->port = get_unaligned_be16(ptr);
 261                                ptr += 2;
 262                        }
 263                }
 264#endif
 265                if (!mp_opt->echo) {
 266                        mp_opt->ahmac = get_unaligned_be64(ptr);
 267                        ptr += 8;
 268                }
 269                pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
 270                         (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
 271                         mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port);
 272                break;
 273
 274        case MPTCPOPT_RM_ADDR:
 275                if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
 276                        break;
 277
 278                ptr++;
 279
 280                mp_opt->rm_addr = 1;
 281                mp_opt->rm_id = *ptr++;
 282                pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
 283                break;
 284
 285        case MPTCPOPT_MP_PRIO:
 286                if (opsize != TCPOLEN_MPTCP_PRIO)
 287                        break;
 288
 289                mp_opt->mp_prio = 1;
 290                mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
 291                pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
 292                break;
 293
 294        case MPTCPOPT_MP_FASTCLOSE:
 295                if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
 296                        break;
 297
 298                ptr += 2;
 299                mp_opt->rcvr_key = get_unaligned_be64(ptr);
 300                ptr += 8;
 301                mp_opt->fastclose = 1;
 302                break;
 303
 304        default:
 305                break;
 306        }
 307}
 308
 309void mptcp_get_options(const struct sk_buff *skb,
 310                       struct mptcp_options_received *mp_opt)
 311{
 312        const struct tcphdr *th = tcp_hdr(skb);
 313        const unsigned char *ptr;
 314        int length;
 315
 316        /* initialize option status */
 317        mp_opt->mp_capable = 0;
 318        mp_opt->mp_join = 0;
 319        mp_opt->add_addr = 0;
 320        mp_opt->ahmac = 0;
 321        mp_opt->fastclose = 0;
 322        mp_opt->port = 0;
 323        mp_opt->rm_addr = 0;
 324        mp_opt->dss = 0;
 325        mp_opt->mp_prio = 0;
 326
 327        length = (th->doff * 4) - sizeof(struct tcphdr);
 328        ptr = (const unsigned char *)(th + 1);
 329
 330        while (length > 0) {
 331                int opcode = *ptr++;
 332                int opsize;
 333
 334                switch (opcode) {
 335                case TCPOPT_EOL:
 336                        return;
 337                case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
 338                        length--;
 339                        continue;
 340                default:
 341                        opsize = *ptr++;
 342                        if (opsize < 2) /* "silly options" */
 343                                return;
 344                        if (opsize > length)
 345                                return; /* don't parse partial options */
 346                        if (opcode == TCPOPT_MPTCP)
 347                                mptcp_parse_option(skb, ptr, opsize, mp_opt);
 348                        ptr += opsize - 2;
 349                        length -= opsize;
 350                }
 351        }
 352}
 353
 354bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
 355                       unsigned int *size, struct mptcp_out_options *opts)
 356{
 357        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 358
 359        /* we will use snd_isn to detect first pkt [re]transmission
 360         * in mptcp_established_options_mp()
 361         */
 362        subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
 363        if (subflow->request_mptcp) {
 364                opts->suboptions = OPTION_MPTCP_MPC_SYN;
 365                *size = TCPOLEN_MPTCP_MPC_SYN;
 366                return true;
 367        } else if (subflow->request_join) {
 368                pr_debug("remote_token=%u, nonce=%u", subflow->remote_token,
 369                         subflow->local_nonce);
 370                opts->suboptions = OPTION_MPTCP_MPJ_SYN;
 371                opts->join_id = subflow->local_id;
 372                opts->token = subflow->remote_token;
 373                opts->nonce = subflow->local_nonce;
 374                opts->backup = subflow->request_bkup;
 375                *size = TCPOLEN_MPTCP_MPJ_SYN;
 376                return true;
 377        }
 378        return false;
 379}
 380
 381/* MP_JOIN client subflow must wait for 4th ack before sending any data:
 382 * TCP can't schedule delack timer before the subflow is fully established.
 383 * MPTCP uses the delack timer to do 3rd ack retransmissions
 384 */
 385static void schedule_3rdack_retransmission(struct sock *sk)
 386{
 387        struct inet_connection_sock *icsk = inet_csk(sk);
 388        struct tcp_sock *tp = tcp_sk(sk);
 389        unsigned long timeout;
 390
 391        /* reschedule with a timeout above RTT, as we must look only for drop */
 392        if (tp->srtt_us)
 393                timeout = tp->srtt_us << 1;
 394        else
 395                timeout = TCP_TIMEOUT_INIT;
 396
 397        WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
 398        icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
 399        icsk->icsk_ack.timeout = timeout;
 400        sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
 401}
 402
 403static void clear_3rdack_retransmission(struct sock *sk)
 404{
 405        struct inet_connection_sock *icsk = inet_csk(sk);
 406
 407        sk_stop_timer(sk, &icsk->icsk_delack_timer);
 408        icsk->icsk_ack.timeout = 0;
 409        icsk->icsk_ack.ato = 0;
 410        icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER);
 411}
 412
 413static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 414                                         bool snd_data_fin_enable,
 415                                         unsigned int *size,
 416                                         unsigned int remaining,
 417                                         struct mptcp_out_options *opts)
 418{
 419        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 420        struct mptcp_ext *mpext;
 421        unsigned int data_len;
 422
 423        /* When skb is not available, we better over-estimate the emitted
 424         * options len. A full DSS option (28 bytes) is longer than
 425         * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
 426         * tell the caller to defer the estimate to
 427         * mptcp_established_options_dss(), which will reserve enough space.
 428         */
 429        if (!skb)
 430                return false;
 431
 432        /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
 433        if (subflow->fully_established || snd_data_fin_enable ||
 434            subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
 435            sk->sk_state != TCP_ESTABLISHED)
 436                return false;
 437
 438        if (subflow->mp_capable) {
 439                mpext = mptcp_get_ext(skb);
 440                data_len = mpext ? mpext->data_len : 0;
 441
 442                /* we will check ext_copy.data_len in mptcp_write_options() to
 443                 * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
 444                 * TCPOLEN_MPTCP_MPC_ACK
 445                 */
 446                opts->ext_copy.data_len = data_len;
 447                opts->suboptions = OPTION_MPTCP_MPC_ACK;
 448                opts->sndr_key = subflow->local_key;
 449                opts->rcvr_key = subflow->remote_key;
 450
 451                /* Section 3.1.
 452                 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
 453                 * packets that start the first subflow of an MPTCP connection,
 454                 * as well as the first packet that carries data
 455                 */
 456                if (data_len > 0)
 457                        *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
 458                else
 459                        *size = TCPOLEN_MPTCP_MPC_ACK;
 460
 461                pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
 462                         subflow, subflow->local_key, subflow->remote_key,
 463                         data_len);
 464
 465                return true;
 466        } else if (subflow->mp_join) {
 467                opts->suboptions = OPTION_MPTCP_MPJ_ACK;
 468                memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
 469                *size = TCPOLEN_MPTCP_MPJ_ACK;
 470                pr_debug("subflow=%p", subflow);
 471
 472                schedule_3rdack_retransmission(sk);
 473                return true;
 474        }
 475        return false;
 476}
 477
 478static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
 479                                 struct sk_buff *skb, struct mptcp_ext *ext)
 480{
 481        /* The write_seq value has already been incremented, so the actual
 482         * sequence number for the DATA_FIN is one less.
 483         */
 484        u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1;
 485
 486        if (!ext->use_map || !skb->len) {
 487                /* RFC6824 requires a DSS mapping with specific values
 488                 * if DATA_FIN is set but no data payload is mapped
 489                 */
 490                ext->data_fin = 1;
 491                ext->use_map = 1;
 492                ext->dsn64 = 1;
 493                ext->data_seq = data_fin_tx_seq;
 494                ext->subflow_seq = 0;
 495                ext->data_len = 1;
 496        } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) {
 497                /* If there's an existing DSS mapping and it is the
 498                 * final mapping, DATA_FIN consumes 1 additional byte of
 499                 * mapping space.
 500                 */
 501                ext->data_fin = 1;
 502                ext->data_len++;
 503        }
 504}
 505
 506static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 507                                          bool snd_data_fin_enable,
 508                                          unsigned int *size,
 509                                          unsigned int remaining,
 510                                          struct mptcp_out_options *opts)
 511{
 512        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 513        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 514        unsigned int dss_size = 0;
 515        struct mptcp_ext *mpext;
 516        unsigned int ack_size;
 517        bool ret = false;
 518        u64 ack_seq;
 519
 520        mpext = skb ? mptcp_get_ext(skb) : NULL;
 521
 522        if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
 523                unsigned int map_size;
 524
 525                map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
 526
 527                remaining -= map_size;
 528                dss_size = map_size;
 529                if (mpext)
 530                        opts->ext_copy = *mpext;
 531
 532                if (skb && snd_data_fin_enable)
 533                        mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
 534                ret = true;
 535        }
 536
 537        /* passive sockets msk will set the 'can_ack' after accept(), even
 538         * if the first subflow may have the already the remote key handy
 539         */
 540        opts->ext_copy.use_ack = 0;
 541        if (!READ_ONCE(msk->can_ack)) {
 542                *size = ALIGN(dss_size, 4);
 543                return ret;
 544        }
 545
 546        ack_seq = READ_ONCE(msk->ack_seq);
 547        if (READ_ONCE(msk->use_64bit_ack)) {
 548                ack_size = TCPOLEN_MPTCP_DSS_ACK64;
 549                opts->ext_copy.data_ack = ack_seq;
 550                opts->ext_copy.ack64 = 1;
 551        } else {
 552                ack_size = TCPOLEN_MPTCP_DSS_ACK32;
 553                opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
 554                opts->ext_copy.ack64 = 0;
 555        }
 556        opts->ext_copy.use_ack = 1;
 557        WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
 558
 559        /* Add kind/length/subtype/flag overhead if mapping is not populated */
 560        if (dss_size == 0)
 561                ack_size += TCPOLEN_MPTCP_DSS_BASE;
 562
 563        dss_size += ack_size;
 564
 565        *size = ALIGN(dss_size, 4);
 566        return true;
 567}
 568
 569static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
 570                                  struct in_addr *addr, u16 port)
 571{
 572        u8 hmac[SHA256_DIGEST_SIZE];
 573        u8 msg[7];
 574
 575        msg[0] = addr_id;
 576        memcpy(&msg[1], &addr->s_addr, 4);
 577        msg[5] = port >> 8;
 578        msg[6] = port & 0xFF;
 579
 580        mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
 581
 582        return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
 583}
 584
 585#if IS_ENABLED(CONFIG_MPTCP_IPV6)
 586static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
 587                                   struct in6_addr *addr, u16 port)
 588{
 589        u8 hmac[SHA256_DIGEST_SIZE];
 590        u8 msg[19];
 591
 592        msg[0] = addr_id;
 593        memcpy(&msg[1], &addr->s6_addr, 16);
 594        msg[17] = port >> 8;
 595        msg[18] = port & 0xFF;
 596
 597        mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
 598
 599        return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
 600}
 601#endif
 602
 603static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
 604                                               unsigned int *size,
 605                                               unsigned int remaining,
 606                                               struct mptcp_out_options *opts)
 607{
 608        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 609        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 610        bool drop_other_suboptions = false;
 611        unsigned int opt_size = *size;
 612        struct mptcp_addr_info saddr;
 613        bool echo;
 614        bool port;
 615        int len;
 616
 617        if ((mptcp_pm_should_add_signal_ipv6(msk) ||
 618             mptcp_pm_should_add_signal_port(msk)) &&
 619            skb && skb_is_tcp_pure_ack(skb)) {
 620                pr_debug("drop other suboptions");
 621                opts->suboptions = 0;
 622                opts->ext_copy.use_ack = 0;
 623                opts->ext_copy.use_map = 0;
 624                remaining += opt_size;
 625                drop_other_suboptions = true;
 626        }
 627
 628        if (!mptcp_pm_should_add_signal(msk) ||
 629            !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port)))
 630                return false;
 631
 632        len = mptcp_add_addr_len(saddr.family, echo, port);
 633        if (remaining < len)
 634                return false;
 635
 636        *size = len;
 637        if (drop_other_suboptions)
 638                *size -= opt_size;
 639        opts->addr_id = saddr.id;
 640        if (port)
 641                opts->port = ntohs(saddr.port);
 642        if (saddr.family == AF_INET) {
 643                opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
 644                opts->addr = saddr.addr;
 645                if (!echo) {
 646                        opts->ahmac = add_addr_generate_hmac(msk->local_key,
 647                                                             msk->remote_key,
 648                                                             opts->addr_id,
 649                                                             &opts->addr,
 650                                                             opts->port);
 651                }
 652        }
 653#if IS_ENABLED(CONFIG_MPTCP_IPV6)
 654        else if (saddr.family == AF_INET6) {
 655                opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
 656                opts->addr6 = saddr.addr6;
 657                if (!echo) {
 658                        opts->ahmac = add_addr6_generate_hmac(msk->local_key,
 659                                                              msk->remote_key,
 660                                                              opts->addr_id,
 661                                                              &opts->addr6,
 662                                                              opts->port);
 663                }
 664        }
 665#endif
 666        pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
 667                 opts->addr_id, opts->ahmac, echo, opts->port);
 668
 669        return true;
 670}
 671
 672static bool mptcp_established_options_rm_addr(struct sock *sk,
 673                                              unsigned int *size,
 674                                              unsigned int remaining,
 675                                              struct mptcp_out_options *opts)
 676{
 677        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 678        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 679        u8 rm_id;
 680
 681        if (!mptcp_pm_should_rm_signal(msk) ||
 682            !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id)))
 683                return false;
 684
 685        if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
 686                return false;
 687
 688        *size = TCPOLEN_MPTCP_RM_ADDR_BASE;
 689        opts->suboptions |= OPTION_MPTCP_RM_ADDR;
 690        opts->rm_id = rm_id;
 691
 692        pr_debug("rm_id=%d", opts->rm_id);
 693
 694        return true;
 695}
 696
 697static bool mptcp_established_options_mp_prio(struct sock *sk,
 698                                              unsigned int *size,
 699                                              unsigned int remaining,
 700                                              struct mptcp_out_options *opts)
 701{
 702        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 703
 704        if (!subflow->send_mp_prio)
 705                return false;
 706
 707        /* account for the trailing 'nop' option */
 708        if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN)
 709                return false;
 710
 711        *size = TCPOLEN_MPTCP_PRIO_ALIGN;
 712        opts->suboptions |= OPTION_MPTCP_PRIO;
 713        opts->backup = subflow->request_bkup;
 714
 715        pr_debug("prio=%d", opts->backup);
 716
 717        return true;
 718}
 719
 720bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 721                               unsigned int *size, unsigned int remaining,
 722                               struct mptcp_out_options *opts)
 723{
 724        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 725        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 726        unsigned int opt_size = 0;
 727        bool snd_data_fin;
 728        bool ret = false;
 729
 730        opts->suboptions = 0;
 731
 732        if (unlikely(__mptcp_check_fallback(msk)))
 733                return false;
 734
 735        /* prevent adding of any MPTCP related options on reset packet
 736         * until we support MP_TCPRST/MP_FASTCLOSE
 737         */
 738        if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
 739                return false;
 740
 741        snd_data_fin = mptcp_data_fin_enabled(msk);
 742        if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
 743                ret = true;
 744        else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
 745                ret = true;
 746
 747        /* we reserved enough space for the above options, and exceeding the
 748         * TCP option space would be fatal
 749         */
 750        if (WARN_ON_ONCE(opt_size > remaining))
 751                return false;
 752
 753        *size += opt_size;
 754        remaining -= opt_size;
 755        if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) {
 756                *size += opt_size;
 757                remaining -= opt_size;
 758                ret = true;
 759        } else if (mptcp_established_options_rm_addr(sk, &opt_size, remaining, opts)) {
 760                *size += opt_size;
 761                remaining -= opt_size;
 762                ret = true;
 763        }
 764
 765        if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) {
 766                *size += opt_size;
 767                remaining -= opt_size;
 768                ret = true;
 769        }
 770
 771        return ret;
 772}
 773
 774bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
 775                          struct mptcp_out_options *opts)
 776{
 777        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
 778
 779        if (subflow_req->mp_capable) {
 780                opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
 781                opts->sndr_key = subflow_req->local_key;
 782                *size = TCPOLEN_MPTCP_MPC_SYNACK;
 783                pr_debug("subflow_req=%p, local_key=%llu",
 784                         subflow_req, subflow_req->local_key);
 785                return true;
 786        } else if (subflow_req->mp_join) {
 787                opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
 788                opts->backup = subflow_req->backup;
 789                opts->join_id = subflow_req->local_id;
 790                opts->thmac = subflow_req->thmac;
 791                opts->nonce = subflow_req->local_nonce;
 792                pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
 793                         subflow_req, opts->backup, opts->join_id,
 794                         opts->thmac, opts->nonce);
 795                *size = TCPOLEN_MPTCP_MPJ_SYNACK;
 796                return true;
 797        }
 798        return false;
 799}
 800
 801static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
 802                                    struct mptcp_subflow_context *subflow,
 803                                    struct sk_buff *skb,
 804                                    struct mptcp_options_received *mp_opt)
 805{
 806        /* here we can process OoO, in-window pkts, only in-sequence 4th ack
 807         * will make the subflow fully established
 808         */
 809        if (likely(subflow->fully_established)) {
 810                /* on passive sockets, check for 3rd ack retransmission
 811                 * note that msk is always set by subflow_syn_recv_sock()
 812                 * for mp_join subflows
 813                 */
 814                if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
 815                    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
 816                    subflow->mp_join && mp_opt->mp_join &&
 817                    READ_ONCE(msk->pm.server_side))
 818                        tcp_send_ack(ssk);
 819                goto fully_established;
 820        }
 821
 822        /* we must process OoO packets before the first subflow is fully
 823         * established. OoO packets are instead a protocol violation
 824         * for MP_JOIN subflows as the peer must not send any data
 825         * before receiving the forth ack - cfr. RFC 8684 section 3.2.
 826         */
 827        if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
 828                if (subflow->mp_join)
 829                        goto reset;
 830                return subflow->mp_capable;
 831        }
 832
 833        if (mp_opt->dss && mp_opt->use_ack) {
 834                /* subflows are fully established as soon as we get any
 835                 * additional ack.
 836                 */
 837                subflow->fully_established = 1;
 838                WRITE_ONCE(msk->fully_established, true);
 839                goto fully_established;
 840        }
 841
 842        if (mp_opt->add_addr) {
 843                WRITE_ONCE(msk->fully_established, true);
 844                return true;
 845        }
 846
 847        /* If the first established packet does not contain MP_CAPABLE + data
 848         * then fallback to TCP. Fallback scenarios requires a reset for
 849         * MP_JOIN subflows.
 850         */
 851        if (!mp_opt->mp_capable) {
 852                if (subflow->mp_join)
 853                        goto reset;
 854                subflow->mp_capable = 0;
 855                pr_fallback(msk);
 856                __mptcp_do_fallback(msk);
 857                return false;
 858        }
 859
 860        if (unlikely(!READ_ONCE(msk->pm.server_side)))
 861                pr_warn_once("bogus mpc option on established client sk");
 862        mptcp_subflow_fully_established(subflow, mp_opt);
 863
 864fully_established:
 865        /* if the subflow is not already linked into the conn_list, we can't
 866         * notify the PM: this subflow is still on the listener queue
 867         * and the PM possibly acquiring the subflow lock could race with
 868         * the listener close
 869         */
 870        if (likely(subflow->pm_notified) || list_empty(&subflow->node))
 871                return true;
 872
 873        subflow->pm_notified = 1;
 874        if (subflow->mp_join) {
 875                clear_3rdack_retransmission(ssk);
 876                mptcp_pm_subflow_established(msk, subflow);
 877        } else {
 878                mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
 879        }
 880        return true;
 881
 882reset:
 883        mptcp_subflow_reset(ssk);
 884        return false;
 885}
 886
 887static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
 888{
 889        u32 old_ack32, cur_ack32;
 890
 891        if (use_64bit)
 892                return cur_ack;
 893
 894        old_ack32 = (u32)old_ack;
 895        cur_ack32 = (u32)cur_ack;
 896        cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
 897        if (unlikely(before(cur_ack32, old_ack32)))
 898                return cur_ack + (1LL << 32);
 899        return cur_ack;
 900}
 901
 902static void ack_update_msk(struct mptcp_sock *msk,
 903                           struct sock *ssk,
 904                           struct mptcp_options_received *mp_opt)
 905{
 906        u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt);
 907        struct sock *sk = (struct sock *)msk;
 908        u64 old_snd_una;
 909
 910        mptcp_data_lock(sk);
 911
 912        /* avoid ack expansion on update conflict, to reduce the risk of
 913         * wrongly expanding to a future ack sequence number, which is way
 914         * more dangerous than missing an ack
 915         */
 916        old_snd_una = msk->snd_una;
 917        new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 918
 919        /* ACK for data not even sent yet? Ignore. */
 920        if (after64(new_snd_una, snd_nxt))
 921                new_snd_una = old_snd_una;
 922
 923        new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
 924
 925        if (after64(new_wnd_end, msk->wnd_end))
 926                msk->wnd_end = new_wnd_end;
 927
 928        /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
 929        if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
 930                __mptcp_check_push(sk, ssk);
 931
 932        if (after64(new_snd_una, old_snd_una)) {
 933                msk->snd_una = new_snd_una;
 934                __mptcp_data_acked(sk);
 935        }
 936        mptcp_data_unlock(sk);
 937}
 938
 939bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
 940{
 941        /* Skip if DATA_FIN was already received.
 942         * If updating simultaneously with the recvmsg loop, values
 943         * should match. If they mismatch, the peer is misbehaving and
 944         * we will prefer the most recent information.
 945         */
 946        if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
 947                return false;
 948
 949        WRITE_ONCE(msk->rcv_data_fin_seq,
 950                   expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
 951        WRITE_ONCE(msk->rcv_data_fin, 1);
 952
 953        return true;
 954}
 955
 956static bool add_addr_hmac_valid(struct mptcp_sock *msk,
 957                                struct mptcp_options_received *mp_opt)
 958{
 959        u64 hmac = 0;
 960
 961        if (mp_opt->echo)
 962                return true;
 963
 964        if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
 965                hmac = add_addr_generate_hmac(msk->remote_key,
 966                                              msk->local_key,
 967                                              mp_opt->addr_id, &mp_opt->addr,
 968                                              mp_opt->port);
 969#if IS_ENABLED(CONFIG_MPTCP_IPV6)
 970        else
 971                hmac = add_addr6_generate_hmac(msk->remote_key,
 972                                               msk->local_key,
 973                                               mp_opt->addr_id, &mp_opt->addr6,
 974                                               mp_opt->port);
 975#endif
 976
 977        pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
 978                 msk, (unsigned long long)hmac,
 979                 (unsigned long long)mp_opt->ahmac);
 980
 981        return hmac == mp_opt->ahmac;
 982}
 983
 984void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 985{
 986        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 987        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 988        struct mptcp_options_received mp_opt;
 989        struct mptcp_ext *mpext;
 990
 991        if (__mptcp_check_fallback(msk)) {
 992                /* Keep it simple and unconditionally trigger send data cleanup and
 993                 * pending queue spooling. We will need to acquire the data lock
 994                 * for more accurate checks, and once the lock is acquired, such
 995                 * helpers are cheap.
 996                 */
 997                mptcp_data_lock(subflow->conn);
 998                if (sk_stream_memory_free(sk))
 999                        __mptcp_check_push(subflow->conn, sk);
1000                __mptcp_data_acked(subflow->conn);
1001                mptcp_data_unlock(subflow->conn);
1002                return;
1003        }
1004
1005        mptcp_get_options(skb, &mp_opt);
1006        if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
1007                return;
1008
1009        if (mp_opt.fastclose &&
1010            msk->local_key == mp_opt.rcvr_key) {
1011                WRITE_ONCE(msk->rcv_fastclose, true);
1012                mptcp_schedule_work((struct sock *)msk);
1013        }
1014
1015        if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
1016                struct mptcp_addr_info addr;
1017
1018                addr.port = htons(mp_opt.port);
1019                addr.id = mp_opt.addr_id;
1020                if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
1021                        addr.family = AF_INET;
1022                        addr.addr = mp_opt.addr;
1023                }
1024#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1025                else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
1026                        addr.family = AF_INET6;
1027                        addr.addr6 = mp_opt.addr6;
1028                }
1029#endif
1030                if (!mp_opt.echo) {
1031                        mptcp_pm_add_addr_received(msk, &addr);
1032                        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
1033                } else {
1034                        mptcp_pm_del_add_timer(msk, &addr);
1035                        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
1036                }
1037
1038                if (mp_opt.port)
1039                        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
1040
1041                mp_opt.add_addr = 0;
1042        }
1043
1044        if (mp_opt.rm_addr) {
1045                mptcp_pm_rm_addr_received(msk, mp_opt.rm_id);
1046                mp_opt.rm_addr = 0;
1047        }
1048
1049        if (mp_opt.mp_prio) {
1050                mptcp_pm_mp_prio_received(sk, mp_opt.backup);
1051                MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
1052                mp_opt.mp_prio = 0;
1053        }
1054
1055        if (!mp_opt.dss)
1056                return;
1057
1058        /* we can't wait for recvmsg() to update the ack_seq, otherwise
1059         * monodirectional flows will stuck
1060         */
1061        if (mp_opt.use_ack)
1062                ack_update_msk(msk, sk, &mp_opt);
1063
1064        /* Zero-data-length packets are dropped by the caller and not
1065         * propagated to the MPTCP layer, so the skb extension does not
1066         * need to be allocated or populated. DATA_FIN information, if
1067         * present, needs to be updated here before the skb is freed.
1068         */
1069        if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
1070                if (mp_opt.data_fin && mp_opt.data_len == 1 &&
1071                    mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
1072                    schedule_work(&msk->work))
1073                        sock_hold(subflow->conn);
1074
1075                return;
1076        }
1077
1078        mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
1079        if (!mpext)
1080                return;
1081
1082        memset(mpext, 0, sizeof(*mpext));
1083
1084        if (mp_opt.use_map) {
1085                if (mp_opt.mpc_map) {
1086                        /* this is an MP_CAPABLE carrying MPTCP data
1087                         * we know this map the first chunk of data
1088                         */
1089                        mptcp_crypto_key_sha(subflow->remote_key, NULL,
1090                                             &mpext->data_seq);
1091                        mpext->data_seq++;
1092                        mpext->subflow_seq = 1;
1093                        mpext->dsn64 = 1;
1094                        mpext->mpc_map = 1;
1095                        mpext->data_fin = 0;
1096                } else {
1097                        mpext->data_seq = mp_opt.data_seq;
1098                        mpext->subflow_seq = mp_opt.subflow_seq;
1099                        mpext->dsn64 = mp_opt.dsn64;
1100                        mpext->data_fin = mp_opt.data_fin;
1101                }
1102                mpext->data_len = mp_opt.data_len;
1103                mpext->use_map = 1;
1104        }
1105}
1106
1107static void mptcp_set_rwin(const struct tcp_sock *tp)
1108{
1109        const struct sock *ssk = (const struct sock *)tp;
1110        const struct mptcp_subflow_context *subflow;
1111        struct mptcp_sock *msk;
1112        u64 ack_seq;
1113
1114        subflow = mptcp_subflow_ctx(ssk);
1115        msk = mptcp_sk(subflow->conn);
1116
1117        ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
1118
1119        if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
1120                WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
1121}
1122
1123void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
1124                         struct mptcp_out_options *opts)
1125{
1126        if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
1127             OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
1128                u8 len;
1129
1130                if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
1131                        len = TCPOLEN_MPTCP_MPC_SYN;
1132                else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
1133                        len = TCPOLEN_MPTCP_MPC_SYNACK;
1134                else if (opts->ext_copy.data_len)
1135                        len = TCPOLEN_MPTCP_MPC_ACK_DATA;
1136                else
1137                        len = TCPOLEN_MPTCP_MPC_ACK;
1138
1139                *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
1140                                      MPTCP_SUPPORTED_VERSION,
1141                                      MPTCP_CAP_HMAC_SHA256);
1142
1143                if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
1144                    opts->suboptions))
1145                        goto mp_capable_done;
1146
1147                put_unaligned_be64(opts->sndr_key, ptr);
1148                ptr += 2;
1149                if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
1150                        goto mp_capable_done;
1151
1152                put_unaligned_be64(opts->rcvr_key, ptr);
1153                ptr += 2;
1154                if (!opts->ext_copy.data_len)
1155                        goto mp_capable_done;
1156
1157                put_unaligned_be32(opts->ext_copy.data_len << 16 |
1158                                   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1159                ptr += 1;
1160        }
1161
1162mp_capable_done:
1163        if ((OPTION_MPTCP_ADD_ADDR
1164#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1165             | OPTION_MPTCP_ADD_ADDR6
1166#endif
1167            ) & opts->suboptions) {
1168                u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
1169                u8 echo = MPTCP_ADDR_ECHO;
1170
1171#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1172                if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions)
1173                        len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
1174#endif
1175
1176                if (opts->port)
1177                        len += TCPOLEN_MPTCP_PORT_LEN;
1178
1179                if (opts->ahmac) {
1180                        len += sizeof(opts->ahmac);
1181                        echo = 0;
1182                }
1183
1184                *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
1185                                      len, echo, opts->addr_id);
1186                if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
1187                        memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
1188                        ptr += 1;
1189                }
1190#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1191                else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
1192                        memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
1193                        ptr += 4;
1194                }
1195#endif
1196
1197                if (!opts->port) {
1198                        if (opts->ahmac) {
1199                                put_unaligned_be64(opts->ahmac, ptr);
1200                                ptr += 2;
1201                        }
1202                } else {
1203                        if (opts->ahmac) {
1204                                u8 *bptr = (u8 *)ptr;
1205
1206                                put_unaligned_be16(opts->port, bptr);
1207                                bptr += 2;
1208                                put_unaligned_be64(opts->ahmac, bptr);
1209                                bptr += 8;
1210                                put_unaligned_be16(TCPOPT_NOP << 8 |
1211                                                   TCPOPT_NOP, bptr);
1212
1213                                ptr += 3;
1214                        } else {
1215                                put_unaligned_be32(opts->port << 16 |
1216                                                   TCPOPT_NOP << 8 |
1217                                                   TCPOPT_NOP, ptr);
1218                                ptr += 1;
1219                        }
1220                }
1221        }
1222
1223        if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
1224                *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
1225                                      TCPOLEN_MPTCP_RM_ADDR_BASE,
1226                                      0, opts->rm_id);
1227        }
1228
1229        if (OPTION_MPTCP_PRIO & opts->suboptions) {
1230                const struct sock *ssk = (const struct sock *)tp;
1231                struct mptcp_subflow_context *subflow;
1232
1233                subflow = mptcp_subflow_ctx(ssk);
1234                subflow->send_mp_prio = 0;
1235
1236                *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
1237                                      TCPOLEN_MPTCP_PRIO,
1238                                      opts->backup, TCPOPT_NOP);
1239        }
1240
1241        if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
1242                *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1243                                      TCPOLEN_MPTCP_MPJ_SYN,
1244                                      opts->backup, opts->join_id);
1245                put_unaligned_be32(opts->token, ptr);
1246                ptr += 1;
1247                put_unaligned_be32(opts->nonce, ptr);
1248                ptr += 1;
1249        }
1250
1251        if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
1252                *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1253                                      TCPOLEN_MPTCP_MPJ_SYNACK,
1254                                      opts->backup, opts->join_id);
1255                put_unaligned_be64(opts->thmac, ptr);
1256                ptr += 2;
1257                put_unaligned_be32(opts->nonce, ptr);
1258                ptr += 1;
1259        }
1260
1261        if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
1262                *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1263                                      TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
1264                memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
1265                ptr += 5;
1266        }
1267
1268        if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
1269                struct mptcp_ext *mpext = &opts->ext_copy;
1270                u8 len = TCPOLEN_MPTCP_DSS_BASE;
1271                u8 flags = 0;
1272
1273                if (mpext->use_ack) {
1274                        flags = MPTCP_DSS_HAS_ACK;
1275                        if (mpext->ack64) {
1276                                len += TCPOLEN_MPTCP_DSS_ACK64;
1277                                flags |= MPTCP_DSS_ACK64;
1278                        } else {
1279                                len += TCPOLEN_MPTCP_DSS_ACK32;
1280                        }
1281                }
1282
1283                if (mpext->use_map) {
1284                        len += TCPOLEN_MPTCP_DSS_MAP64;
1285
1286                        /* Use only 64-bit mapping flags for now, add
1287                         * support for optional 32-bit mappings later.
1288                         */
1289                        flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
1290                        if (mpext->data_fin)
1291                                flags |= MPTCP_DSS_DATA_FIN;
1292                }
1293
1294                *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
1295
1296                if (mpext->use_ack) {
1297                        if (mpext->ack64) {
1298                                put_unaligned_be64(mpext->data_ack, ptr);
1299                                ptr += 2;
1300                        } else {
1301                                put_unaligned_be32(mpext->data_ack32, ptr);
1302                                ptr += 1;
1303                        }
1304                }
1305
1306                if (mpext->use_map) {
1307                        put_unaligned_be64(mpext->data_seq, ptr);
1308                        ptr += 2;
1309                        put_unaligned_be32(mpext->subflow_seq, ptr);
1310                        ptr += 1;
1311                        put_unaligned_be32(mpext->data_len << 16 |
1312                                           TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1313                }
1314        }
1315
1316        if (tp)
1317                mptcp_set_rwin(tp);
1318}
1319