linux/net/mptcp/pm.c
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2019, Intel Corporation.
 */
#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"

#include "mib.h"

/* path manager command handlers */

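/* Flag an ADD_ADDR (or its echo) as pending in pm.addr_signal so that
 * the option writer can pick it up later via mptcp_pm_add_addr_signal().
 * Returns -EINVAL if an announcement of the same kind is already pending.
 */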
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
                           const struct mptcp_addr_info *addr,
                           bool echo)
{
        u8 add_addr = READ_ONCE(msk->pm.addr_signal);

        pr_debug("msk=%p, local_id=%d, echo=%d\n", msk, addr->id, echo);

        lockdep_assert_held(&msk->pm.lock);

        if (add_addr &
            (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
                pr_warn("addr_signal error, add_addr=%d, echo=%d\n", add_addr, echo);
                return -EINVAL;
        }

        if (echo) {
                msk->pm.remote = *addr;
                add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
        } else {
                msk->pm.local = *addr;
                add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
        }
        WRITE_ONCE(msk->pm.addr_signal, add_addr);
        return 0;
}

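/* Flag a RM_ADDR as pending and stash the list of ids to remove in
 * pm.rm_list_tx until mptcp_pm_rm_addr_signal() consumes it, then ask
 * the netlink PM to send the ack carrying the option.
 */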
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
        u8 rm_addr = READ_ONCE(msk->pm.addr_signal);

        pr_debug("msk=%p, rm_list_nr=%d\n", msk, rm_list->nr);

        if (rm_addr) {
                pr_warn("addr_signal error, rm_addr=%d\n", rm_addr);
                return -EINVAL;
        }

        msk->pm.rm_list_tx = *rm_list;
        rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
        WRITE_ONCE(msk->pm.addr_signal, rm_addr);
        mptcp_pm_nl_addr_send_ack(msk);
        return 0;
}

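/* Ask the netlink PM, under the PM lock, to close the subflows
 * matching the ids in @rm_list.
 */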
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
        pr_debug("msk=%p, rm_list_nr=%d\n", msk, rm_list->nr);

        spin_lock_bh(&msk->pm.lock);
        mptcp_pm_nl_rm_subflow_received(msk, rm_list);
        spin_unlock_bh(&msk->pm.lock);
        return 0;
}

/* path manager event handlers */

void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p, token=%u side=%d\n", msk, msk->token, server_side);

        WRITE_ONCE(pm->server_side, server_side);
        mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}

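/* Decide whether an incoming MP_JOIN may create a new subflow. The
 * lockless read of pm->accept_subflow only avoids taking the lock on
 * the common path; the decision and the subflow accounting happen under
 * pm->lock, and the flag is cleared once the subflow limit is reached.
 */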
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
        struct mptcp_pm_data *pm = &msk->pm;
        unsigned int subflows_max;
        bool ret = false;

        subflows_max = mptcp_pm_get_subflows_max(msk);

        pr_debug("msk=%p subflows=%d max=%d allow=%d\n", msk, pm->subflows,
                 subflows_max, READ_ONCE(pm->accept_subflow));

        /* try to avoid acquiring the lock below */
        if (!READ_ONCE(pm->accept_subflow))
                return false;

        spin_lock_bh(&pm->lock);
        if (READ_ONCE(pm->accept_subflow)) {
                ret = pm->subflows < subflows_max;
                if (ret && ++pm->subflows == subflows_max)
                        WRITE_ONCE(pm->accept_subflow, false);
        }
        spin_unlock_bh(&pm->lock);

        return ret;
}

/* return true if the new status bit is currently cleared, that is, this event
 * can be served, eventually by an already scheduled work
 */
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
                                   enum mptcp_pm_status new_status)
{
        pr_debug("msk=%p status=%x new=%lx\n", msk, msk->pm.status,
                 BIT(new_status));
        if (msk->pm.status & BIT(new_status))
                return false;

        msk->pm.status |= BIT(new_status);
        mptcp_schedule_work((struct sock *)msk);
        return true;
}

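/* Transition to the fully-established state. The
 * MPTCP_PM_ALREADY_ESTABLISHED bit makes both the worker scheduling and
 * the MPTCP_EVENT_ESTABLISHED notification happen at most once, even
 * with racing callers.
 */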
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
        struct mptcp_pm_data *pm = &msk->pm;
        bool announce = false;

        pr_debug("msk=%p\n", msk);

        spin_lock_bh(&pm->lock);

        /* mptcp_pm_fully_established() can be invoked by multiple
         * racing paths - accept() and check_fully_established():
         * be sure to serve this event only once.
         */
        if (READ_ONCE(pm->work_pending) &&
            !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
                mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);

        if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
                announce = true;

        msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
        spin_unlock_bh(&pm->lock);

        if (announce)
                mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}

void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
        pr_debug("msk=%p\n", msk);
}

void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p\n", msk);

        if (!READ_ONCE(pm->work_pending))
                return;

        spin_lock_bh(&pm->lock);

        if (READ_ONCE(pm->work_pending))
                mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

        spin_unlock_bh(&pm->lock);
}

void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id)
{
        pr_debug("msk=%p\n", msk);
}

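/* Handle an ADD_ADDR announced by the peer: when the PM is not
 * accepting new addresses, just echo the option back; otherwise stash
 * the address in pm->remote and defer the processing to the worker via
 * MPTCP_PM_ADD_ADDR_RECEIVED.
 */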
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
                                const struct mptcp_addr_info *addr)
{
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id,
                 READ_ONCE(pm->accept_addr));

        mptcp_event_addr_announced(msk, addr);

        spin_lock_bh(&pm->lock);

        if (!READ_ONCE(pm->accept_addr)) {
                mptcp_pm_announce_addr(msk, addr, true);
                mptcp_pm_add_addr_send_ack(msk);
        } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
                pm->remote = *addr;
        }

        spin_unlock_bh(&pm->lock);
}

void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
                              struct mptcp_addr_info *addr)
{
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p\n", msk);

        spin_lock_bh(&pm->lock);

        if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
                mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

        spin_unlock_bh(&pm->lock);
}

void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
        if (!mptcp_pm_should_add_signal(msk))
                return;

        mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
}

void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
                               const struct mptcp_rm_list *rm_list)
{
        struct mptcp_pm_data *pm = &msk->pm;
        u8 i;

        pr_debug("msk=%p remote_ids_nr=%d\n", msk, rm_list->nr);

        for (i = 0; i < rm_list->nr; i++)
                mptcp_event_addr_removed(msk, rm_list->ids[i]);

        spin_lock_bh(&pm->lock);
        mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
        pm->rm_list_rx = *rm_list;
        spin_unlock_bh(&pm->lock);
}

void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
        subflow->backup = bkup;

        mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}

void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
        pr_debug("fail_seq=%llu\n", fail_seq);
}

/* path manager helpers */

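/* Consumer side of mptcp_pm_announce_addr(): under the PM lock, fetch
 * the pending (signaled or echoed) address, check that the option fits
 * in the @remaining TCP option space and clear the corresponding
 * addr_signal bit. Returns true if @addr has been filled in.
 */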
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
                              unsigned int opt_size, unsigned int remaining,
                              struct mptcp_addr_info *addr, bool *echo,
                              bool *port, bool *drop_other_suboptions)
{
        bool ret = false;
        u8 add_addr;
        u8 family;

        spin_lock_bh(&msk->pm.lock);

        /* double check after the lock is acquired */
        if (!mptcp_pm_should_add_signal(msk))
                goto out_unlock;

        /* always drop all other options for a pure ack ADD_ADDR; this is a
         * plain dup-ack from the TCP perspective. The other MPTCP-relevant
         * info, if any, will be carried by the 'original' TCP ack
         */
        if (skb && skb_is_tcp_pure_ack(skb)) {
                remaining += opt_size;
                *drop_other_suboptions = true;
        }

        *echo = mptcp_pm_should_add_signal_echo(msk);
        *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);

        family = *echo ? msk->pm.remote.family : msk->pm.local.family;
        if (remaining < mptcp_add_addr_len(family, *echo, *port))
                goto out_unlock;

        if (*echo) {
                *addr = msk->pm.remote;
                add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
        } else {
                *addr = msk->pm.local;
                add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
        }
        WRITE_ONCE(msk->pm.addr_signal, add_addr);
        ret = true;

out_unlock:
        spin_unlock_bh(&msk->pm.lock);
        return ret;
}

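/* Consumer side of mptcp_pm_remove_addr(): copy the pending rm_list
 * into @rm_list when it fits in the @remaining option space. The
 * MPTCP_RM_ADDR_SIGNAL bit is cleared on success or on a bogus
 * (negative length) list, but kept when space is short, so the signal
 * can be retried on a later packet.
 */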
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
                             struct mptcp_rm_list *rm_list)
{
        bool ret = false;
        int len;
        u8 rm_addr;

        spin_lock_bh(&msk->pm.lock);

        /* double check after the lock is acquired */
        if (!mptcp_pm_should_rm_signal(msk))
                goto out_unlock;

        rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
        len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
        if (len < 0) {
                WRITE_ONCE(msk->pm.addr_signal, rm_addr);
                goto out_unlock;
        }
        if (remaining < len)
                goto out_unlock;

        *rm_list = msk->pm.rm_list_tx;
        WRITE_ONCE(msk->pm.addr_signal, rm_addr);
        ret = true;

out_unlock:
        spin_unlock_bh(&msk->pm.lock);
        return ret;
}

int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
        return mptcp_pm_nl_get_local_id(msk, skc);
}

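/* Called on retransmission: if the subflow made no receive progress
 * since the previous check (rcv_tstamp unchanged), bump its staleness
 * counter and let the netlink PM evaluate whether to mark it stale;
 * any progress resets the counter and re-activates the subflow.
 */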
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);

        /* keep track of rtx periods with no progress */
        if (!subflow->stale_count) {
                subflow->stale_rcv_tstamp = rcv_tstamp;
                subflow->stale_count++;
        } else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
                if (subflow->stale_count < U8_MAX)
                        subflow->stale_count++;
                mptcp_pm_nl_subflow_chk_stale(msk, ssk);
        } else {
                subflow->stale_count = 0;
                mptcp_subflow_set_active(subflow);
        }
}

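/* Reset the per-connection PM state to its defaults and hand the msk
 * over to the netlink PM backend for its own initialization.
 */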
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
        msk->pm.add_addr_signaled = 0;
        msk->pm.add_addr_accepted = 0;
        msk->pm.local_addr_used = 0;
        msk->pm.subflows = 0;
        msk->pm.rm_list_tx.nr = 0;
        msk->pm.rm_list_rx.nr = 0;
        WRITE_ONCE(msk->pm.work_pending, false);
        WRITE_ONCE(msk->pm.addr_signal, 0);
        WRITE_ONCE(msk->pm.accept_addr, false);
        WRITE_ONCE(msk->pm.accept_subflow, false);
        WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
        msk->pm.status = 0;

        spin_lock_init(&msk->pm.lock);
        INIT_LIST_HEAD(&msk->pm.anno_list);

        mptcp_pm_nl_data_init(msk);
}

void __init mptcp_pm_init(void)
{
        mptcp_pm_nl_init();
}