linux/net/switchdev/switchdev.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
#include <linux/rtnetlink.h>
#include <net/switchdev.h>

static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);

typedef void switchdev_deferred_func_t(struct net_device *dev,
				       const void *data);

struct switchdev_deferred_item {
	struct list_head list;
	struct net_device *dev;
	switchdev_deferred_func_t *func;
	unsigned long data[];
};

static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
{
	struct switchdev_deferred_item *dfitem;

	spin_lock_bh(&deferred_lock);
	if (list_empty(&deferred)) {
		dfitem = NULL;
		goto unlock;
	}
	dfitem = list_first_entry(&deferred,
				  struct switchdev_deferred_item, list);
	list_del(&dfitem->list);
unlock:
	spin_unlock_bh(&deferred_lock);
	return dfitem;
}

/**
 *	switchdev_deferred_process - Process ops in deferred queue
 *
 *	Called to flush the ops currently queued in the deferred ops queue.
 *	rtnl_lock must be held.
 */
void switchdev_deferred_process(void)
{
	struct switchdev_deferred_item *dfitem;

	ASSERT_RTNL();

	while ((dfitem = switchdev_deferred_dequeue())) {
		dfitem->func(dfitem->dev, dfitem->data);
		dev_put(dfitem->dev);
		kfree(dfitem);
	}
}
EXPORT_SYMBOL_GPL(switchdev_deferred_process);
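
/* Example (hypothetical caller, not part of this file): a user of the
 * deferred queue that is about to go away can flush its pending ops
 * explicitly instead of waiting for the worker, roughly:
 *
 *	rtnl_lock();
 *	switchdev_deferred_process();
 *	rtnl_unlock();
 */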

static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}

static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);

static int switchdev_deferred_enqueue(struct net_device *dev,
				      const void *data, size_t data_len,
				      switchdev_deferred_func_t *func)
{
	struct switchdev_deferred_item *dfitem;

	dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
	if (!dfitem)
		return -ENOMEM;
	dfitem->dev = dev;
	dfitem->func = func;
	memcpy(dfitem->data, data, data_len);
	dev_hold(dev);
	spin_lock_bh(&deferred_lock);
	list_add_tail(&dfitem->list, &deferred);
	spin_unlock_bh(&deferred_lock);
	schedule_work(&deferred_process_work);
	return 0;
}

static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
				      struct net_device *dev,
				      const struct switchdev_attr *attr,
				      struct switchdev_trans *trans)
{
	int err;
	int rc;

	struct switchdev_notifier_port_attr_info attr_info = {
		.attr = attr,
		.trans = trans,
		.handled = false,
	};

	rc = call_switchdev_blocking_notifiers(nt, dev,
					       &attr_info.info, NULL);
	err = notifier_to_errno(rc);
	if (err) {
		WARN_ON(!attr_info.handled);
		return err;
	}

	if (!attr_info.handled)
		return -EOPNOTSUPP;

	return 0;
}

static int switchdev_port_attr_set_now(struct net_device *dev,
				       const struct switchdev_attr *attr)
{
	struct switchdev_trans trans;
	int err;

	/* Phase I: prepare for attr set. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	trans.ph_prepare = true;
	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
					 &trans);
	if (err)
		return err;

	/* Phase II: commit attr set.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
					 &trans);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);

	return err;
}

static void switchdev_port_attr_set_deferred(struct net_device *dev,
					     const void *data)
{
	const struct switchdev_attr *attr = data;
	int err;

	err = switchdev_port_attr_set_now(dev, attr);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
			   err, attr->id);
	if (attr->complete)
		attr->complete(dev, err, attr->complete_priv);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 const struct switchdev_attr *attr)
{
	return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
					  switchdev_port_attr_set_deferred);
}

/**
 *	switchdev_port_attr_set - Set port attribute
 *
 *	@dev: port device
 *	@attr: attribute to set
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	the system is not left in a partially updated state due to
 *	failure from the driver/device.
 *
 *	If the SWITCHDEV_F_DEFER flag is not set, rtnl_lock must be
 *	held and the caller must not be in an atomic section.
 */
int switchdev_port_attr_set(struct net_device *dev,
			    const struct switchdev_attr *attr)
{
	if (attr->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_attr_set_defer(dev, attr);
	ASSERT_RTNL();
	return switchdev_port_attr_set_now(dev, attr);
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
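
/* Example (hypothetical caller; field values are illustrative): a bridge-like
 * upper layer running in atomic context can request deferral, in which case
 * the attribute is copied and applied later under rtnl by the deferred queue:
 *
 *	struct switchdev_attr attr = {
 *		.orig_dev = br_dev,
 *		.id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
 *		.flags = SWITCHDEV_F_DEFER,
 *		.u.stp_state = BR_STATE_FORWARDING,
 *	};
 *
 *	err = switchdev_port_attr_set(port_dev, &attr);
 */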

static size_t switchdev_obj_size(const struct switchdev_obj *obj)
{
	switch (obj->id) {
	case SWITCHDEV_OBJ_ID_PORT_VLAN:
		return sizeof(struct switchdev_obj_port_vlan);
	case SWITCHDEV_OBJ_ID_PORT_MDB:
		return sizeof(struct switchdev_obj_port_mdb);
	case SWITCHDEV_OBJ_ID_HOST_MDB:
		return sizeof(struct switchdev_obj_port_mdb);
	default:
		BUG();
	}
	return 0;
}
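
/* Note: a struct switchdev_obj is always embedded in one of the concrete
 * object types sized above; drivers typically downcast it with the container
 * macros from <net/switchdev.h>, e.g. (illustrative):
 *
 *	const struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
 */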

static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
				     struct net_device *dev,
				     const struct switchdev_obj *obj,
				     struct switchdev_trans *trans,
				     struct netlink_ext_ack *extack)
{
	int rc;
	int err;

	struct switchdev_notifier_port_obj_info obj_info = {
		.obj = obj,
		.trans = trans,
		.handled = false,
	};

	rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
	err = notifier_to_errno(rc);
	if (err) {
		WARN_ON(!obj_info.handled);
		return err;
	}
	if (!obj_info.handled)
		return -EOPNOTSUPP;
	return 0;
}

static int switchdev_port_obj_add_now(struct net_device *dev,
				      const struct switchdev_obj *obj,
				      struct netlink_ext_ack *extack)
{
	struct switchdev_trans trans;
	int err;

	ASSERT_RTNL();

	/* Phase I: prepare for obj add. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	trans.ph_prepare = true;
	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
					dev, obj, &trans, extack);
	if (err)
		return err;

	/* Phase II: commit obj add.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
					dev, obj, &trans, extack);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);

	return err;
}

static void switchdev_port_obj_add_deferred(struct net_device *dev,
					    const void *data)
{
	const struct switchdev_obj *obj = data;
	int err;

	err = switchdev_port_obj_add_now(dev, obj, NULL);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
			   err, obj->id);
	if (obj->complete)
		obj->complete(dev, err, obj->complete_priv);
}

static int switchdev_port_obj_add_defer(struct net_device *dev,
					const struct switchdev_obj *obj)
{
	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
					  switchdev_port_obj_add_deferred);
}

/**
 *	switchdev_port_obj_add - Add port object
 *
 *	@dev: port device
 *	@obj: object to add
 *	@extack: netlink extended ack
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	the system is not left in a partially updated state due to
 *	failure from the driver/device.
 *
 *	If the SWITCHDEV_F_DEFER flag is not set, rtnl_lock must be
 *	held and the caller must not be in an atomic section.
 */
int switchdev_port_obj_add(struct net_device *dev,
			   const struct switchdev_obj *obj,
			   struct netlink_ext_ack *extack)
{
	if (obj->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_obj_add_defer(dev, obj);
	ASSERT_RTNL();
	return switchdev_port_obj_add_now(dev, obj, extack);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
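
/* Example (hypothetical caller; values are illustrative): adding a VLAN to a
 * port. The caller fills a concrete object type and passes its embedded
 * struct switchdev_obj:
 *
 *	struct switchdev_obj_port_vlan vlan = {
 *		.obj.orig_dev = dev,
 *		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
 *		.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED,
 *		.vid_begin = 10,
 *		.vid_end = 10,
 *	};
 *
 *	err = switchdev_port_obj_add(dev, &vlan.obj, extack);
 */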

static int switchdev_port_obj_del_now(struct net_device *dev,
				      const struct switchdev_obj *obj)
{
	return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
					 dev, obj, NULL, NULL);
}

static void switchdev_port_obj_del_deferred(struct net_device *dev,
					    const void *data)
{
	const struct switchdev_obj *obj = data;
	int err;

	err = switchdev_port_obj_del_now(dev, obj);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
			   err, obj->id);
	if (obj->complete)
		obj->complete(dev, err, obj->complete_priv);
}

static int switchdev_port_obj_del_defer(struct net_device *dev,
					const struct switchdev_obj *obj)
{
	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
					  switchdev_port_obj_del_deferred);
}

/**
 *	switchdev_port_obj_del - Delete port object
 *
 *	@dev: port device
 *	@obj: object to delete
 *
 *	If the SWITCHDEV_F_DEFER flag is not set, rtnl_lock must be
 *	held and the caller must not be in an atomic section.
 */
int switchdev_port_obj_del(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	if (obj->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_obj_del_defer(dev, obj);
	ASSERT_RTNL();
	return switchdev_port_obj_del_now(dev, obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);

/**
 *	register_switchdev_notifier - Register notifier
 *	@nb: notifier_block
 *
 *	Register switch device notifier.
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);

/**
 *	unregister_switchdev_notifier - Unregister notifier
 *	@nb: notifier_block
 *
 *	Unregister switch device notifier.
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
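
/* Example (hypothetical driver): the atomic chain carries events such as
 * SWITCHDEV_FDB_ADD_TO_DEVICE; handlers run in atomic context and usually
 * defer the real work. Names below are made up for illustration:
 *
 *	static int foo_switchdev_event(struct notifier_block *unused,
 *				       unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 *
 *		...
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block foo_switchdev_nb = {
 *		.notifier_call = foo_switchdev_event,
 *	};
 *
 *	err = register_switchdev_notifier(&foo_switchdev_nb);
 */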

/**
 *	call_switchdev_notifiers - Call notifiers
 *	@val: value passed unmodified to notifier function
 *	@dev: port device
 *	@info: notifier information data
 *	@extack: netlink extended ack
 *	Call all switchdev notifier blocks.
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info,
			     struct netlink_ext_ack *extack)
{
	info->dev = dev;
	info->extack = extack;
	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);

int register_switchdev_blocking_notifier(struct notifier_block *nb)
{
	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;

	return blocking_notifier_chain_register(chain, nb);
}
EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);

int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
{
	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;

	return blocking_notifier_chain_unregister(chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);

int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
				      struct switchdev_notifier_info *info,
				      struct netlink_ext_ack *extack)
{
	info->dev = dev;
	info->extack = extack;
	return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
					    val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);

static int __switchdev_handle_port_obj_add(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*add_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj,
				      struct switchdev_trans *trans,
				      struct netlink_ext_ack *extack))
{
	struct netlink_ext_ack *extack;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);

	if (check_cb(dev)) {
		/* This flag is only checked if the return value is success. */
		port_obj_info->handled = true;
		return add_cb(dev, port_obj_info->obj, port_obj_info->trans,
			      extack);
	}

	/* Switch ports might be stacked under e.g. a LAG. Ignore the
	 * unsupported devices; another driver might be able to handle them. But
	 * propagate any hard errors to the callers.
	 *
	 * If the driver does its own bookkeeping of stacked ports, it's not
	 * necessary to go through this helper.
	 */
	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		if (netif_is_bridge_master(lower_dev))
			continue;

		err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
						      check_cb, add_cb);
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	return err;
}

int switchdev_handle_port_obj_add(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*add_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj,
				      struct switchdev_trans *trans,
				      struct netlink_ext_ack *extack))
{
	int err;

	err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
					      add_cb);
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
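
/* Example (hypothetical driver; names are made up): a blocking notifier
 * handler can dispatch SWITCHDEV_PORT_OBJ_ADD through this helper so that
 * add_cb is invoked only for netdevs accepted by check_cb, including ports
 * stacked under e.g. a LAG:
 *
 *	case SWITCHDEV_PORT_OBJ_ADD:
 *		err = switchdev_handle_port_obj_add(dev, ptr,
 *						    foo_port_dev_check,
 *						    foo_port_obj_add);
 *		return notifier_from_errno(err);
 */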

static int __switchdev_handle_port_obj_del(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*del_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj))
{
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (check_cb(dev)) {
		/* This flag is only checked if the return value is success. */
		port_obj_info->handled = true;
		return del_cb(dev, port_obj_info->obj);
	}

	/* Switch ports might be stacked under e.g. a LAG. Ignore the
	 * unsupported devices; another driver might be able to handle them. But
	 * propagate any hard errors to the callers.
	 *
	 * If the driver does its own bookkeeping of stacked ports, it's not
	 * necessary to go through this helper.
	 */
	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		if (netif_is_bridge_master(lower_dev))
			continue;

		err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
						      check_cb, del_cb);
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	return err;
}

int switchdev_handle_port_obj_del(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*del_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj))
{
	int err;

	err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
					      del_cb);
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);

static int __switchdev_handle_port_attr_set(struct net_device *dev,
			struct switchdev_notifier_port_attr_info *port_attr_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*set_cb)(struct net_device *dev,
				      const struct switchdev_attr *attr,
				      struct switchdev_trans *trans))
{
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (check_cb(dev)) {
		port_attr_info->handled = true;
		return set_cb(dev, port_attr_info->attr,
			      port_attr_info->trans);
	}

	/* Switch ports might be stacked under e.g. a LAG. Ignore the
	 * unsupported devices; another driver might be able to handle them. But
	 * propagate any hard errors to the callers.
	 *
	 * If the driver does its own bookkeeping of stacked ports, it's not
	 * necessary to go through this helper.
	 */
	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		if (netif_is_bridge_master(lower_dev))
			continue;

		err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
						       check_cb, set_cb);
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	return err;
}

int switchdev_handle_port_attr_set(struct net_device *dev,
			struct switchdev_notifier_port_attr_info *port_attr_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*set_cb)(struct net_device *dev,
				      const struct switchdev_attr *attr,
				      struct switchdev_trans *trans))
{
	int err;

	err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
					       set_cb);
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
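
/* Example (hypothetical driver callback; names are made up): a set_cb passed
 * to switchdev_handle_port_attr_set() sees the same two-phase transaction as
 * direct attr notifications, so it would typically only validate during the
 * prepare phase and program hardware on commit:
 *
 *	static int foo_port_attr_set(struct net_device *dev,
 *				     const struct switchdev_attr *attr,
 *				     struct switchdev_trans *trans)
 *	{
 *		if (attr->id != SWITCHDEV_ATTR_ID_PORT_STP_STATE)
 *			return -EOPNOTSUPP;
 *		if (switchdev_trans_ph_prepare(trans))
 *			return 0;
 *		return foo_set_stp_state(dev, attr->u.stp_state);
 *	}
 */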