linux/net/switchdev/switchdev.c
<<
>>
Prefs
   1/*
   2 * net/switchdev/switchdev.c - Switch device API
   3 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
   4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 */
  11
  12#include <linux/kernel.h>
  13#include <linux/types.h>
  14#include <linux/init.h>
  15#include <linux/mutex.h>
  16#include <linux/notifier.h>
  17#include <linux/netdevice.h>
  18#include <linux/etherdevice.h>
  19#include <linux/if_bridge.h>
  20#include <linux/list.h>
  21#include <linux/workqueue.h>
  22#include <linux/if_vlan.h>
  23#include <linux/rtnetlink.h>
  24#include <net/switchdev.h>
  25
  26/**
  27 *      switchdev_trans_item_enqueue - Enqueue data item to transaction queue
  28 *
  29 *      @trans: transaction
  30 *      @data: pointer to data being queued
  31 *      @destructor: data destructor
  32 *      @tritem: transaction item being queued
  33 *
  34 *      Enqeueue data item to transaction queue. tritem is typically placed in
  35 *      cointainter pointed at by data pointer. Destructor is called on
  36 *      transaction abort and after successful commit phase in case
  37 *      the caller did not dequeue the item before.
  38 */
  39void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
  40                                  void *data, void (*destructor)(void const *),
  41                                  struct switchdev_trans_item *tritem)
  42{
  43        tritem->data = data;
  44        tritem->destructor = destructor;
  45        list_add_tail(&tritem->list, &trans->item_list);
  46}
  47EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
  48
  49static struct switchdev_trans_item *
  50__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
  51{
  52        struct switchdev_trans_item *tritem;
  53
  54        if (list_empty(&trans->item_list))
  55                return NULL;
  56        tritem = list_first_entry(&trans->item_list,
  57                                  struct switchdev_trans_item, list);
  58        list_del(&tritem->list);
  59        return tritem;
  60}
  61
  62/**
  63 *      switchdev_trans_item_dequeue - Dequeue data item from transaction queue
  64 *
  65 *      @trans: transaction
  66 */
  67void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
  68{
  69        struct switchdev_trans_item *tritem;
  70
  71        tritem = __switchdev_trans_item_dequeue(trans);
  72        BUG_ON(!tritem);
  73        return tritem->data;
  74}
  75EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
  76
  77static void switchdev_trans_init(struct switchdev_trans *trans)
  78{
  79        INIT_LIST_HEAD(&trans->item_list);
  80}
  81
  82static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
  83{
  84        struct switchdev_trans_item *tritem;
  85
  86        while ((tritem = __switchdev_trans_item_dequeue(trans)))
  87                tritem->destructor(tritem->data);
  88}
  89
  90static void switchdev_trans_items_warn_destroy(struct net_device *dev,
  91                                               struct switchdev_trans *trans)
  92{
  93        WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
  94             dev->name);
  95        switchdev_trans_items_destroy(trans);
  96}
  97
  98static LIST_HEAD(deferred);
  99static DEFINE_SPINLOCK(deferred_lock);
 100
 101typedef void switchdev_deferred_func_t(struct net_device *dev,
 102                                       const void *data);
 103
 104struct switchdev_deferred_item {
 105        struct list_head list;
 106        struct net_device *dev;
 107        switchdev_deferred_func_t *func;
 108        unsigned long data[0];
 109};
 110
 111static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
 112{
 113        struct switchdev_deferred_item *dfitem;
 114
 115        spin_lock_bh(&deferred_lock);
 116        if (list_empty(&deferred)) {
 117                dfitem = NULL;
 118                goto unlock;
 119        }
 120        dfitem = list_first_entry(&deferred,
 121                                  struct switchdev_deferred_item, list);
 122        list_del(&dfitem->list);
 123unlock:
 124        spin_unlock_bh(&deferred_lock);
 125        return dfitem;
 126}
 127
 128/**
 129 *      switchdev_deferred_process - Process ops in deferred queue
 130 *
 131 *      Called to flush the ops currently queued in deferred ops queue.
 132 *      rtnl_lock must be held.
 133 */
 134void switchdev_deferred_process(void)
 135{
 136        struct switchdev_deferred_item *dfitem;
 137
 138        ASSERT_RTNL();
 139
 140        while ((dfitem = switchdev_deferred_dequeue())) {
 141                dfitem->func(dfitem->dev, dfitem->data);
 142                dev_put(dfitem->dev);
 143                kfree(dfitem);
 144        }
 145}
 146EXPORT_SYMBOL_GPL(switchdev_deferred_process);
 147
 148static void switchdev_deferred_process_work(struct work_struct *work)
 149{
 150        rtnl_lock();
 151        switchdev_deferred_process();
 152        rtnl_unlock();
 153}
 154
 155static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
 156
 157static int switchdev_deferred_enqueue(struct net_device *dev,
 158                                      const void *data, size_t data_len,
 159                                      switchdev_deferred_func_t *func)
 160{
 161        struct switchdev_deferred_item *dfitem;
 162
 163        dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
 164        if (!dfitem)
 165                return -ENOMEM;
 166        dfitem->dev = dev;
 167        dfitem->func = func;
 168        memcpy(dfitem->data, data, data_len);
 169        dev_hold(dev);
 170        spin_lock_bh(&deferred_lock);
 171        list_add_tail(&dfitem->list, &deferred);
 172        spin_unlock_bh(&deferred_lock);
 173        schedule_work(&deferred_process_work);
 174        return 0;
 175}
 176
 177/**
 178 *      switchdev_port_attr_get - Get port attribute
 179 *
 180 *      @dev: port device
 181 *      @attr: attribute to get
 182 */
 183int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
 184{
 185        const struct switchdev_ops *ops = dev->switchdev_ops;
 186        struct net_device *lower_dev;
 187        struct list_head *iter;
 188        struct switchdev_attr first = {
 189                .id = SWITCHDEV_ATTR_ID_UNDEFINED
 190        };
 191        int err = -EOPNOTSUPP;
 192
 193        if (ops && ops->switchdev_port_attr_get)
 194                return ops->switchdev_port_attr_get(dev, attr);
 195
 196        if (attr->flags & SWITCHDEV_F_NO_RECURSE)
 197                return err;
 198
 199        /* Switch device port(s) may be stacked under
 200         * bond/team/vlan dev, so recurse down to get attr on
 201         * each port.  Return -ENODATA if attr values don't
 202         * compare across ports.
 203         */
 204
 205        netdev_for_each_lower_dev(dev, lower_dev, iter) {
 206                err = switchdev_port_attr_get(lower_dev, attr);
 207                if (err)
 208                        break;
 209                if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
 210                        first = *attr;
 211                else if (memcmp(&first, attr, sizeof(*attr)))
 212                        return -ENODATA;
 213        }
 214
 215        return err;
 216}
 217EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
 218
 219static int __switchdev_port_attr_set(struct net_device *dev,
 220                                     const struct switchdev_attr *attr,
 221                                     struct switchdev_trans *trans)
 222{
 223        const struct switchdev_ops *ops = dev->switchdev_ops;
 224        struct net_device *lower_dev;
 225        struct list_head *iter;
 226        int err = -EOPNOTSUPP;
 227
 228        if (ops && ops->switchdev_port_attr_set) {
 229                err = ops->switchdev_port_attr_set(dev, attr, trans);
 230                goto done;
 231        }
 232
 233        if (attr->flags & SWITCHDEV_F_NO_RECURSE)
 234                goto done;
 235
 236        /* Switch device port(s) may be stacked under
 237         * bond/team/vlan dev, so recurse down to set attr on
 238         * each port.
 239         */
 240
 241        netdev_for_each_lower_dev(dev, lower_dev, iter) {
 242                err = __switchdev_port_attr_set(lower_dev, attr, trans);
 243                if (err)
 244                        break;
 245        }
 246
 247done:
 248        if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
 249                err = 0;
 250
 251        return err;
 252}
 253
 254static int switchdev_port_attr_set_now(struct net_device *dev,
 255                                       const struct switchdev_attr *attr)
 256{
 257        struct switchdev_trans trans;
 258        int err;
 259
 260        switchdev_trans_init(&trans);
 261
 262        /* Phase I: prepare for attr set. Driver/device should fail
 263         * here if there are going to be issues in the commit phase,
 264         * such as lack of resources or support.  The driver/device
 265         * should reserve resources needed for the commit phase here,
 266         * but should not commit the attr.
 267         */
 268
 269        trans.ph_prepare = true;
 270        err = __switchdev_port_attr_set(dev, attr, &trans);
 271        if (err) {
 272                /* Prepare phase failed: abort the transaction.  Any
 273                 * resources reserved in the prepare phase are
 274                 * released.
 275                 */
 276
 277                if (err != -EOPNOTSUPP)
 278                        switchdev_trans_items_destroy(&trans);
 279
 280                return err;
 281        }
 282
 283        /* Phase II: commit attr set.  This cannot fail as a fault
 284         * of driver/device.  If it does, it's a bug in the driver/device
 285         * because the driver said everythings was OK in phase I.
 286         */
 287
 288        trans.ph_prepare = false;
 289        err = __switchdev_port_attr_set(dev, attr, &trans);
 290        WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
 291             dev->name, attr->id);
 292        switchdev_trans_items_warn_destroy(dev, &trans);
 293
 294        return err;
 295}
 296
 297static void switchdev_port_attr_set_deferred(struct net_device *dev,
 298                                             const void *data)
 299{
 300        const struct switchdev_attr *attr = data;
 301        int err;
 302
 303        err = switchdev_port_attr_set_now(dev, attr);
 304        if (err && err != -EOPNOTSUPP)
 305                netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
 306                           err, attr->id);
 307        if (attr->complete)
 308                attr->complete(dev, err, attr->complete_priv);
 309}
 310
 311static int switchdev_port_attr_set_defer(struct net_device *dev,
 312                                         const struct switchdev_attr *attr)
 313{
 314        return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
 315                                          switchdev_port_attr_set_deferred);
 316}
 317
 318/**
 319 *      switchdev_port_attr_set - Set port attribute
 320 *
 321 *      @dev: port device
 322 *      @attr: attribute to set
 323 *
 324 *      Use a 2-phase prepare-commit transaction model to ensure
 325 *      system is not left in a partially updated state due to
 326 *      failure from driver/device.
 327 *
 328 *      rtnl_lock must be held and must not be in atomic section,
 329 *      in case SWITCHDEV_F_DEFER flag is not set.
 330 */
 331int switchdev_port_attr_set(struct net_device *dev,
 332                            const struct switchdev_attr *attr)
 333{
 334        if (attr->flags & SWITCHDEV_F_DEFER)
 335                return switchdev_port_attr_set_defer(dev, attr);
 336        ASSERT_RTNL();
 337        return switchdev_port_attr_set_now(dev, attr);
 338}
 339EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
 340
 341static size_t switchdev_obj_size(const struct switchdev_obj *obj)
 342{
 343        switch (obj->id) {
 344        case SWITCHDEV_OBJ_ID_PORT_VLAN:
 345                return sizeof(struct switchdev_obj_port_vlan);
 346        case SWITCHDEV_OBJ_ID_PORT_MDB:
 347                return sizeof(struct switchdev_obj_port_mdb);
 348        case SWITCHDEV_OBJ_ID_HOST_MDB:
 349                return sizeof(struct switchdev_obj_port_mdb);
 350        default:
 351                BUG();
 352        }
 353        return 0;
 354}
 355
 356static int __switchdev_port_obj_add(struct net_device *dev,
 357                                    const struct switchdev_obj *obj,
 358                                    struct switchdev_trans *trans)
 359{
 360        const struct switchdev_ops *ops = dev->switchdev_ops;
 361        struct net_device *lower_dev;
 362        struct list_head *iter;
 363        int err = -EOPNOTSUPP;
 364
 365        if (ops && ops->switchdev_port_obj_add)
 366                return ops->switchdev_port_obj_add(dev, obj, trans);
 367
 368        /* Switch device port(s) may be stacked under
 369         * bond/team/vlan dev, so recurse down to add object on
 370         * each port.
 371         */
 372
 373        netdev_for_each_lower_dev(dev, lower_dev, iter) {
 374                err = __switchdev_port_obj_add(lower_dev, obj, trans);
 375                if (err)
 376                        break;
 377        }
 378
 379        return err;
 380}
 381
 382static int switchdev_port_obj_add_now(struct net_device *dev,
 383                                      const struct switchdev_obj *obj)
 384{
 385        struct switchdev_trans trans;
 386        int err;
 387
 388        ASSERT_RTNL();
 389
 390        switchdev_trans_init(&trans);
 391
 392        /* Phase I: prepare for obj add. Driver/device should fail
 393         * here if there are going to be issues in the commit phase,
 394         * such as lack of resources or support.  The driver/device
 395         * should reserve resources needed for the commit phase here,
 396         * but should not commit the obj.
 397         */
 398
 399        trans.ph_prepare = true;
 400        err = __switchdev_port_obj_add(dev, obj, &trans);
 401        if (err) {
 402                /* Prepare phase failed: abort the transaction.  Any
 403                 * resources reserved in the prepare phase are
 404                 * released.
 405                 */
 406
 407                if (err != -EOPNOTSUPP)
 408                        switchdev_trans_items_destroy(&trans);
 409
 410                return err;
 411        }
 412
 413        /* Phase II: commit obj add.  This cannot fail as a fault
 414         * of driver/device.  If it does, it's a bug in the driver/device
 415         * because the driver said everythings was OK in phase I.
 416         */
 417
 418        trans.ph_prepare = false;
 419        err = __switchdev_port_obj_add(dev, obj, &trans);
 420        WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
 421        switchdev_trans_items_warn_destroy(dev, &trans);
 422
 423        return err;
 424}
 425
 426static void switchdev_port_obj_add_deferred(struct net_device *dev,
 427                                            const void *data)
 428{
 429        const struct switchdev_obj *obj = data;
 430        int err;
 431
 432        err = switchdev_port_obj_add_now(dev, obj);
 433        if (err && err != -EOPNOTSUPP)
 434                netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
 435                           err, obj->id);
 436        if (obj->complete)
 437                obj->complete(dev, err, obj->complete_priv);
 438}
 439
 440static int switchdev_port_obj_add_defer(struct net_device *dev,
 441                                        const struct switchdev_obj *obj)
 442{
 443        return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
 444                                          switchdev_port_obj_add_deferred);
 445}
 446
 447/**
 448 *      switchdev_port_obj_add - Add port object
 449 *
 450 *      @dev: port device
 451 *      @id: object ID
 452 *      @obj: object to add
 453 *
 454 *      Use a 2-phase prepare-commit transaction model to ensure
 455 *      system is not left in a partially updated state due to
 456 *      failure from driver/device.
 457 *
 458 *      rtnl_lock must be held and must not be in atomic section,
 459 *      in case SWITCHDEV_F_DEFER flag is not set.
 460 */
 461int switchdev_port_obj_add(struct net_device *dev,
 462                           const struct switchdev_obj *obj)
 463{
 464        if (obj->flags & SWITCHDEV_F_DEFER)
 465                return switchdev_port_obj_add_defer(dev, obj);
 466        ASSERT_RTNL();
 467        return switchdev_port_obj_add_now(dev, obj);
 468}
 469EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
 470
 471static int switchdev_port_obj_del_now(struct net_device *dev,
 472                                      const struct switchdev_obj *obj)
 473{
 474        const struct switchdev_ops *ops = dev->switchdev_ops;
 475        struct net_device *lower_dev;
 476        struct list_head *iter;
 477        int err = -EOPNOTSUPP;
 478
 479        if (ops && ops->switchdev_port_obj_del)
 480                return ops->switchdev_port_obj_del(dev, obj);
 481
 482        /* Switch device port(s) may be stacked under
 483         * bond/team/vlan dev, so recurse down to delete object on
 484         * each port.
 485         */
 486
 487        netdev_for_each_lower_dev(dev, lower_dev, iter) {
 488                err = switchdev_port_obj_del_now(lower_dev, obj);
 489                if (err)
 490                        break;
 491        }
 492
 493        return err;
 494}
 495
 496static void switchdev_port_obj_del_deferred(struct net_device *dev,
 497                                            const void *data)
 498{
 499        const struct switchdev_obj *obj = data;
 500        int err;
 501
 502        err = switchdev_port_obj_del_now(dev, obj);
 503        if (err && err != -EOPNOTSUPP)
 504                netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
 505                           err, obj->id);
 506        if (obj->complete)
 507                obj->complete(dev, err, obj->complete_priv);
 508}
 509
 510static int switchdev_port_obj_del_defer(struct net_device *dev,
 511                                        const struct switchdev_obj *obj)
 512{
 513        return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
 514                                          switchdev_port_obj_del_deferred);
 515}
 516
 517/**
 518 *      switchdev_port_obj_del - Delete port object
 519 *
 520 *      @dev: port device
 521 *      @id: object ID
 522 *      @obj: object to delete
 523 *
 524 *      rtnl_lock must be held and must not be in atomic section,
 525 *      in case SWITCHDEV_F_DEFER flag is not set.
 526 */
 527int switchdev_port_obj_del(struct net_device *dev,
 528                           const struct switchdev_obj *obj)
 529{
 530        if (obj->flags & SWITCHDEV_F_DEFER)
 531                return switchdev_port_obj_del_defer(dev, obj);
 532        ASSERT_RTNL();
 533        return switchdev_port_obj_del_now(dev, obj);
 534}
 535EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
 536
 537static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
 538
 539/**
 540 *      register_switchdev_notifier - Register notifier
 541 *      @nb: notifier_block
 542 *
 543 *      Register switch device notifier.
 544 */
 545int register_switchdev_notifier(struct notifier_block *nb)
 546{
 547        return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
 548}
 549EXPORT_SYMBOL_GPL(register_switchdev_notifier);
 550
 551/**
 552 *      unregister_switchdev_notifier - Unregister notifier
 553 *      @nb: notifier_block
 554 *
 555 *      Unregister switch device notifier.
 556 */
 557int unregister_switchdev_notifier(struct notifier_block *nb)
 558{
 559        return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
 560}
 561EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
 562
 563/**
 564 *      call_switchdev_notifiers - Call notifiers
 565 *      @val: value passed unmodified to notifier function
 566 *      @dev: port device
 567 *      @info: notifier information data
 568 *
 569 *      Call all network notifier blocks.
 570 */
 571int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 572                             struct switchdev_notifier_info *info)
 573{
 574        info->dev = dev;
 575        return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
 576}
 577EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
 578
 579bool switchdev_port_same_parent_id(struct net_device *a,
 580                                   struct net_device *b)
 581{
 582        struct switchdev_attr a_attr = {
 583                .orig_dev = a,
 584                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
 585        };
 586        struct switchdev_attr b_attr = {
 587                .orig_dev = b,
 588                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
 589        };
 590
 591        if (switchdev_port_attr_get(a, &a_attr) ||
 592            switchdev_port_attr_get(b, &b_attr))
 593                return false;
 594
 595        return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
 596}
 597EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 598