linux/drivers/infiniband/core/roce_gid_mgmt.c
/*
 * Copyright (c) 2015, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>

/* For in6_dev_get/in6_dev_put */
#include <net/addrconf.h>
#include <net/bonding.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

static struct workqueue_struct *gid_cache_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct update_gid_event_work {
	struct work_struct work;
	union ib_gid       gid;
	struct ib_gid_attr gid_attr;
	enum gid_op_type gid_op;
};

#define ROCE_NETDEV_CALLBACK_SZ		3
struct netdev_event_work_cmd {
	roce_netdev_callback	cb;
	roce_netdev_filter	filter;
	struct net_device	*ndev;
	struct net_device	*filter_ndev;
};

struct netdev_event_work {
	struct work_struct		work;
	struct netdev_event_work_cmd	cmds[ROCE_NETDEV_CALLBACK_SZ];
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

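/*
 * Illustrative sketch (not from the original file): a caller can test
 * whether a port supports a given RoCE GID type by checking the returned
 * mask, for example:
 *
 *	if (roce_gid_type_mask_support(ib_dev, port) &
 *	    BIT(IB_GID_TYPE_ROCE_UDP_ENCAP))
 *		handle_roce_v2_port();	(hypothetical helper)
 */

/*
 * Apply a single GID add/delete to the GID cache of the given port, once
 * for each GID type the port supports.
 */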
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
		       u8 port, union ib_gid *gid,
		       struct ib_gid_attr *gid_attr)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr->gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, gid_attr);
				break;
			}
		}
	}
}

enum bonding_slave_state {
	BONDING_SLAVE_STATE_ACTIVE	= 1UL << 0,
	BONDING_SLAVE_STATE_INACTIVE	= 1UL << 1,
	/* No primary slave or the device isn't a slave in bonding */
	BONDING_SLAVE_STATE_NA		= 1UL << 2,
};

static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev,
								   struct net_device *upper)
{
	if (upper && netif_is_bond_master(upper)) {
		struct net_device *pdev =
			bond_option_active_slave_get_rcu(netdev_priv(upper));

		if (pdev)
			return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE :
				BONDING_SLAVE_STATE_INACTIVE;
	}

	return BONDING_SLAVE_STATE_NA;
}

#define REQUIRED_BOND_STATES		(BONDING_SLAVE_STATE_ACTIVE |	\
					 BONDING_SLAVE_STATE_NA)
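/*
 * Match when the event netdevice (cookie) is carried by the given RDMA
 * port: either the cookie's VLAN real device is rdma_ndev itself, or the
 * cookie is an upper device of rdma_ndev and rdma_ndev is the active
 * bond slave (or is not a bond slave at all).
 */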
static bool
is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *real_dev;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	real_dev = rdma_vlan_dev_real_dev(cookie);
	if (!real_dev)
		real_dev = cookie;

	res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
	       (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
		REQUIRED_BOND_STATES)) ||
	       real_dev == rdma_ndev);

	rcu_read_unlock();
	return res;
}

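/*
 * Match only when rdma_ndev is an inactive slave of a bond master; used
 * when cleaning up default GIDs that belong on the active slave instead.
 */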
static bool
is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
				  struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *master_dev;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
	res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) ==
		BONDING_SLAVE_STATE_INACTIVE;
	rcu_read_unlock();

	return res;
}

/**
 * is_ndev_for_default_gid_filter - Check if a given netdevice
 * can be considered for default GIDs or not.
 * @ib_dev:		IB device to check
 * @port:		Port to consider for adding default GID
 * @rdma_ndev:		rdma netdevice pointer
 * @cookie:		Netdevice to consider to form a default GID
 *
 * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
 * considered for deriving the default RoCE GID, false otherwise.
 */
static bool
is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
			       struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();

	/*
	 * When the rdma netdevice is used in bonding, the bonding master
	 * netdevice should be considered for default GIDs. Therefore, ignore
	 * slave rdma netdevices when bonding is considered.
	 * Additionally, when the event (cookie) netdevice is the bond master
	 * device, make sure that it is an upper netdevice of the rdma
	 * netdevice.
	 */
	res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
	       (netif_is_bond_master(cookie_ndev) &&
		rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));

	rcu_read_unlock();
	return res;
}

static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
			    struct net_device *rdma_ndev, void *cookie)
{
	return true;
}

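/*
 * Match when the event netdevice (cookie) is rdma_ndev itself or any
 * upper device of it (e.g. a bond or VLAN stacked on top).
 */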
static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
				struct net_device *rdma_ndev, void *cookie)
{
	bool res;

	if (!rdma_ndev)
		return false;

	if (rdma_ndev == cookie)
		return true;

	rcu_read_lock();
	res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
	rcu_read_unlock();

	return res;
}

/**
 * is_upper_ndev_bond_master_filter - Check if a given netdevice
 * is the bond master device of the netdevice of the RDMA device's port.
 * @ib_dev:		IB device to check
 * @port:		Port to consider for adding default GID
 * @rdma_ndev:		Pointer to rdma netdevice
 * @cookie:		Netdevice to consider to form a default GID
 *
 * is_upper_ndev_bond_master_filter() returns true if the cookie netdevice
 * is a bond master device and rdma_ndev is its lower netdevice. The slave
 * relationship might not have been established yet.
 */
static bool
is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
				 struct net_device *rdma_ndev,
				 void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	bool match = false;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	if (netif_is_bond_master(cookie_ndev) &&
	    rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
		match = true;
	rcu_read_unlock();
	return match;
}

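/*
 * Convert an IPv4/IPv6 sockaddr to a GID and add or delete it for the
 * given port, with ndev recorded in the GID attributes.
 */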
static void update_gid_ip(enum gid_op_type gid_op,
			  struct ib_device *ib_dev,
			  u8 port, struct net_device *ndev,
			  struct sockaddr *addr)
{
	union ib_gid gid;
	struct ib_gid_attr gid_attr;

	rdma_ip2gid(addr, &gid);
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
}

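/*
 * Delete the default GIDs derived from event_ndev from this port, unless
 * the netdevs are unrelated or rdma_ndev is an inactive bond slave.
 */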
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
					    u8 port,
					    struct net_device *rdma_ndev,
					    struct net_device *event_ndev)
{
	struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
	unsigned long gid_type_mask;

	if (!rdma_ndev)
		return;

	if (!real_dev)
		real_dev = event_ndev;

	rcu_read_lock();

	if ((rdma_ndev != event_ndev &&
	     !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
	    is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
	    BONDING_SLAVE_STATE_INACTIVE) {
		rcu_read_unlock();
		return;
	}

	rcu_read_unlock();

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
				     gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
}

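/*
 * Walk ndev's IPv4 addresses under RCU, snapshotting them into a local
 * list (GFP_ATOMIC; entries that fail allocation are skipped), then add a
 * GID for each address once outside the RCU read-side section.
 */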
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
				 u8 port, struct net_device *ndev)
{
	const struct in_ifaddr *ifa;
	struct in_device *in_dev;
	struct sin_list {
		struct list_head	list;
		struct sockaddr_in	ip;
	};
	struct sin_list *sin_iter;
	struct sin_list *sin_temp;

	LIST_HEAD(sin_list);
	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(ndev);
	if (!in_dev) {
		rcu_read_unlock();
		return;
	}

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

		if (!entry)
			continue;

		entry->ip.sin_family = AF_INET;
		entry->ip.sin_addr.s_addr = ifa->ifa_address;
		list_add_tail(&entry->list, &sin_list);
	}

	rcu_read_unlock();

	list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) {
		update_gid_ip(GID_ADD, ib_dev, port, ndev,
			      (struct sockaddr *)&sin_iter->ip);
		list_del(&sin_iter->list);
		kfree(sin_iter);
	}
}

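/*
 * IPv6 counterpart of enum_netdev_ipv4_ips(): snapshot ndev's IPv6
 * addresses under the inet6_dev lock, then add a GID for each one.
 */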
static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
				 u8 port, struct net_device *ndev)
{
	struct inet6_ifaddr *ifp;
	struct inet6_dev *in6_dev;
	struct sin6_list {
		struct list_head	list;
		struct sockaddr_in6	sin6;
	};
	struct sin6_list *sin6_iter;
	struct sin6_list *sin6_temp;
	struct ib_gid_attr gid_attr = {.ndev = ndev};
	LIST_HEAD(sin6_list);

	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	in6_dev = in6_dev_get(ndev);
	if (!in6_dev)
		return;

	read_lock_bh(&in6_dev->lock);
	list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
		struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

		if (!entry)
			continue;

		entry->sin6.sin6_family = AF_INET6;
		entry->sin6.sin6_addr = ifp->addr;
		list_add_tail(&entry->list, &sin6_list);
	}
	read_unlock_bh(&in6_dev->lock);

	in6_dev_put(in6_dev);

	list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) {
		union ib_gid	gid;

		rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid);
		update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr);
		list_del(&sin6_iter->list);
		kfree(sin6_iter);
	}
}

static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
			    struct net_device *ndev)
{
	enum_netdev_ipv4_ips(ib_dev, port, ndev);
	if (IS_ENABLED(CONFIG_IPV6))
		enum_netdev_ipv6_ips(ib_dev, port, ndev);
}

static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	_add_netdev_ips(ib_dev, port, cookie);
}

static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}

/**
 * del_default_gids - Delete default GIDs of the event/cookie netdevice
 * @ib_dev:	RDMA device pointer
 * @port:	Port of the RDMA device whose GID table to consider
 * @rdma_ndev:	Unused rdma netdevice
 * @cookie:	Pointer to event netdevice
 *
 * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
 */
static void del_default_gids(struct ib_device *ib_dev, u8 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
}

static void add_default_gids(struct ib_device *ib_dev, u8 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *event_ndev = cookie;
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);
}

static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
				    u8 port,
				    struct net_device *rdma_ndev,
				    void *cookie)
{
	struct net *net;
	struct net_device *ndev;

	/* Lock the rtnl to make sure the netdevs do not move under
	 * our feet.
	 */
	rtnl_lock();
	down_read(&net_rwsem);
	for_each_net(net)
		for_each_netdev(net, ndev) {
			/*
			 * Filter and add default GIDs of the primary netdevice
			 * when not in bonding mode, or add default GIDs
			 * of the bond master device, when in bonding mode.
			 */
			if (is_ndev_for_default_gid_filter(ib_dev, port,
							   rdma_ndev, ndev))
				add_default_gids(ib_dev, port, rdma_ndev, ndev);

			if (is_eth_port_of_netdev_filter(ib_dev, port,
							 rdma_ndev, ndev))
				_add_netdev_ips(ib_dev, port, ndev);
		}
	up_read(&net_rwsem);
	rtnl_unlock();
}

/**
 * rdma_roce_rescan_device - Rescan all of the network devices in the system
 * and add their GIDs, as needed, to the relevant RoCE devices.
 * @ib_dev:	the RDMA device
 */
void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
	ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
			    enum_all_gids_of_dev_cb, NULL);
}
EXPORT_SYMBOL(rdma_roce_rescan_device);
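
/*
 * Illustrative sketch (not from the original file): a RoCE provider driver
 * may trigger a rescan once its RDMA device becomes usable, so that GIDs
 * for already-configured IP addresses get populated, e.g.:
 *
 *	rdma_roce_rescan_device(ibdev);
 *
 * where "ibdev" is the driver's struct ib_device (hypothetical variable).
 */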

static void callback_for_addr_gid_device_scan(struct ib_device *device,
					      u8 port,
					      struct net_device *rdma_ndev,
					      void *cookie)
{
	struct update_gid_event_work *parsed = cookie;

	return update_gid(parsed->gid_op, device,
			  port, &parsed->gid,
			  &parsed->gid_attr);
}

struct upper_list {
	struct list_head list;
	struct net_device *upper;
};

static int netdev_upper_walk(struct net_device *upper, void *data)
{
	struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	struct list_head *upper_list = data;

	if (!entry)
		return 0;

	list_add_tail(&entry->list, upper_list);
	dev_hold(upper);
	entry->upper = upper;

	return 0;
}

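/*
 * Snapshot all upper devices of ndev under RCU (each held via dev_hold()),
 * then invoke the handler on ndev and on every collected upper device
 * outside the RCU read-side section, dropping the references as we go.
 */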
static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
				void *cookie,
				void (*handle_netdev)(struct ib_device *ib_dev,
						      u8 port,
						      struct net_device *ndev))
{
	struct net_device *ndev = cookie;
	struct upper_list *upper_iter;
	struct upper_list *upper_temp;
	LIST_HEAD(upper_list);

	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
	rcu_read_unlock();

	handle_netdev(ib_dev, port, ndev);
	list_for_each_entry_safe(upper_iter, upper_temp, &upper_list,
				 list) {
		handle_netdev(ib_dev, port, upper_iter->upper);
		dev_put(upper_iter->upper);
		list_del(&upper_iter->list);
		kfree(upper_iter);
	}
}

static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				      struct net_device *event_ndev)
{
	ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
}

static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
				 struct net_device *rdma_ndev, void *cookie)
{
	handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
}

static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
				 struct net_device *rdma_ndev, void *cookie)
{
	handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
}

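/*
 * Look up the bond master of rdma_ndev, if any, and ask
 * bond_delete_netdev_default_gids() to drop the default GIDs derived from
 * it where appropriate.
 */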
static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
					struct net_device *rdma_ndev,
					void *cookie)
{
	struct net_device *master_ndev;

	rcu_read_lock();
	master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev);
	if (master_ndev)
		dev_hold(master_ndev);
	rcu_read_unlock();

	if (master_ndev) {
		bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
						master_ndev);
		dev_put(master_ndev);
	}
}

/* The following functions operate on all IB devices. netdevice_event and
 * addr_event execute ib_enum_all_roce_netdevs from a work item.
 * ib_enum_all_roce_netdevs iterates through all IB devices.
 */

static void netdevice_event_work_handler(struct work_struct *_work)
{
	struct netdev_event_work *work =
		container_of(_work, struct netdev_event_work, work);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) {
		ib_enum_all_roce_netdevs(work->cmds[i].filter,
					 work->cmds[i].filter_ndev,
					 work->cmds[i].cb,
					 work->cmds[i].ndev);
		dev_put(work->cmds[i].ndev);
		dev_put(work->cmds[i].filter_ndev);
	}

	kfree(work);
}

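/*
 * Copy the command array into a freshly allocated work item, default any
 * missing ndev/filter_ndev to the event netdevice (taking references that
 * the work handler later drops), and queue it on gid_cache_wq.
 */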
static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
				struct net_device *ndev)
{
	unsigned int i;
	struct netdev_event_work *ndev_work =
		kmalloc(sizeof(*ndev_work), GFP_KERNEL);

	if (!ndev_work)
		return NOTIFY_DONE;

	memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
	for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
		if (!ndev_work->cmds[i].ndev)
			ndev_work->cmds[i].ndev = ndev;
		if (!ndev_work->cmds[i].filter_ndev)
			ndev_work->cmds[i].filter_ndev = ndev;
		dev_hold(ndev_work->cmds[i].ndev);
		dev_hold(ndev_work->cmds[i].filter_ndev);
	}
	INIT_WORK(&ndev_work->work, netdevice_event_work_handler);

	queue_work(gid_cache_wq, &ndev_work->work);

	return NOTIFY_DONE;
}

static const struct netdev_event_work_cmd add_cmd = {
	.cb	= add_netdev_ips,
	.filter	= is_eth_port_of_netdev_filter
};

static const struct netdev_event_work_cmd add_cmd_upper_ips = {
	.cb	= add_netdev_upper_ips,
	.filter	= is_eth_port_of_netdev_filter
};

static void
ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
		  struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd
			upper_ips_del_cmd = {
				.cb	= del_netdev_upper_ips,
				.filter	= upper_device_filter
	};

	cmds[0] = upper_ips_del_cmd;
	cmds[0].ndev = changeupper_info->upper_dev;
	cmds[1] = add_cmd;
}

static const struct netdev_event_work_cmd bonding_default_add_cmd = {
	.cb	= add_default_gids,
	.filter	= is_upper_ndev_bond_master_filter
};

static void
ndev_event_link(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd
			bonding_default_del_cmd = {
				.cb	= del_default_gids,
				.filter	= is_upper_ndev_bond_master_filter
			};
	/*
	 * When a lower netdev is linked to its upper bonding
	 * netdev, delete lower slave netdev's default GIDs.
	 */
	cmds[0] = bonding_default_del_cmd;
	cmds[0].ndev = event_ndev;
	cmds[0].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device default GIDs */
	cmds[1] = bonding_default_add_cmd;
	cmds[1].ndev = changeupper_info->upper_dev;
	cmds[1].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device IP based GIDs */
	cmds[2] = add_cmd_upper_ips;
	cmds[2].ndev = changeupper_info->upper_dev;
	cmds[2].filter_ndev = changeupper_info->upper_dev;
}

static void netdevice_event_changeupper(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	if (changeupper_info->linking)
		ndev_event_link(event_ndev, changeupper_info, cmds);
	else
		ndev_event_unlink(changeupper_info, cmds);
}

static const struct netdev_event_work_cmd add_default_gid_cmd = {
	.cb	= add_default_gids,
	.filter	= is_ndev_for_default_gid_filter,
};

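/*
 * Translate a netdevice notifier event into up to ROCE_NETDEV_CALLBACK_SZ
 * filter+callback commands and queue them for execution across all RoCE
 * ports. Returns NOTIFY_DONE for events that need no GID table update.
 */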
static int netdevice_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	static const struct netdev_event_work_cmd del_cmd = {
		.cb = del_netdev_ips, .filter = pass_all_filter};
	static const struct netdev_event_work_cmd
			bonding_default_del_cmd_join = {
				.cb	= del_netdev_default_ips_join,
				.filter	= is_eth_port_inactive_slave_filter
			};
	static const struct netdev_event_work_cmd
			netdev_del_cmd = {
				.cb	= del_netdev_ips,
				.filter = is_eth_port_of_netdev_filter
			};
	static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
		.cb = del_netdev_upper_ips, .filter = upper_device_filter};
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} };

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UP:
		cmds[0] = bonding_default_del_cmd_join;
		cmds[1] = add_default_gid_cmd;
		cmds[2] = add_cmd;
		break;

	case NETDEV_UNREGISTER:
		if (ndev->reg_state < NETREG_UNREGISTERED)
			cmds[0] = del_cmd;
		else
			return NOTIFY_DONE;
		break;

	case NETDEV_CHANGEADDR:
		cmds[0] = netdev_del_cmd;
		if (ndev->reg_state == NETREG_REGISTERED) {
			cmds[1] = add_default_gid_cmd;
			cmds[2] = add_cmd;
		}
		break;

	case NETDEV_CHANGEUPPER:
		netdevice_event_changeupper(ndev,
			container_of(ptr, struct netdev_notifier_changeupper_info, info),
			cmds);
		break;

	case NETDEV_BONDING_FAILOVER:
		cmds[0] = bonding_event_ips_del_cmd;
		/* Add default GIDs of the bond device */
		cmds[1] = bonding_default_add_cmd;
		/* Add IP based GIDs of the bond device */
		cmds[2] = add_cmd_upper_ips;
		break;

	default:
		return NOTIFY_DONE;
	}

	return netdevice_queue_work(cmds, ndev);
}


static void update_gid_event_work_handler(struct work_struct *_work)
{
	struct update_gid_event_work *work =
		container_of(_work, struct update_gid_event_work, work);

	ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
				 work->gid_attr.ndev,
				 callback_for_addr_gid_device_scan, work);

	dev_put(work->gid_attr.ndev);
	kfree(work);
}

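/*
 * Common handler for the inetaddr/inet6addr notifiers: map NETDEV_UP/DOWN
 * to a GID add/delete and defer the GID table update to a work item (the
 * GFP_ATOMIC allocation suggests these notifiers may run in atomic
 * context).
 */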
static int addr_event(struct notifier_block *this, unsigned long event,
		      struct sockaddr *sa, struct net_device *ndev)
{
	struct update_gid_event_work *work;
	enum gid_op_type gid_op;

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		gid_op = GID_ADD;
		break;

	case NETDEV_DOWN:
		gid_op = GID_DEL;
		break;

	default:
		return NOTIFY_DONE;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return NOTIFY_DONE;

	INIT_WORK(&work->work, update_gid_event_work_handler);

	rdma_ip2gid(sa, &work->gid);
	work->gid_op = gid_op;

	memset(&work->gid_attr, 0, sizeof(work->gid_attr));
	dev_hold(ndev);
	work->gid_attr.ndev   = ndev;

	queue_work(gid_cache_wq, &work->work);

	return NOTIFY_DONE;
}

static int inetaddr_event(struct notifier_block *this, unsigned long event,
			  void *ptr)
{
	struct sockaddr_in	in;
	struct net_device	*ndev;
	struct in_ifaddr	*ifa = ptr;

	in.sin_family = AF_INET;
	in.sin_addr.s_addr = ifa->ifa_address;
	ndev = ifa->ifa_dev->dev;

	return addr_event(this, event, (struct sockaddr *)&in, ndev);
}

static int inet6addr_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct sockaddr_in6	in6;
	struct net_device	*ndev;
	struct inet6_ifaddr	*ifa6 = ptr;

	in6.sin6_family = AF_INET6;
	in6.sin6_addr = ifa6->addr;
	ndev = ifa6->idev->dev;

	return addr_event(this, event, (struct sockaddr *)&in6, ndev);
}

static struct notifier_block nb_netdevice = {
	.notifier_call = netdevice_event
};

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static struct notifier_block nb_inet6addr = {
	.notifier_call = inet6addr_event
};

int __init roce_gid_mgmt_init(void)
{
	gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
	if (!gid_cache_wq)
		return -ENOMEM;

	register_inetaddr_notifier(&nb_inetaddr);
	if (IS_ENABLED(CONFIG_IPV6))
		register_inet6addr_notifier(&nb_inet6addr);
	/* We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register this notifier last to make
	 * sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_netdevice);

	return 0;
}

void __exit roce_gid_mgmt_cleanup(void)
{
	if (IS_ENABLED(CONFIG_IPV6))
		unregister_inet6addr_notifier(&nb_inet6addr);
	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_netdevice);
	/* Ensure all GID deletion tasks complete before we go down, to
	 * avoid any reference to freed memory. By the time ib-core is
	 * removed, all physical devices have been removed, so no issue
	 * with remaining hardware contexts.
	 */
	destroy_workqueue(gid_cache_wq);
}