linux/drivers/infiniband/core/roce_gid_mgmt.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2015, Mellanox Technologies inc.  All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include "core_priv.h"
  34
  35#include <linux/in.h>
  36#include <linux/in6.h>
  37
  38/* For in6_dev_get/in6_dev_put */
  39#include <net/addrconf.h>
  40#include <net/bonding.h>
  41
  42#include <rdma/ib_cache.h>
  43#include <rdma/ib_addr.h>
  44
  45static struct workqueue_struct *gid_cache_wq;
  46
  47enum gid_op_type {
  48        GID_DEL = 0,
  49        GID_ADD
  50};
  51
  52struct update_gid_event_work {
  53        struct work_struct work;
  54        union ib_gid       gid;
  55        struct ib_gid_attr gid_attr;
  56        enum gid_op_type gid_op;
  57};
  58
  59#define ROCE_NETDEV_CALLBACK_SZ         3
  60struct netdev_event_work_cmd {
  61        roce_netdev_callback    cb;
  62        roce_netdev_filter      filter;
  63        struct net_device       *ndev;
  64        struct net_device       *filter_ndev;
  65};
  66
  67struct netdev_event_work {
  68        struct work_struct              work;
  69        struct netdev_event_work_cmd    cmds[ROCE_NETDEV_CALLBACK_SZ];
  70};
  71
  72static const struct {
  73        bool (*is_supported)(const struct ib_device *device, u8 port_num);
  74        enum ib_gid_type gid_type;
  75} PORT_CAP_TO_GID_TYPE[] = {
  76        {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
  77        {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
  78};
  79
  80#define CAP_TO_GID_TABLE_SIZE   ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
  81
  82unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
  83{
  84        int i;
  85        unsigned int ret_flags = 0;
  86
  87        if (!rdma_protocol_roce(ib_dev, port))
  88                return 1UL << IB_GID_TYPE_IB;
  89
  90        for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
  91                if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
  92                        ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
  93
  94        return ret_flags;
  95}
  96EXPORT_SYMBOL(roce_gid_type_mask_support);
  97
  98static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
  99                       u8 port, union ib_gid *gid,
 100                       struct ib_gid_attr *gid_attr)
 101{
 102        int i;
 103        unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
 104
 105        for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
 106                if ((1UL << i) & gid_type_mask) {
 107                        gid_attr->gid_type = i;
 108                        switch (gid_op) {
 109                        case GID_ADD:
 110                                ib_cache_gid_add(ib_dev, port,
 111                                                 gid, gid_attr);
 112                                break;
 113                        case GID_DEL:
 114                                ib_cache_gid_del(ib_dev, port,
 115                                                 gid, gid_attr);
 116                                break;
 117                        }
 118                }
 119        }
 120}
 121
 122enum bonding_slave_state {
 123        BONDING_SLAVE_STATE_ACTIVE      = 1UL << 0,
 124        BONDING_SLAVE_STATE_INACTIVE    = 1UL << 1,
 125        /* No primary slave or the device isn't a slave in bonding */
 126        BONDING_SLAVE_STATE_NA          = 1UL << 2,
 127};
 128
 129static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev,
 130                                                                   struct net_device *upper)
 131{
 132        if (upper && netif_is_bond_master(upper)) {
 133                struct net_device *pdev =
 134                        bond_option_active_slave_get_rcu(netdev_priv(upper));
 135
 136                if (pdev)
 137                        return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE :
 138                                BONDING_SLAVE_STATE_INACTIVE;
 139        }
 140
 141        return BONDING_SLAVE_STATE_NA;
 142}
 143
 144#define REQUIRED_BOND_STATES            (BONDING_SLAVE_STATE_ACTIVE |   \
 145                                         BONDING_SLAVE_STATE_NA)
 146static bool
 147is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
 148                             struct net_device *rdma_ndev, void *cookie)
 149{
 150        struct net_device *real_dev;
 151        bool res;
 152
 153        if (!rdma_ndev)
 154                return false;
 155
 156        rcu_read_lock();
 157        real_dev = rdma_vlan_dev_real_dev(cookie);
 158        if (!real_dev)
 159                real_dev = cookie;
 160
 161        res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
 162               (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
 163                REQUIRED_BOND_STATES)) ||
 164               real_dev == rdma_ndev);
 165
 166        rcu_read_unlock();
 167        return res;
 168}
 169
 170static bool
 171is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
 172                                  struct net_device *rdma_ndev, void *cookie)
 173{
 174        struct net_device *master_dev;
 175        bool res;
 176
 177        if (!rdma_ndev)
 178                return false;
 179
 180        rcu_read_lock();
 181        master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
 182        res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) ==
 183                BONDING_SLAVE_STATE_INACTIVE;
 184        rcu_read_unlock();
 185
 186        return res;
 187}
 188
 189/** is_ndev_for_default_gid_filter - Check if a given netdevice
 190 * can be considered for default GIDs or not.
 191 * @ib_dev:             IB device to check
 192 * @port:               Port to consider for adding default GID
 193 * @rdma_ndev:          rdma netdevice pointer
 194 * @cookie_ndev:        Netdevice to consider to form a default GID
 195 *
 196 * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
 197 * considered for deriving default RoCE GID, returns false otherwise.
 198 */
 199static bool
 200is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
 201                               struct net_device *rdma_ndev, void *cookie)
 202{
 203        struct net_device *cookie_ndev = cookie;
 204        bool res;
 205
 206        if (!rdma_ndev)
 207                return false;
 208
 209        rcu_read_lock();
 210
 211        /*
 212         * When rdma netdevice is used in bonding, bonding master netdevice
 213         * should be considered for default GIDs. Therefore, ignore slave rdma
 214         * netdevices when bonding is considered.
 215         * Additionally when event(cookie) netdevice is bond master device,
 216         * make sure that it the upper netdevice of rdma netdevice.
 217         */
 218        res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
 219               (netif_is_bond_master(cookie_ndev) &&
 220                rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));
 221
 222        rcu_read_unlock();
 223        return res;
 224}
 225
 226static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
 227                            struct net_device *rdma_ndev, void *cookie)
 228{
 229        return true;
 230}
 231
 232static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
 233                                struct net_device *rdma_ndev, void *cookie)
 234{
 235        bool res;
 236
 237        if (!rdma_ndev)
 238                return false;
 239
 240        if (rdma_ndev == cookie)
 241                return true;
 242
 243        rcu_read_lock();
 244        res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
 245        rcu_read_unlock();
 246
 247        return res;
 248}
 249
 250/**
 251 * is_upper_ndev_bond_master_filter - Check if a given netdevice
 252 * is bond master device of netdevice of the the RDMA device of port.
 253 * @ib_dev:             IB device to check
 254 * @port:               Port to consider for adding default GID
 255 * @rdma_ndev:          Pointer to rdma netdevice
 256 * @cookie:             Netdevice to consider to form a default GID
 257 *
 258 * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev
 259 * is bond master device and rdma_ndev is its lower netdevice. It might
 260 * not have been established as slave device yet.
 261 */
 262static bool
 263is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
 264                                 struct net_device *rdma_ndev,
 265                                 void *cookie)
 266{
 267        struct net_device *cookie_ndev = cookie;
 268        bool match = false;
 269
 270        if (!rdma_ndev)
 271                return false;
 272
 273        rcu_read_lock();
 274        if (netif_is_bond_master(cookie_ndev) &&
 275            rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
 276                match = true;
 277        rcu_read_unlock();
 278        return match;
 279}
 280
 281static void update_gid_ip(enum gid_op_type gid_op,
 282                          struct ib_device *ib_dev,
 283                          u8 port, struct net_device *ndev,
 284                          struct sockaddr *addr)
 285{
 286        union ib_gid gid;
 287        struct ib_gid_attr gid_attr;
 288
 289        rdma_ip2gid(addr, &gid);
 290        memset(&gid_attr, 0, sizeof(gid_attr));
 291        gid_attr.ndev = ndev;
 292
 293        update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
 294}
 295
 296static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
 297                                            u8 port,
 298                                            struct net_device *rdma_ndev,
 299                                            struct net_device *event_ndev)
 300{
 301        struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
 302        unsigned long gid_type_mask;
 303
 304        if (!rdma_ndev)
 305                return;
 306
 307        if (!real_dev)
 308                real_dev = event_ndev;
 309
 310        rcu_read_lock();
 311
 312        if (((rdma_ndev != event_ndev &&
 313              !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
 314             is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev)
 315                                                 ==
 316             BONDING_SLAVE_STATE_INACTIVE)) {
 317                rcu_read_unlock();
 318                return;
 319        }
 320
 321        rcu_read_unlock();
 322
 323        gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
 324
 325        ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
 326                                     gid_type_mask,
 327                                     IB_CACHE_GID_DEFAULT_MODE_DELETE);
 328}
 329
 330static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
 331                                 u8 port, struct net_device *ndev)
 332{
 333        struct in_device *in_dev;
 334        struct sin_list {
 335                struct list_head        list;
 336                struct sockaddr_in      ip;
 337        };
 338        struct sin_list *sin_iter;
 339        struct sin_list *sin_temp;
 340
 341        LIST_HEAD(sin_list);
 342        if (ndev->reg_state >= NETREG_UNREGISTERING)
 343                return;
 344
 345        rcu_read_lock();
 346        in_dev = __in_dev_get_rcu(ndev);
 347        if (!in_dev) {
 348                rcu_read_unlock();
 349                return;
 350        }
 351
 352        for_ifa(in_dev) {
 353                struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 354
 355                if (!entry)
 356                        continue;
 357
 358                entry->ip.sin_family = AF_INET;
 359                entry->ip.sin_addr.s_addr = ifa->ifa_address;
 360                list_add_tail(&entry->list, &sin_list);
 361        }
 362        endfor_ifa(in_dev);
 363        rcu_read_unlock();
 364
 365        list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) {
 366                update_gid_ip(GID_ADD, ib_dev, port, ndev,
 367                              (struct sockaddr *)&sin_iter->ip);
 368                list_del(&sin_iter->list);
 369                kfree(sin_iter);
 370        }
 371}
 372
 373static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
 374                                 u8 port, struct net_device *ndev)
 375{
 376        struct inet6_ifaddr *ifp;
 377        struct inet6_dev *in6_dev;
 378        struct sin6_list {
 379                struct list_head        list;
 380                struct sockaddr_in6     sin6;
 381        };
 382        struct sin6_list *sin6_iter;
 383        struct sin6_list *sin6_temp;
 384        struct ib_gid_attr gid_attr = {.ndev = ndev};
 385        LIST_HEAD(sin6_list);
 386
 387        if (ndev->reg_state >= NETREG_UNREGISTERING)
 388                return;
 389
 390        in6_dev = in6_dev_get(ndev);
 391        if (!in6_dev)
 392                return;
 393
 394        read_lock_bh(&in6_dev->lock);
 395        list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
 396                struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 397
 398                if (!entry)
 399                        continue;
 400
 401                entry->sin6.sin6_family = AF_INET6;
 402                entry->sin6.sin6_addr = ifp->addr;
 403                list_add_tail(&entry->list, &sin6_list);
 404        }
 405        read_unlock_bh(&in6_dev->lock);
 406
 407        in6_dev_put(in6_dev);
 408
 409        list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) {
 410                union ib_gid    gid;
 411
 412                rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid);
 413                update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr);
 414                list_del(&sin6_iter->list);
 415                kfree(sin6_iter);
 416        }
 417}
 418
 419static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
 420                            struct net_device *ndev)
 421{
 422        enum_netdev_ipv4_ips(ib_dev, port, ndev);
 423        if (IS_ENABLED(CONFIG_IPV6))
 424                enum_netdev_ipv6_ips(ib_dev, port, ndev);
 425}
 426
 427static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
 428                           struct net_device *rdma_ndev, void *cookie)
 429{
 430        _add_netdev_ips(ib_dev, port, cookie);
 431}
 432
 433static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
 434                           struct net_device *rdma_ndev, void *cookie)
 435{
 436        ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
 437}
 438
 439/**
 440 * del_default_gids - Delete default GIDs of the event/cookie netdevice
 441 * @ib_dev:     RDMA device pointer
 442 * @port:       Port of the RDMA device whose GID table to consider
 443 * @rdma_ndev:  Unused rdma netdevice
 444 * @cookie:     Pointer to event netdevice
 445 *
 446 * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
 447 */
 448static void del_default_gids(struct ib_device *ib_dev, u8 port,
 449                             struct net_device *rdma_ndev, void *cookie)
 450{
 451        struct net_device *cookie_ndev = cookie;
 452        unsigned long gid_type_mask;
 453
 454        gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
 455
 456        ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
 457                                     IB_CACHE_GID_DEFAULT_MODE_DELETE);
 458}
 459
 460static void add_default_gids(struct ib_device *ib_dev, u8 port,
 461                             struct net_device *rdma_ndev, void *cookie)
 462{
 463        struct net_device *event_ndev = cookie;
 464        unsigned long gid_type_mask;
 465
 466        gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
 467        ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
 468                                     IB_CACHE_GID_DEFAULT_MODE_SET);
 469}
 470
 471static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
 472                                    u8 port,
 473                                    struct net_device *rdma_ndev,
 474                                    void *cookie)
 475{
 476        struct net *net;
 477        struct net_device *ndev;
 478
 479        /* Lock the rtnl to make sure the netdevs does not move under
 480         * our feet
 481         */
 482        rtnl_lock();
 483        down_read(&net_rwsem);
 484        for_each_net(net)
 485                for_each_netdev(net, ndev) {
 486                        /*
 487                         * Filter and add default GIDs of the primary netdevice
 488                         * when not in bonding mode, or add default GIDs
 489                         * of bond master device, when in bonding mode.
 490                         */
 491                        if (is_ndev_for_default_gid_filter(ib_dev, port,
 492                                                           rdma_ndev, ndev))
 493                                add_default_gids(ib_dev, port, rdma_ndev, ndev);
 494
 495                        if (is_eth_port_of_netdev_filter(ib_dev, port,
 496                                                         rdma_ndev, ndev))
 497                                _add_netdev_ips(ib_dev, port, ndev);
 498                }
 499        up_read(&net_rwsem);
 500        rtnl_unlock();
 501}
 502
 503/**
 504 * rdma_roce_rescan_device - Rescan all of the network devices in the system
 505 * and add their gids, as needed, to the relevant RoCE devices.
 506 *
 507 * @device:         the rdma device
 508 */
 509void rdma_roce_rescan_device(struct ib_device *ib_dev)
 510{
 511        ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
 512                            enum_all_gids_of_dev_cb, NULL);
 513}
 514EXPORT_SYMBOL(rdma_roce_rescan_device);
 515
 516static void callback_for_addr_gid_device_scan(struct ib_device *device,
 517                                              u8 port,
 518                                              struct net_device *rdma_ndev,
 519                                              void *cookie)
 520{
 521        struct update_gid_event_work *parsed = cookie;
 522
 523        return update_gid(parsed->gid_op, device,
 524                          port, &parsed->gid,
 525                          &parsed->gid_attr);
 526}
 527
 528struct upper_list {
 529        struct list_head list;
 530        struct net_device *upper;
 531};
 532
 533static int netdev_upper_walk(struct net_device *upper, void *data)
 534{
 535        struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 536        struct list_head *upper_list = data;
 537
 538        if (!entry)
 539                return 0;
 540
 541        list_add_tail(&entry->list, upper_list);
 542        dev_hold(upper);
 543        entry->upper = upper;
 544
 545        return 0;
 546}
 547
 548static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
 549                                void *cookie,
 550                                void (*handle_netdev)(struct ib_device *ib_dev,
 551                                                      u8 port,
 552                                                      struct net_device *ndev))
 553{
 554        struct net_device *ndev = cookie;
 555        struct upper_list *upper_iter;
 556        struct upper_list *upper_temp;
 557        LIST_HEAD(upper_list);
 558
 559        rcu_read_lock();
 560        netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
 561        rcu_read_unlock();
 562
 563        handle_netdev(ib_dev, port, ndev);
 564        list_for_each_entry_safe(upper_iter, upper_temp, &upper_list,
 565                                 list) {
 566                handle_netdev(ib_dev, port, upper_iter->upper);
 567                dev_put(upper_iter->upper);
 568                list_del(&upper_iter->list);
 569                kfree(upper_iter);
 570        }
 571}
 572
 573static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 574                                      struct net_device *event_ndev)
 575{
 576        ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
 577}
 578
 579static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
 580                                 struct net_device *rdma_ndev, void *cookie)
 581{
 582        handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
 583}
 584
 585static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
 586                                 struct net_device *rdma_ndev, void *cookie)
 587{
 588        handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
 589}
 590
 591static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
 592                                        struct net_device *rdma_ndev,
 593                                        void *cookie)
 594{
 595        struct net_device *master_ndev;
 596
 597        rcu_read_lock();
 598        master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev);
 599        if (master_ndev)
 600                dev_hold(master_ndev);
 601        rcu_read_unlock();
 602
 603        if (master_ndev) {
 604                bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
 605                                                master_ndev);
 606                dev_put(master_ndev);
 607        }
 608}
 609
 610/* The following functions operate on all IB devices. netdevice_event and
 611 * addr_event execute ib_enum_all_roce_netdevs through a work.
 612 * ib_enum_all_roce_netdevs iterates through all IB devices.
 613 */
 614
 615static void netdevice_event_work_handler(struct work_struct *_work)
 616{
 617        struct netdev_event_work *work =
 618                container_of(_work, struct netdev_event_work, work);
 619        unsigned int i;
 620
 621        for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) {
 622                ib_enum_all_roce_netdevs(work->cmds[i].filter,
 623                                         work->cmds[i].filter_ndev,
 624                                         work->cmds[i].cb,
 625                                         work->cmds[i].ndev);
 626                dev_put(work->cmds[i].ndev);
 627                dev_put(work->cmds[i].filter_ndev);
 628        }
 629
 630        kfree(work);
 631}
 632
 633static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
 634                                struct net_device *ndev)
 635{
 636        unsigned int i;
 637        struct netdev_event_work *ndev_work =
 638                kmalloc(sizeof(*ndev_work), GFP_KERNEL);
 639
 640        if (!ndev_work)
 641                return NOTIFY_DONE;
 642
 643        memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
 644        for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
 645                if (!ndev_work->cmds[i].ndev)
 646                        ndev_work->cmds[i].ndev = ndev;
 647                if (!ndev_work->cmds[i].filter_ndev)
 648                        ndev_work->cmds[i].filter_ndev = ndev;
 649                dev_hold(ndev_work->cmds[i].ndev);
 650                dev_hold(ndev_work->cmds[i].filter_ndev);
 651        }
 652        INIT_WORK(&ndev_work->work, netdevice_event_work_handler);
 653
 654        queue_work(gid_cache_wq, &ndev_work->work);
 655
 656        return NOTIFY_DONE;
 657}
 658
 659static const struct netdev_event_work_cmd add_cmd = {
 660        .cb     = add_netdev_ips,
 661        .filter = is_eth_port_of_netdev_filter
 662};
 663
 664static const struct netdev_event_work_cmd add_cmd_upper_ips = {
 665        .cb     = add_netdev_upper_ips,
 666        .filter = is_eth_port_of_netdev_filter
 667};
 668
 669static void
 670ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
 671                  struct netdev_event_work_cmd *cmds)
 672{
 673        static const struct netdev_event_work_cmd
 674                        upper_ips_del_cmd = {
 675                                .cb     = del_netdev_upper_ips,
 676                                .filter = upper_device_filter
 677        };
 678
 679        cmds[0] = upper_ips_del_cmd;
 680        cmds[0].ndev = changeupper_info->upper_dev;
 681        cmds[1] = add_cmd;
 682}
 683
 684static const struct netdev_event_work_cmd bonding_default_add_cmd = {
 685        .cb     = add_default_gids,
 686        .filter = is_upper_ndev_bond_master_filter
 687};
 688
 689static void
 690ndev_event_link(struct net_device *event_ndev,
 691                struct netdev_notifier_changeupper_info *changeupper_info,
 692                struct netdev_event_work_cmd *cmds)
 693{
 694        static const struct netdev_event_work_cmd
 695                        bonding_default_del_cmd = {
 696                                .cb     = del_default_gids,
 697                                .filter = is_upper_ndev_bond_master_filter
 698                        };
 699        /*
 700         * When a lower netdev is linked to its upper bonding
 701         * netdev, delete lower slave netdev's default GIDs.
 702         */
 703        cmds[0] = bonding_default_del_cmd;
 704        cmds[0].ndev = event_ndev;
 705        cmds[0].filter_ndev = changeupper_info->upper_dev;
 706
 707        /* Now add bonding upper device default GIDs */
 708        cmds[1] = bonding_default_add_cmd;
 709        cmds[1].ndev = changeupper_info->upper_dev;
 710        cmds[1].filter_ndev = changeupper_info->upper_dev;
 711
 712        /* Now add bonding upper device IP based GIDs */
 713        cmds[2] = add_cmd_upper_ips;
 714        cmds[2].ndev = changeupper_info->upper_dev;
 715        cmds[2].filter_ndev = changeupper_info->upper_dev;
 716}
 717
 718static void netdevice_event_changeupper(struct net_device *event_ndev,
 719                struct netdev_notifier_changeupper_info *changeupper_info,
 720                struct netdev_event_work_cmd *cmds)
 721{
 722        if (changeupper_info->linking)
 723                ndev_event_link(event_ndev, changeupper_info, cmds);
 724        else
 725                ndev_event_unlink(changeupper_info, cmds);
 726}
 727
 728static const struct netdev_event_work_cmd add_default_gid_cmd = {
 729        .cb     = add_default_gids,
 730        .filter = is_ndev_for_default_gid_filter,
 731};
 732
 733static int netdevice_event(struct notifier_block *this, unsigned long event,
 734                           void *ptr)
 735{
 736        static const struct netdev_event_work_cmd del_cmd = {
 737                .cb = del_netdev_ips, .filter = pass_all_filter};
 738        static const struct netdev_event_work_cmd
 739                        bonding_default_del_cmd_join = {
 740                                .cb     = del_netdev_default_ips_join,
 741                                .filter = is_eth_port_inactive_slave_filter
 742                        };
 743        static const struct netdev_event_work_cmd
 744                        netdev_del_cmd = {
 745                                .cb     = del_netdev_ips,
 746                                .filter = is_eth_port_of_netdev_filter
 747                        };
 748        static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
 749                .cb = del_netdev_upper_ips, .filter = upper_device_filter};
 750        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
 751        struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} };
 752
 753        if (ndev->type != ARPHRD_ETHER)
 754                return NOTIFY_DONE;
 755
 756        switch (event) {
 757        case NETDEV_REGISTER:
 758        case NETDEV_UP:
 759                cmds[0] = bonding_default_del_cmd_join;
 760                cmds[1] = add_default_gid_cmd;
 761                cmds[2] = add_cmd;
 762                break;
 763
 764        case NETDEV_UNREGISTER:
 765                if (ndev->reg_state < NETREG_UNREGISTERED)
 766                        cmds[0] = del_cmd;
 767                else
 768                        return NOTIFY_DONE;
 769                break;
 770
 771        case NETDEV_CHANGEADDR:
 772                cmds[0] = netdev_del_cmd;
 773                if (ndev->reg_state == NETREG_REGISTERED) {
 774                        cmds[1] = add_default_gid_cmd;
 775                        cmds[2] = add_cmd;
 776                }
 777                break;
 778
 779        case NETDEV_CHANGEUPPER:
 780                netdevice_event_changeupper(ndev,
 781                        container_of(ptr, struct netdev_notifier_changeupper_info, info),
 782                        cmds);
 783                break;
 784
 785        case NETDEV_BONDING_FAILOVER:
 786                cmds[0] = bonding_event_ips_del_cmd;
 787                /* Add default GIDs of the bond device */
 788                cmds[1] = bonding_default_add_cmd;
 789                /* Add IP based GIDs of the bond device */
 790                cmds[2] = add_cmd_upper_ips;
 791                break;
 792
 793        default:
 794                return NOTIFY_DONE;
 795        }
 796
 797        return netdevice_queue_work(cmds, ndev);
 798}
 799
 800static void update_gid_event_work_handler(struct work_struct *_work)
 801{
 802        struct update_gid_event_work *work =
 803                container_of(_work, struct update_gid_event_work, work);
 804
 805        ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
 806                                 work->gid_attr.ndev,
 807                                 callback_for_addr_gid_device_scan, work);
 808
 809        dev_put(work->gid_attr.ndev);
 810        kfree(work);
 811}
 812
 813static int addr_event(struct notifier_block *this, unsigned long event,
 814                      struct sockaddr *sa, struct net_device *ndev)
 815{
 816        struct update_gid_event_work *work;
 817        enum gid_op_type gid_op;
 818
 819        if (ndev->type != ARPHRD_ETHER)
 820                return NOTIFY_DONE;
 821
 822        switch (event) {
 823        case NETDEV_UP:
 824                gid_op = GID_ADD;
 825                break;
 826
 827        case NETDEV_DOWN:
 828                gid_op = GID_DEL;
 829                break;
 830
 831        default:
 832                return NOTIFY_DONE;
 833        }
 834
 835        work = kmalloc(sizeof(*work), GFP_ATOMIC);
 836        if (!work)
 837                return NOTIFY_DONE;
 838
 839        INIT_WORK(&work->work, update_gid_event_work_handler);
 840
 841        rdma_ip2gid(sa, &work->gid);
 842        work->gid_op = gid_op;
 843
 844        memset(&work->gid_attr, 0, sizeof(work->gid_attr));
 845        dev_hold(ndev);
 846        work->gid_attr.ndev   = ndev;
 847
 848        queue_work(gid_cache_wq, &work->work);
 849
 850        return NOTIFY_DONE;
 851}
 852
 853static int inetaddr_event(struct notifier_block *this, unsigned long event,
 854                          void *ptr)
 855{
 856        struct sockaddr_in      in;
 857        struct net_device       *ndev;
 858        struct in_ifaddr        *ifa = ptr;
 859
 860        in.sin_family = AF_INET;
 861        in.sin_addr.s_addr = ifa->ifa_address;
 862        ndev = ifa->ifa_dev->dev;
 863
 864        return addr_event(this, event, (struct sockaddr *)&in, ndev);
 865}
 866
 867static int inet6addr_event(struct notifier_block *this, unsigned long event,
 868                           void *ptr)
 869{
 870        struct sockaddr_in6     in6;
 871        struct net_device       *ndev;
 872        struct inet6_ifaddr     *ifa6 = ptr;
 873
 874        in6.sin6_family = AF_INET6;
 875        in6.sin6_addr = ifa6->addr;
 876        ndev = ifa6->idev->dev;
 877
 878        return addr_event(this, event, (struct sockaddr *)&in6, ndev);
 879}
 880
 881static struct notifier_block nb_netdevice = {
 882        .notifier_call = netdevice_event
 883};
 884
 885static struct notifier_block nb_inetaddr = {
 886        .notifier_call = inetaddr_event
 887};
 888
 889static struct notifier_block nb_inet6addr = {
 890        .notifier_call = inet6addr_event
 891};
 892
 893int __init roce_gid_mgmt_init(void)
 894{
 895        gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
 896        if (!gid_cache_wq)
 897                return -ENOMEM;
 898
 899        register_inetaddr_notifier(&nb_inetaddr);
 900        if (IS_ENABLED(CONFIG_IPV6))
 901                register_inet6addr_notifier(&nb_inet6addr);
 902        /* We relay on the netdevice notifier to enumerate all
 903         * existing devices in the system. Register to this notifier
 904         * last to make sure we will not miss any IP add/del
 905         * callbacks.
 906         */
 907        register_netdevice_notifier(&nb_netdevice);
 908
 909        return 0;
 910}
 911
 912void __exit roce_gid_mgmt_cleanup(void)
 913{
 914        if (IS_ENABLED(CONFIG_IPV6))
 915                unregister_inet6addr_notifier(&nb_inet6addr);
 916        unregister_inetaddr_notifier(&nb_inetaddr);
 917        unregister_netdevice_notifier(&nb_netdevice);
 918        /* Ensure all gid deletion tasks complete before we go down,
 919         * to avoid any reference to free'd memory. By the time
 920         * ib-core is removed, all physical devices have been removed,
 921         * so no issue with remaining hardware contexts.
 922         */
 923        destroy_workqueue(gid_cache_wq);
 924}
 925