linux/drivers/infiniband/core/roce_gid_mgmt.c
/*
 * Copyright (c) 2015, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>

/* For in6_dev_get/in6_dev_put */
#include <net/addrconf.h>
#include <net/bonding.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

static struct workqueue_struct *gid_cache_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct update_gid_event_work {
	struct work_struct work;
	union ib_gid       gid;
	struct ib_gid_attr gid_attr;
	enum gid_op_type gid_op;
};

#define ROCE_NETDEV_CALLBACK_SZ		3
struct netdev_event_work_cmd {
	roce_netdev_callback	cb;
	roce_netdev_filter	filter;
	struct net_device	*ndev;
	struct net_device	*filter_ndev;
};

struct netdev_event_work {
	struct work_struct		work;
	struct netdev_event_work_cmd	cmds[ROCE_NETDEV_CALLBACK_SZ];
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u32 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);
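
/*
 * Illustrative only (not part of the upstream file): a caller could test the
 * returned mask to see whether RoCE v2 (UDP-encapsulated) GIDs are usable on
 * a port; use_roce_v2 below is a hypothetical flag:
 *
 *	unsigned long mask = roce_gid_type_mask_support(ib_dev, port);
 *
 *	if (mask & (1UL << IB_GID_TYPE_ROCE_UDP_ENCAP))
 *		use_roce_v2 = true;
 */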

static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
		       u32 port, union ib_gid *gid,
		       struct ib_gid_attr *gid_attr)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr->gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, gid_attr);
				break;
			}
		}
	}
}
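
/*
 * Note: update_gid() fans a single add/del request out to every GID type the
 * port supports, so one IP address normally yields both a RoCE v1 and a
 * RoCE v2 entry on a port that advertises both encapsulations.
 */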

enum bonding_slave_state {
	BONDING_SLAVE_STATE_ACTIVE	= 1UL << 0,
	BONDING_SLAVE_STATE_INACTIVE	= 1UL << 1,
	/* No primary slave or the device isn't a slave in bonding */
	BONDING_SLAVE_STATE_NA		= 1UL << 2,
};

static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev,
								   struct net_device *upper)
{
	if (upper && netif_is_bond_master(upper)) {
		struct net_device *pdev =
			bond_option_active_slave_get_rcu(netdev_priv(upper));

		if (pdev)
			return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE :
				BONDING_SLAVE_STATE_INACTIVE;
	}

	return BONDING_SLAVE_STATE_NA;
}

#define REQUIRED_BOND_STATES		(BONDING_SLAVE_STATE_ACTIVE |	\
					 BONDING_SLAVE_STATE_NA)
static bool
is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u32 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *real_dev;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	real_dev = rdma_vlan_dev_real_dev(cookie);
	if (!real_dev)
		real_dev = cookie;

	res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
	       (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
		REQUIRED_BOND_STATES)) ||
	       real_dev == rdma_ndev);

	rcu_read_unlock();
	return res;
}
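
/*
 * In other words, is_eth_port_of_netdev_filter() accepts the event netdevice
 * when its real device (after VLAN untagging) is the RDMA port's netdevice
 * itself, or when it is an upper device of the RDMA netdevice and the RDMA
 * netdevice is either the bond's currently active slave or not bonded at
 * all. Inactive slaves are rejected so only the active slave carries the
 * bond's GIDs.
 */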

static bool
is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u32 port,
				  struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *master_dev;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
	res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) ==
		BONDING_SLAVE_STATE_INACTIVE;
	rcu_read_unlock();

	return res;
}

/**
 * is_ndev_for_default_gid_filter - Check whether a given netdevice
 * should be considered for default GIDs.
 * @ib_dev:	IB device to check
 * @port:	Port to consider for adding default GID
 * @rdma_ndev:	rdma netdevice pointer
 * @cookie:	Netdevice to consider to form a default GID
 *
 * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
 * considered for deriving the default RoCE GID, false otherwise.
 */
static bool
is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u32 port,
			       struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	bool res;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();

	/*
	 * When the rdma netdevice is used in bonding, the bonding master
	 * netdevice should be considered for default GIDs. Therefore, ignore
	 * slave rdma netdevices when bonding is considered.
	 * Additionally, when the event (cookie) netdevice is the bond master
	 * device, make sure that it is the upper netdevice of the rdma
	 * netdevice.
	 */
	res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
	       (netif_is_bond_master(cookie_ndev) &&
		rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));

	rcu_read_unlock();
	return res;
}

static bool pass_all_filter(struct ib_device *ib_dev, u32 port,
			    struct net_device *rdma_ndev, void *cookie)
{
	return true;
}

static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
				struct net_device *rdma_ndev, void *cookie)
{
	bool res;

	if (!rdma_ndev)
		return false;

	if (rdma_ndev == cookie)
		return true;

	rcu_read_lock();
	res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
	rcu_read_unlock();

	return res;
}

/**
 * is_upper_ndev_bond_master_filter - Check whether a given netdevice
 * is the bond master device of the netdevice of the RDMA device's port.
 * @ib_dev:	IB device to check
 * @port:	Port to consider for adding default GID
 * @rdma_ndev:	Pointer to rdma netdevice
 * @cookie:	Netdevice to consider to form a default GID
 *
 * is_upper_ndev_bond_master_filter() returns true if the cookie netdevice
 * is a bond master device and rdma_ndev is its lower netdevice. rdma_ndev
 * might not have been established as a slave device yet.
 */
static bool
is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u32 port,
				 struct net_device *rdma_ndev,
				 void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	bool match = false;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();
	if (netif_is_bond_master(cookie_ndev) &&
	    rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
		match = true;
	rcu_read_unlock();
	return match;
}

static void update_gid_ip(enum gid_op_type gid_op,
			  struct ib_device *ib_dev,
			  u32 port, struct net_device *ndev,
			  struct sockaddr *addr)
{
	union ib_gid gid;
	struct ib_gid_attr gid_attr;

	rdma_ip2gid(addr, &gid);
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
}
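
/*
 * Illustrative only (not part of the upstream file): rdma_ip2gid() is what
 * turns an interface address into the GID stored above. For IPv4 it emits
 * the IPv4-mapped IPv6 form, roughly:
 *
 *	struct sockaddr_in in = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(0xc0a80101),	// 192.168.1.1
 *	};
 *	union ib_gid gid;
 *
 *	rdma_ip2gid((struct sockaddr *)&in, &gid);
 *	// gid now holds ::ffff:192.168.1.1
 */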

static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
					    u32 port,
					    struct net_device *rdma_ndev,
					    struct net_device *event_ndev)
{
	struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
	unsigned long gid_type_mask;

	if (!rdma_ndev)
		return;

	if (!real_dev)
		real_dev = event_ndev;

	rcu_read_lock();

	if ((rdma_ndev != event_ndev &&
	     !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
	    is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
	    BONDING_SLAVE_STATE_INACTIVE) {
		rcu_read_unlock();
		return;
	}

	rcu_read_unlock();

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
				     gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
}

static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
				 u32 port, struct net_device *ndev)
{
	const struct in_ifaddr *ifa;
	struct in_device *in_dev;
	struct sin_list {
		struct list_head	list;
		struct sockaddr_in	ip;
	};
	struct sin_list *sin_iter;
	struct sin_list *sin_temp;

	LIST_HEAD(sin_list);
	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(ndev);
	if (!in_dev) {
		rcu_read_unlock();
		return;
	}

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

		if (!entry)
			continue;

		entry->ip.sin_family = AF_INET;
		entry->ip.sin_addr.s_addr = ifa->ifa_address;
		list_add_tail(&entry->list, &sin_list);
	}

	rcu_read_unlock();

	list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) {
		update_gid_ip(GID_ADD, ib_dev, port, ndev,
			      (struct sockaddr *)&sin_iter->ip);
		list_del(&sin_iter->list);
		kfree(sin_iter);
	}
}
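
/*
 * Note on the pattern above: the interface addresses are snapshotted into a
 * private list with GFP_ATOMIC while the RCU read lock is held, and only
 * afterwards fed to update_gid_ip(), which may sleep in the GID cache.
 * Allocation failures are skipped rather than propagated, so an address can
 * occasionally be missed under memory pressure; a later rescan would pick
 * it up.
 */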

static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
				 u32 port, struct net_device *ndev)
{
	struct inet6_ifaddr *ifp;
	struct inet6_dev *in6_dev;
	struct sin6_list {
		struct list_head	list;
		struct sockaddr_in6	sin6;
	};
	struct sin6_list *sin6_iter;
	struct sin6_list *sin6_temp;
	struct ib_gid_attr gid_attr = {.ndev = ndev};
	LIST_HEAD(sin6_list);

	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	in6_dev = in6_dev_get(ndev);
	if (!in6_dev)
		return;

	read_lock_bh(&in6_dev->lock);
	list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
		struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

		if (!entry)
			continue;

		entry->sin6.sin6_family = AF_INET6;
		entry->sin6.sin6_addr = ifp->addr;
		list_add_tail(&entry->list, &sin6_list);
	}
	read_unlock_bh(&in6_dev->lock);

	in6_dev_put(in6_dev);

	list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) {
		union ib_gid	gid;

		rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid);
		update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr);
		list_del(&sin6_iter->list);
		kfree(sin6_iter);
	}
}
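
/*
 * The IPv6 walk mirrors the IPv4 one, but inet6_dev is reference counted
 * (in6_dev_get()/in6_dev_put()) and its address list is protected by
 * read_lock_bh(&in6_dev->lock) rather than by RCU.
 */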

static void _add_netdev_ips(struct ib_device *ib_dev, u32 port,
			    struct net_device *ndev)
{
	enum_netdev_ipv4_ips(ib_dev, port, ndev);
	if (IS_ENABLED(CONFIG_IPV6))
		enum_netdev_ipv6_ips(ib_dev, port, ndev);
}

static void add_netdev_ips(struct ib_device *ib_dev, u32 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	_add_netdev_ips(ib_dev, port, cookie);
}

static void del_netdev_ips(struct ib_device *ib_dev, u32 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}

/**
 * del_default_gids - Delete default GIDs of the event/cookie netdevice
 * @ib_dev:	RDMA device pointer
 * @port:	Port of the RDMA device whose GID table to consider
 * @rdma_ndev:	Unused rdma netdevice
 * @cookie:	Pointer to event netdevice
 *
 * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
 */
static void del_default_gids(struct ib_device *ib_dev, u32 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
}

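/*
 * add_default_gids - Add default GIDs of the event/cookie netdevice.
 * The default GID does not depend on any assigned IP address; it is
 * typically derived from the netdevice's link-layer (MAC) address, so it
 * exists for as long as the netdevice itself does.
 */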
static void add_default_gids(struct ib_device *ib_dev, u32 port,
			     struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *event_ndev = cookie;
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);
}

static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
				    u32 port,
				    struct net_device *rdma_ndev,
				    void *cookie)
{
	struct net *net;
	struct net_device *ndev;

	/* Lock the rtnl to make sure the netdevs do not move under
	 * our feet
	 */
	rtnl_lock();
	down_read(&net_rwsem);
	for_each_net(net)
		for_each_netdev(net, ndev) {
			/*
			 * Filter and add default GIDs of the primary netdevice
			 * when not in bonding mode, or add default GIDs
			 * of the bond master device when in bonding mode.
			 */
			if (is_ndev_for_default_gid_filter(ib_dev, port,
							   rdma_ndev, ndev))
				add_default_gids(ib_dev, port, rdma_ndev, ndev);

			if (is_eth_port_of_netdev_filter(ib_dev, port,
							 rdma_ndev, ndev))
				_add_netdev_ips(ib_dev, port, ndev);
		}
	up_read(&net_rwsem);
	rtnl_unlock();
}

/**
 * rdma_roce_rescan_device - Rescan all of the network devices in the system
 * and add their gids, as needed, to the relevant RoCE devices.
 *
 * @ib_dev:	the rdma device
 */
void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
	ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
			    enum_all_gids_of_dev_cb, NULL);
}
EXPORT_SYMBOL(rdma_roce_rescan_device);
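
/*
 * Illustrative only (not part of the upstream file): a RoCE driver would
 * typically rescan once its device is registered, so addresses configured
 * before the device appeared still get GID entries:
 *
 *	// after successful device/port registration in a driver's probe path
 *	rdma_roce_rescan_device(ib_dev);
 */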

static void callback_for_addr_gid_device_scan(struct ib_device *device,
					      u32 port,
					      struct net_device *rdma_ndev,
					      void *cookie)
{
	struct update_gid_event_work *parsed = cookie;

	update_gid(parsed->gid_op, device,
		   port, &parsed->gid,
		   &parsed->gid_attr);
}

struct upper_list {
	struct list_head list;
	struct net_device *upper;
};

static int netdev_upper_walk(struct net_device *upper,
			     struct netdev_nested_priv *priv)
{
	struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	struct list_head *upper_list = (struct list_head *)priv->data;

	if (!entry)
		return 0;

	list_add_tail(&entry->list, upper_list);
	dev_hold(upper);
	entry->upper = upper;

	return 0;
}

static void handle_netdev_upper(struct ib_device *ib_dev, u32 port,
				void *cookie,
				void (*handle_netdev)(struct ib_device *ib_dev,
						      u32 port,
						      struct net_device *ndev))
{
	struct net_device *ndev = cookie;
	struct netdev_nested_priv priv;
	struct upper_list *upper_iter;
	struct upper_list *upper_temp;
	LIST_HEAD(upper_list);

	priv.data = &upper_list;
	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv);
	rcu_read_unlock();

	handle_netdev(ib_dev, port, ndev);
	list_for_each_entry_safe(upper_iter, upper_temp, &upper_list,
				 list) {
		handle_netdev(ib_dev, port, upper_iter->upper);
		dev_put(upper_iter->upper);
		list_del(&upper_iter->list);
		kfree(upper_iter);
	}
}
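
/*
 * As with the address walks above, the upper devices are collected under RCU
 * with a reference held on each (dev_hold()), then handled and released once
 * the RCU read section has ended. handle_netdev() is applied to the event
 * netdevice itself first, then to every upper device above it.
 */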

static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
				      struct net_device *event_ndev)
{
	ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
}

static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
				 struct net_device *rdma_ndev, void *cookie)
{
	handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
}

static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
				 struct net_device *rdma_ndev, void *cookie)
{
	handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
}

static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port,
					struct net_device *rdma_ndev,
					void *cookie)
{
	struct net_device *master_ndev;

	rcu_read_lock();
	master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev);
	if (master_ndev)
		dev_hold(master_ndev);
	rcu_read_unlock();

	if (master_ndev) {
		bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
						master_ndev);
		dev_put(master_ndev);
	}
}

/* The following functions operate on all IB devices. netdevice_event and
 * addr_event schedule work items that call ib_enum_all_roce_netdevs, which
 * iterates through all IB devices.
 */

static void netdevice_event_work_handler(struct work_struct *_work)
{
	struct netdev_event_work *work =
		container_of(_work, struct netdev_event_work, work);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) {
		ib_enum_all_roce_netdevs(work->cmds[i].filter,
					 work->cmds[i].filter_ndev,
					 work->cmds[i].cb,
					 work->cmds[i].ndev);
		dev_put(work->cmds[i].ndev);
		dev_put(work->cmds[i].filter_ndev);
	}

	kfree(work);
}

static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
				struct net_device *ndev)
{
	unsigned int i;
	struct netdev_event_work *ndev_work =
		kmalloc(sizeof(*ndev_work), GFP_KERNEL);

	if (!ndev_work)
		return NOTIFY_DONE;

	memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
	for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
		if (!ndev_work->cmds[i].ndev)
			ndev_work->cmds[i].ndev = ndev;
		if (!ndev_work->cmds[i].filter_ndev)
			ndev_work->cmds[i].filter_ndev = ndev;
		dev_hold(ndev_work->cmds[i].ndev);
		dev_hold(ndev_work->cmds[i].filter_ndev);
	}
	INIT_WORK(&ndev_work->work, netdevice_event_work_handler);

	queue_work(gid_cache_wq, &ndev_work->work);

	return NOTIFY_DONE;
}
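
/*
 * Netdevice notifiers are invoked with the RTNL lock held, so the GID table
 * updates are deferred to the ordered gid_cache_wq rather than performed
 * inline: each event is translated into up to ROCE_NETDEV_CALLBACK_SZ
 * (filter, callback) commands. References on the netdevices are taken here
 * and dropped by the work handler, keeping them alive until the deferred
 * update has run.
 */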

static const struct netdev_event_work_cmd add_cmd = {
	.cb	= add_netdev_ips,
	.filter	= is_eth_port_of_netdev_filter
};

static const struct netdev_event_work_cmd add_cmd_upper_ips = {
	.cb	= add_netdev_upper_ips,
	.filter	= is_eth_port_of_netdev_filter
};

static void
ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
		  struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd upper_ips_del_cmd = {
		.cb	= del_netdev_upper_ips,
		.filter	= upper_device_filter
	};

	cmds[0] = upper_ips_del_cmd;
	cmds[0].ndev = changeupper_info->upper_dev;
	cmds[1] = add_cmd;
}

static const struct netdev_event_work_cmd bonding_default_add_cmd = {
	.cb	= add_default_gids,
	.filter	= is_upper_ndev_bond_master_filter
};

static void
ndev_event_link(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd bonding_default_del_cmd = {
		.cb	= del_default_gids,
		.filter	= is_upper_ndev_bond_master_filter
	};

	/*
	 * When a lower netdev is linked to its upper bonding
	 * netdev, delete lower slave netdev's default GIDs.
	 */
	cmds[0] = bonding_default_del_cmd;
	cmds[0].ndev = event_ndev;
	cmds[0].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device default GIDs */
	cmds[1] = bonding_default_add_cmd;
	cmds[1].ndev = changeupper_info->upper_dev;
	cmds[1].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device IP based GIDs */
	cmds[2] = add_cmd_upper_ips;
	cmds[2].ndev = changeupper_info->upper_dev;
	cmds[2].filter_ndev = changeupper_info->upper_dev;
}

static void netdevice_event_changeupper(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	if (changeupper_info->linking)
		ndev_event_link(event_ndev, changeupper_info, cmds);
	else
		ndev_event_unlink(changeupper_info, cmds);
}

static const struct netdev_event_work_cmd add_default_gid_cmd = {
	.cb	= add_default_gids,
	.filter	= is_ndev_for_default_gid_filter,
};

static int netdevice_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	static const struct netdev_event_work_cmd del_cmd = {
		.cb = del_netdev_ips, .filter = pass_all_filter};
	static const struct netdev_event_work_cmd bonding_default_del_cmd_join = {
		.cb	= del_netdev_default_ips_join,
		.filter	= is_eth_port_inactive_slave_filter
	};
	static const struct netdev_event_work_cmd netdev_del_cmd = {
		.cb	= del_netdev_ips,
		.filter	= is_eth_port_of_netdev_filter
	};
	static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
		.cb = del_netdev_upper_ips, .filter = upper_device_filter};
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} };

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UP:
		cmds[0] = bonding_default_del_cmd_join;
		cmds[1] = add_default_gid_cmd;
		cmds[2] = add_cmd;
		break;

	case NETDEV_UNREGISTER:
		if (ndev->reg_state < NETREG_UNREGISTERED)
			cmds[0] = del_cmd;
		else
			return NOTIFY_DONE;
		break;

	case NETDEV_CHANGEADDR:
		cmds[0] = netdev_del_cmd;
		if (ndev->reg_state == NETREG_REGISTERED) {
			cmds[1] = add_default_gid_cmd;
			cmds[2] = add_cmd;
		}
		break;

	case NETDEV_CHANGEUPPER:
		netdevice_event_changeupper(ndev,
			container_of(ptr, struct netdev_notifier_changeupper_info, info),
			cmds);
		break;

	case NETDEV_BONDING_FAILOVER:
		cmds[0] = bonding_event_ips_del_cmd;
		/* Add default GIDs of the bond device */
		cmds[1] = bonding_default_add_cmd;
		/* Add IP based GIDs of the bond device */
		cmds[2] = add_cmd_upper_ips;
		break;

	default:
		return NOTIFY_DONE;
	}

	return netdevice_queue_work(cmds, ndev);
}
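
/*
 * Summary of the event handling above:
 *
 *	NETDEV_REGISTER/UP	- purge default GIDs from inactive bond
 *				  slaves, then add default and IP-based GIDs
 *	NETDEV_UNREGISTER	- delete all GIDs of the netdevice
 *	NETDEV_CHANGEADDR	- delete GIDs and, while still registered,
 *				  re-add them (a new MAC changes the
 *				  default GID)
 *	NETDEV_CHANGEUPPER	- move GIDs between a slave and its bond
 *				  master on link/unlink
 *	NETDEV_BONDING_FAILOVER	- re-add the bond's default and IP-based
 *				  GIDs so they land on the new active slave
 */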

static void update_gid_event_work_handler(struct work_struct *_work)
{
	struct update_gid_event_work *work =
		container_of(_work, struct update_gid_event_work, work);

	ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
				 work->gid_attr.ndev,
				 callback_for_addr_gid_device_scan, work);

	dev_put(work->gid_attr.ndev);
	kfree(work);
}

static int addr_event(struct notifier_block *this, unsigned long event,
		      struct sockaddr *sa, struct net_device *ndev)
{
	struct update_gid_event_work *work;
	enum gid_op_type gid_op;

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		gid_op = GID_ADD;
		break;

	case NETDEV_DOWN:
		gid_op = GID_DEL;
		break;

	default:
		return NOTIFY_DONE;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return NOTIFY_DONE;

	INIT_WORK(&work->work, update_gid_event_work_handler);

	rdma_ip2gid(sa, &work->gid);
	work->gid_op = gid_op;

	memset(&work->gid_attr, 0, sizeof(work->gid_attr));
	dev_hold(ndev);
	work->gid_attr.ndev   = ndev;

	queue_work(gid_cache_wq, &work->work);

	return NOTIFY_DONE;
}
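
/*
 * addr_event() may be called from atomic context (at least the inet6addr
 * notifier chain is atomic), hence the GFP_ATOMIC allocation; the GID table
 * update itself is again deferred to gid_cache_wq, with a netdevice
 * reference held until update_gid_event_work_handler() releases it.
 */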

static int inetaddr_event(struct notifier_block *this, unsigned long event,
			  void *ptr)
{
	struct sockaddr_in	in;
	struct net_device	*ndev;
	struct in_ifaddr	*ifa = ptr;

	in.sin_family = AF_INET;
	in.sin_addr.s_addr = ifa->ifa_address;
	ndev = ifa->ifa_dev->dev;

	return addr_event(this, event, (struct sockaddr *)&in, ndev);
}

static int inet6addr_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct sockaddr_in6	in6;
	struct net_device	*ndev;
	struct inet6_ifaddr	*ifa6 = ptr;

	in6.sin6_family = AF_INET6;
	in6.sin6_addr = ifa6->addr;
	ndev = ifa6->idev->dev;

	return addr_event(this, event, (struct sockaddr *)&in6, ndev);
}

static struct notifier_block nb_netdevice = {
	.notifier_call = netdevice_event
};

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static struct notifier_block nb_inet6addr = {
	.notifier_call = inet6addr_event
};

int __init roce_gid_mgmt_init(void)
{
	gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
	if (!gid_cache_wq)
		return -ENOMEM;

	register_inetaddr_notifier(&nb_inetaddr);
	if (IS_ENABLED(CONFIG_IPV6))
		register_inet6addr_notifier(&nb_inet6addr);
	/* We rely on the netdevice notifier to enumerate all
	 * existing devices in the system. Register to this notifier
	 * last to make sure we will not miss any IP add/del
	 * callbacks.
	 */
	register_netdevice_notifier(&nb_netdevice);

	return 0;
}

void __exit roce_gid_mgmt_cleanup(void)
{
	if (IS_ENABLED(CONFIG_IPV6))
		unregister_inet6addr_notifier(&nb_inet6addr);
	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_netdevice);
	/* Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to freed memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	destroy_workqueue(gid_cache_wq);
}