linux/drivers/infiniband/core/cache.c
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
        int             table_len;
        u16             table[0];
};

struct ib_update_work {
        struct work_struct work;
        struct ib_device  *device;
        u8                 port_num;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

static const struct ib_gid_attr zattr;

enum gid_attr_find_mask {
        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
        GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
};

enum gid_table_entry_props {
        GID_TABLE_ENTRY_INVALID         = 1UL << 0,
        GID_TABLE_ENTRY_DEFAULT         = 1UL << 1,
};

enum gid_table_write_action {
        GID_TABLE_WRITE_ACTION_ADD,
        GID_TABLE_WRITE_ACTION_DEL,
        /* MODIFY only updates the GID table. Currently only used by
         * ib_cache_update.
         */
        GID_TABLE_WRITE_ACTION_MODIFY
};

struct ib_gid_table_entry {
        /* This lock protects an entry from being
         * read and written simultaneously.
         */
        rwlock_t            lock;
        unsigned long       props;
        union ib_gid        gid;
        struct ib_gid_attr  attr;
        void               *context;
};

struct ib_gid_table {
        int                  sz;
        /* In RoCE, adding a GID to the table requires:
         * (a) Find out if this GID already exists.
         * (b) Find a free slot.
         * (c) Write the new GID.
         *
         * Deleting requires a different set of operations:
         * (a) Find the GID.
         * (b) Delete it.
         *
         * Add/delete should be carried out atomically.
         * This is done by taking this mutex on all write
         * paths. We don't need this lock for IB, as the MAD
         * layer replaces all entries. All data_vec entries
         * are locked by this lock.
         */
        struct mutex         lock;
        struct ib_gid_table_entry *data_vec;
};

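/* write_gid() updates entry @ix of @table according to @action.  On
 * devices with a RoCE GID table the vendor add_gid/del_gid callback is
 * invoked while the entry is marked GID_TABLE_ENTRY_INVALID, so that
 * concurrent readers skip the entry while it is in flux; those paths must
 * therefore run under the sleepable table->lock (see the comment below).
 * GID_TABLE_WRITE_ACTION_MODIFY never reaches the vendor driver and only
 * refreshes the cache.
 */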
static int write_gid(struct ib_device *ib_dev, u8 port,
                     struct ib_gid_table *table, int ix,
                     const union ib_gid *gid,
                     const struct ib_gid_attr *attr,
                     enum gid_table_write_action action,
                     bool  default_gid)
{
        int ret = 0;
        struct net_device *old_net_dev;
        unsigned long flags;

        /* When rdma_cap_roce_gid_table() is true, this function must be
         * protected by a sleepable lock.
         */
        write_lock_irqsave(&table->data_vec[ix].lock, flags);

        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
                write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
                /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
                 * RoCE providers and thus only updates the cache.
                 */
                if (action == GID_TABLE_WRITE_ACTION_ADD)
                        ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
                                              &table->data_vec[ix].context);
                else if (action == GID_TABLE_WRITE_ACTION_DEL)
                        ret = ib_dev->del_gid(ib_dev, port, ix,
                                              &table->data_vec[ix].context);
                write_lock_irqsave(&table->data_vec[ix].lock, flags);
        }

        old_net_dev = table->data_vec[ix].attr.ndev;
        if (old_net_dev && old_net_dev != attr->ndev)
                dev_put(old_net_dev);
        /* If the vendor update failed, or this is a delete, store the
         * zero GID instead.
         */
        if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
                gid = &zgid;
                attr = &zattr;
                table->data_vec[ix].context = NULL;
        }
        if (default_gid)
                table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
        memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
        memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
        if (table->data_vec[ix].attr.ndev &&
            table->data_vec[ix].attr.ndev != old_net_dev)
                dev_hold(table->data_vec[ix].attr.ndev);

        table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;

        write_unlock_irqrestore(&table->data_vec[ix].lock, flags);

        if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
                struct ib_event event;

                event.device            = ib_dev;
                event.element.port_num  = port;
                event.event             = IB_EVENT_GID_CHANGE;

                ib_dispatch_event(&event);
        }
        return ret;
}

static int add_gid(struct ib_device *ib_dev, u8 port,
                   struct ib_gid_table *table, int ix,
                   const union ib_gid *gid,
                   const struct ib_gid_attr *attr,
                   bool  default_gid)
{
        return write_gid(ib_dev, port, table, ix, gid, attr,
                         GID_TABLE_WRITE_ACTION_ADD, default_gid);
}

static int modify_gid(struct ib_device *ib_dev, u8 port,
                      struct ib_gid_table *table, int ix,
                      const union ib_gid *gid,
                      const struct ib_gid_attr *attr,
                      bool  default_gid)
{
        return write_gid(ib_dev, port, table, ix, gid, attr,
                         GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
}

static int del_gid(struct ib_device *ib_dev, u8 port,
                   struct ib_gid_table *table, int ix,
                   bool  default_gid)
{
        return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
                         GID_TABLE_WRITE_ACTION_DEL, default_gid);
}

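/* find_gid() returns the index of the first entry matching @gid/@val
 * under @mask, or -1 if there is no match.  It only takes the per-entry
 * read locks, so it may run without table->lock held.
 */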
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                    const struct ib_gid_attr *val, bool default_gid,
                    unsigned long mask)
{
        int i;

        for (i = 0; i < table->sz; i++) {
                unsigned long flags;
                struct ib_gid_attr *attr = &table->data_vec[i].attr;

                read_lock_irqsave(&table->data_vec[i].lock, flags);

                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
                        goto next;

                if (mask & GID_ATTR_FIND_MASK_GID &&
                    memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
                        goto next;

                if (mask & GID_ATTR_FIND_MASK_NETDEV &&
                    attr->ndev != val->ndev)
                        goto next;

                if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
                    !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
                    default_gid)
                        goto next;

                read_unlock_irqrestore(&table->data_vec[i].lock, flags);
                return i;
next:
                read_unlock_irqrestore(&table->data_vec[i].lock, flags);
        }

        return -1;
}

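/* The default GID is the IPv6 link-local address: the fe80::/64 prefix
 * with an EUI-64 interface ID derived from the netdev's MAC address.
 */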
static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
        gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
        addrconf_ifid_eui48(&gid->raw[8], dev);
}

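/* ib_cache_gid_add() adds @gid with attributes @attr to @port's GID table.
 * Adding a GID equal to the port's default GID is rejected with -EPERM,
 * and adding an already-present <GID, netdev> pair is a no-op that
 * returns success.
 */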
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
                     union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        int ix;
        int ret = 0;
        struct net_device *idev;

        table = ports_table[port - rdma_start_port(ib_dev)];

        if (!memcmp(gid, &zgid, sizeof(*gid)))
                return -EINVAL;

        if (ib_dev->get_netdev) {
                idev = ib_dev->get_netdev(ib_dev, port);
                if (idev && attr->ndev != idev) {
                        union ib_gid default_gid;

                        /* Adding default GIDs is not permitted */
                        make_default_gid(idev, &default_gid);
                        if (!memcmp(gid, &default_gid, sizeof(*gid))) {
                                dev_put(idev);
                                return -EPERM;
                        }
                }
                if (idev)
                        dev_put(idev);
        }

        mutex_lock(&table->lock);

        ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
                      GID_ATTR_FIND_MASK_NETDEV);
        if (ix >= 0)
                goto out_unlock;

        ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
                      GID_ATTR_FIND_MASK_DEFAULT);
        if (ix < 0) {
                ret = -ENOSPC;
                goto out_unlock;
        }

        add_gid(ib_dev, port, table, ix, gid, attr, false);

out_unlock:
        mutex_unlock(&table->lock);
        return ret;
}

int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
                     union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        int ix;

        table = ports_table[port - rdma_start_port(ib_dev)];

        mutex_lock(&table->lock);

        ix = find_gid(table, gid, attr, false,
                      GID_ATTR_FIND_MASK_GID      |
                      GID_ATTR_FIND_MASK_NETDEV   |
                      GID_ATTR_FIND_MASK_DEFAULT);
        if (ix < 0)
                goto out_unlock;

        del_gid(ib_dev, port, table, ix, false);

out_unlock:
        mutex_unlock(&table->lock);
        return 0;
}

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
                                     struct net_device *ndev)
{
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        int ix;

        table = ports_table[port - rdma_start_port(ib_dev)];

        mutex_lock(&table->lock);

        for (ix = 0; ix < table->sz; ix++)
                if (table->data_vec[ix].attr.ndev == ndev)
                        del_gid(ib_dev, port, table, ix, false);

        mutex_unlock(&table->lock);
        return 0;
}

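/* __ib_cache_gid_get() copies entry @index into @gid and, optionally,
 * @attr.  It returns -EAGAIN while the entry is marked invalid (i.e. a
 * write is in flight).  On success with a non-NULL @attr the caller is
 * handed a reference to attr->ndev and must drop it with dev_put().
 */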
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
                              union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        unsigned long flags;

        table = ports_table[port - rdma_start_port(ib_dev)];

        if (index < 0 || index >= table->sz)
                return -EINVAL;

        read_lock_irqsave(&table->data_vec[index].lock, flags);
        if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
                read_unlock_irqrestore(&table->data_vec[index].lock, flags);
                return -EAGAIN;
        }

        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
        if (attr) {
                memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
                if (attr->ndev)
                        dev_hold(attr->ndev);
        }

        read_unlock_irqrestore(&table->data_vec[index].lock, flags);
        return 0;
}

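/* Scan the GID tables of all ports for a match.  On success the port
 * number (biased by rdma_start_port()) and the index within that port's
 * table are reported through @port and @index.
 */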
static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
                                    const union ib_gid *gid,
                                    const struct ib_gid_attr *val,
                                    unsigned long mask,
                                    u8 *port, u16 *index)
{
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        u8 p;
        int local_index;

        for (p = 0; p < ib_dev->phys_port_cnt; p++) {
                table = ports_table[p];
                local_index = find_gid(table, gid, val, false, mask);
                if (local_index >= 0) {
                        if (index)
                                *index = local_index;
                        if (port)
                                *port = p + rdma_start_port(ib_dev);
                        return 0;
                }
        }

        return -ENOENT;
}

static int ib_cache_gid_find(struct ib_device *ib_dev,
                             const union ib_gid *gid,
                             struct net_device *ndev, u8 *port,
                             u16 *index)
{
        unsigned long mask = GID_ATTR_FIND_MASK_GID;
        struct ib_gid_attr gid_attr_val = {.ndev = ndev};

        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;

        return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
                                        mask, port, index);
}

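/* Like ib_cache_gid_find(), but restricted to a single port.  @ndev is
 * only compared when it is non-NULL, and @index may be NULL when the
 * caller merely wants to know whether the GID exists.
 */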
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
                               const union ib_gid *gid,
                               u8 port, struct net_device *ndev,
                               u16 *index)
{
        int local_index;
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        unsigned long mask = GID_ATTR_FIND_MASK_GID;
        struct ib_gid_attr val = {.ndev = ndev};

        if (port < rdma_start_port(ib_dev) ||
            port > rdma_end_port(ib_dev))
                return -ENOENT;

        table = ports_table[port - rdma_start_port(ib_dev)];

        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;

        local_index = find_gid(table, gid, &val, false, mask);
        if (local_index >= 0) {
                if (index)
                        *index = local_index;
                return 0;
        }

        return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);

/**
 * ib_cache_gid_find_by_filter - Returns the GID table index where a
 *   specified GID value occurs
 * @ib_dev: The device to query.
 * @gid: The GID value to search for.
 * @port: The port number of the device where the GID value should be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
 * @context: Opaque pointer passed through to @filter.
 * @index: The index into the cached GID table where the GID was found.  This
 *   parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * for which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
                                       const union ib_gid *gid,
                                       u8 port,
                                       bool (*filter)(const union ib_gid *,
                                                      const struct ib_gid_attr *,
                                                      void *),
                                       void *context,
                                       u16 *index)
{
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        struct ib_gid_table *table;
        unsigned int i;
        bool found = false;

        if (!ports_table)
                return -EOPNOTSUPP;

        if (port < rdma_start_port(ib_dev) ||
            port > rdma_end_port(ib_dev) ||
            !rdma_protocol_roce(ib_dev, port))
                return -EPROTONOSUPPORT;

        table = ports_table[port - rdma_start_port(ib_dev)];

        for (i = 0; i < table->sz; i++) {
                struct ib_gid_attr attr;
                unsigned long flags;

                read_lock_irqsave(&table->data_vec[i].lock, flags);
                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
                        goto next;

                if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
                        goto next;

                memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

                if (filter(gid, &attr, context))
                        found = true;

next:
                read_unlock_irqrestore(&table->data_vec[i].lock, flags);

                if (found)
                        break;
        }

        if (!found)
                return -ENOENT;

        if (index)
                *index = i;
        return 0;
}

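/* alloc_gid_table() allocates a table with @sz zero-initialized entries,
 * initializing the table mutex and the per-entry rwlocks.  Returns NULL
 * on allocation failure.
 */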
static struct ib_gid_table *alloc_gid_table(int sz)
{
        unsigned int i;
        struct ib_gid_table *table =
                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);

        if (!table)
                return NULL;

        table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
        if (!table->data_vec)
                goto err_free_table;

        mutex_init(&table->lock);

        table->sz = sz;

        for (i = 0; i < sz; i++)
                rwlock_init(&table->data_vec[i].lock);

        return table;

err_free_table:
        kfree(table);
        return NULL;
}

static void release_gid_table(struct ib_gid_table *table)
{
        if (table) {
                kfree(table->data_vec);
                kfree(table);
        }
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
                                   struct ib_gid_table *table)
{
        int i;

        if (!table)
                return;

        for (i = 0; i < table->sz; ++i) {
                if (memcmp(&table->data_vec[i].gid, &zgid,
                           sizeof(table->data_vec[i].gid)))
                        /* Preserve the entry's default-GID property bit
                         * when deleting it.
                         */
                        del_gid(ib_dev, port, table, i,
                                table->data_vec[i].props &
                                GID_TABLE_ENTRY_DEFAULT);
        }
}

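/* ib_cache_gid_set_default_gid() maintains the reserved default GID entry
 * for @ndev (slot 0 on RoCE ports, reserved by gid_table_reserve_default()).
 * In IB_CACHE_GID_DEFAULT_MODE_SET mode the default GID is (re)written;
 * in any other mode the current default entry is only deleted.
 */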
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
                                  enum ib_cache_gid_default_mode mode)
{
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
        union ib_gid gid;
        struct ib_gid_attr gid_attr;
        struct ib_gid_table *table;
        int ix;
        union ib_gid current_gid;
        struct ib_gid_attr current_gid_attr = {};

        table = ports_table[port - rdma_start_port(ib_dev)];

        make_default_gid(ndev, &gid);
        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;

        mutex_lock(&table->lock);
        ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);

        /* Couldn't find the default GID location */
        WARN_ON(ix < 0);

        if (!__ib_cache_gid_get(ib_dev, port, ix,
                                &current_gid, &current_gid_attr) &&
            mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
            !memcmp(&gid, &current_gid, sizeof(gid)) &&
            !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
                goto unlock;

        if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
             memcmp(&current_gid_attr, &zattr,
                    sizeof(current_gid_attr))) &&
            del_gid(ib_dev, port, table, ix, true)) {
                pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
                        ix, gid.raw);
                goto unlock;
        }

        if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
                if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
                        pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
                                gid.raw);

unlock:
        if (current_gid_attr.ndev)
                dev_put(current_gid_attr.ndev);
        mutex_unlock(&table->lock);
}

static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
                                     struct ib_gid_table *table)
{
        if (rdma_protocol_roce(ib_dev, port)) {
                struct ib_gid_table_entry *entry = &table->data_vec[0];

                entry->props |= GID_TABLE_ENTRY_DEFAULT;
        }

        return 0;
}

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
        u8 port;
        struct ib_gid_table **table;
        int err = 0;

        table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
        if (!table) {
                pr_warn("failed to allocate ib gid cache for %s\n",
                        ib_dev->name);
                return -ENOMEM;
        }

        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                u8 rdma_port = port + rdma_start_port(ib_dev);

                table[port] =
                        alloc_gid_table(
                                ib_dev->port_immutable[rdma_port].gid_tbl_len);
                if (!table[port]) {
                        err = -ENOMEM;
                        goto rollback_table_setup;
                }

                err = gid_table_reserve_default(ib_dev, rdma_port,
                                                table[port]);
                if (err)
                        goto rollback_table_setup;
        }

        ib_dev->cache.gid_cache = table;
        return 0;

rollback_table_setup:
        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
                                       table[port]);
                release_gid_table(table[port]);
        }

        kfree(table);
        return err;
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
        struct ib_gid_table **table = ib_dev->cache.gid_cache;
        u8 port;

        if (!table)
                return;

        for (port = 0; port < ib_dev->phys_port_cnt; port++)
                release_gid_table(table[port]);

        kfree(table);
        ib_dev->cache.gid_cache = NULL;
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
        struct ib_gid_table **table = ib_dev->cache.gid_cache;
        u8 port;

        if (!table)
                return;

        for (port = 0; port < ib_dev->phys_port_cnt; port++)
                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
                                       table[port]);
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
        int err;

        err = _gid_table_setup_one(ib_dev);
        if (err)
                return err;

        err = roce_rescan_device(ib_dev);
        if (err) {
                gid_table_cleanup_one(ib_dev);
                gid_table_release_one(ib_dev);
        }

        return err;
}

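/* ib_get_cached_gid() is the cached counterpart of ib_query_gid().  It
 * only takes a per-entry read lock, so it is cheap and callable from
 * atomic context; with a non-NULL @gid_attr the caller must dev_put()
 * the returned attr->ndev (see __ib_cache_gid_get()).
 */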
int ib_get_cached_gid(struct ib_device *device,
                      u8                port_num,
                      int               index,
                      union ib_gid     *gid,
                      struct ib_gid_attr *gid_attr)
{
        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;

        return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
}
EXPORT_SYMBOL(ib_get_cached_gid);

int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index)
{
        return ib_cache_gid_find(device, gid, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);

int ib_find_gid_by_filter(struct ib_device *device,
                          const union ib_gid *gid,
                          u8 port_num,
                          bool (*filter)(const union ib_gid *gid,
                                         const struct ib_gid_attr *,
                                         void *),
                          void *context, u16 *index)
{
        /* Only RoCE GID table supports filter function */
        if (!rdma_cap_roce_gid_table(device, port_num) && filter)
                return -EPROTONOSUPPORT;

        return ib_cache_gid_find_by_filter(device, gid,
                                           port_num, filter,
                                           context, index);
}
EXPORT_SYMBOL(ib_find_gid_by_filter);

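/* Illustrative only: a minimal sketch of a filter callback of the shape
 * ib_find_gid_by_filter() expects.  The names below (filter_by_ndev,
 * my_ndev) are hypothetical and not part of this file.
 *
 *	static bool filter_by_ndev(const union ib_gid *gid,
 *				   const struct ib_gid_attr *attr,
 *				   void *context)
 *	{
 *		// Match only entries bound to the netdev in @context.
 *		return attr->ndev == context;
 *	}
 *
 *	u16 index;
 *	int ret = ib_find_gid_by_filter(device, gid, port, filter_by_ndev,
 *					my_ndev, &index);
 */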
int ib_get_cached_pkey(struct ib_device *device,
                       u8                port_num,
                       int               index,
                       u16              *pkey)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int ret = 0;

        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

        if (index < 0 || index >= cache->table_len)
                ret = -EINVAL;
        else
                *pkey = cache->table[index];

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

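/* ib_find_cached_pkey() prefers a full-membership match (bit 15 set) but
 * falls back to a partial-membership P_Key with the same low 15 bits.
 */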
int ib_find_cached_pkey(struct ib_device *device,
                        u8                port_num,
                        u16               pkey,
                        u16              *index)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int i;
        int ret = -ENOENT;
        int partial_ix = -1;

        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

        *index = -1;

        for (i = 0; i < cache->table_len; ++i)
                if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
                        if (cache->table[i] & 0x8000) {
                                *index = i;
                                ret = 0;
                                break;
                        } else {
                                partial_ix = i;
                        }
                }

        if (ret && partial_ix >= 0) {
                *index = partial_ix;
                ret = 0;
        }

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);

int ib_find_exact_cached_pkey(struct ib_device *device,
                              u8                port_num,
                              u16               pkey,
                              u16              *index)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int i;
        int ret = -ENOENT;

        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

        *index = -1;

        for (i = 0; i < cache->table_len; ++i)
                if (cache->table[i] == pkey) {
                        *index = i;
                        ret = 0;
                        break;
                }

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

int ib_get_cached_lmc(struct ib_device *device,
                      u8                port_num,
                      u8                *lmc)
{
        unsigned long flags;
        int ret = 0;

        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);
        *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

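/* ib_cache_update() re-reads the port attributes, the P_Key table and,
 * for ports without a RoCE GID table, the GID table from the device, then
 * swaps the fresh copies into the cache under cache.lock.  It runs from
 * the ib_wq work item queued by ib_cache_event().
 */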
static void ib_cache_update(struct ib_device *device,
                            u8                port)
{
        struct ib_port_attr       *tprops = NULL;
        struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
        struct ib_gid_cache {
                int             table_len;
                union ib_gid    table[0];
        }                         *gid_cache = NULL;
        int                        i;
        int                        ret;
        struct ib_gid_table       *table;
        struct ib_gid_table      **ports_table = device->cache.gid_cache;
        bool                       use_roce_gid_table =
                                        rdma_cap_roce_gid_table(device, port);

        if (port < rdma_start_port(device) || port > rdma_end_port(device))
                return;

        table = ports_table[port - rdma_start_port(device)];

        tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
        if (!tprops)
                return;

        ret = ib_query_port(device, port, tprops);
        if (ret) {
                pr_warn("ib_query_port failed (%d) for %s\n",
                        ret, device->name);
                goto err;
        }

        pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
                             sizeof *pkey_cache->table, GFP_KERNEL);
        if (!pkey_cache)
                goto err;

        pkey_cache->table_len = tprops->pkey_tbl_len;

        if (!use_roce_gid_table) {
                gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
                            sizeof(*gid_cache->table), GFP_KERNEL);
                if (!gid_cache)
                        goto err;

                gid_cache->table_len = tprops->gid_tbl_len;
        }

        for (i = 0; i < pkey_cache->table_len; ++i) {
                ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
                if (ret) {
                        pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
                                ret, device->name, i);
                        goto err;
                }
        }

        if (!use_roce_gid_table) {
                for (i = 0; i < gid_cache->table_len; ++i) {
                        ret = ib_query_gid(device, port, i,
                                           gid_cache->table + i, NULL);
                        if (ret) {
                                pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
                                        ret, device->name, i);
                                goto err;
                        }
                }
        }

        write_lock_irq(&device->cache.lock);

        old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];

        device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
        if (!use_roce_gid_table) {
                for (i = 0; i < gid_cache->table_len; i++) {
                        modify_gid(device, port, table, i, gid_cache->table + i,
                                   &zattr, false);
                }
        }

        device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;

        write_unlock_irq(&device->cache.lock);

        kfree(gid_cache);
        kfree(old_pkey_cache);
        kfree(tprops);
        return;

err:
        kfree(pkey_cache);
        kfree(gid_cache);
        kfree(tprops);
}

static void ib_cache_task(struct work_struct *_work)
{
        struct ib_update_work *work =
                container_of(_work, struct ib_update_work, work);

        ib_cache_update(work->device, work->port_num);
        kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
                           struct ib_event *event)
{
        struct ib_update_work *work;

        if (event->event == IB_EVENT_PORT_ERR    ||
            event->event == IB_EVENT_PORT_ACTIVE ||
            event->event == IB_EVENT_LID_CHANGE  ||
            event->event == IB_EVENT_PKEY_CHANGE ||
            event->event == IB_EVENT_SM_CHANGE   ||
            event->event == IB_EVENT_CLIENT_REREGISTER ||
            event->event == IB_EVENT_GID_CHANGE) {
                work = kmalloc(sizeof *work, GFP_ATOMIC);
                if (work) {
                        INIT_WORK(&work->work, ib_cache_task);
                        work->device   = event->device;
                        work->port_num = event->element.port_num;
                        queue_work(ib_wq, &work->work);
                }
        }
}

int ib_cache_setup_one(struct ib_device *device)
{
        int p;
        int err;

        rwlock_init(&device->cache.lock);

        device->cache.pkey_cache =
                kzalloc(sizeof *device->cache.pkey_cache *
                        (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
        device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
                                          (rdma_end_port(device) -
                                           rdma_start_port(device) + 1),
                                          GFP_KERNEL);
        if (!device->cache.pkey_cache ||
            !device->cache.lmc_cache) {
                pr_warn("Couldn't allocate cache for %s\n", device->name);
                return -ENOMEM;
        }

        err = gid_table_setup_one(device);
        if (err)
                /* Allocated memory will be cleaned in the release function */
                return err;

        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
                ib_cache_update(device, p + rdma_start_port(device));

        INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
                              device, ib_cache_event);
        err = ib_register_event_handler(&device->cache.event_handler);
        if (err)
                goto err;

        return 0;

err:
        gid_table_cleanup_one(device);
        return err;
}

void ib_cache_release_one(struct ib_device *device)
{
        int p;

        /*
         * The release function frees all the cache elements.
         * This function should be called as part of freeing
         * all the device's resources when the cache could no
         * longer be accessed.
         */
        if (device->cache.pkey_cache)
                for (p = 0;
                     p <= rdma_end_port(device) - rdma_start_port(device); ++p)
                        kfree(device->cache.pkey_cache[p]);

        gid_table_release_one(device);
        kfree(device->cache.pkey_cache);
        kfree(device->cache.lmc_cache);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
        /* The cleanup function unregisters the event handler,
         * waits for all in-progress workqueue elements and cleans
         * up the GID cache. This function should be called after
         * the device was removed from the devices list and all
         * clients were removed, so the cache exists but is
         * non-functional and shouldn't be updated anymore.
         */
        ib_unregister_event_handler(&device->cache.event_handler);
        flush_workqueue(ib_wq);
        gid_table_cleanup_one(device);
}

void __init ib_cache_setup(void)
{
        roce_gid_mgmt_init();
}

void __exit ib_cache_cleanup(void)
{
        roce_gid_mgmt_cleanup();
}