linux/drivers/infiniband/core/cache.c
   1/*
   2 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
   4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/module.h>
  37#include <linux/errno.h>
  38#include <linux/slab.h>
  39#include <linux/workqueue.h>
  40#include <linux/netdevice.h>
  41#include <net/addrconf.h>
  42
  43#include <rdma/ib_cache.h>
  44
  45#include "core_priv.h"
  46
  47struct ib_pkey_cache {
  48        int             table_len;
  49        u16             table[0];
  50};
  51
  52struct ib_update_work {
  53        struct work_struct work;
  54        struct ib_device  *device;
  55        u8                 port_num;
  56        bool               enforce_security;
  57};
  58
  59union ib_gid zgid;
  60EXPORT_SYMBOL(zgid);
  61
  62enum gid_attr_find_mask {
  63        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
  64        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
  65        GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
  66        GID_ATTR_FIND_MASK_GID_TYPE     = 1UL << 3,
  67};
  68
  69enum gid_table_entry_props {
  70        GID_TABLE_ENTRY_INVALID         = 1UL << 0,
  71        GID_TABLE_ENTRY_DEFAULT         = 1UL << 1,
  72};
  73
  74struct ib_gid_table_entry {
  75        unsigned long       props;
  76        union ib_gid        gid;
  77        struct ib_gid_attr  attr;
  78        void               *context;
  79};
  80
  81struct ib_gid_table {
  82        int                  sz;
   83        /* In RoCE, adding a GID to the table requires:
   84         * (a) Find whether this GID already exists.
   85         * (b) Find a free slot.
   86         * (c) Write the new GID.
   87         *
   88         * Delete requires a different set of operations:
   89         * (a) Find the GID.
   90         * (b) Delete it.
   91         *
   92         */
   93        /* Any writer to data_vec must hold this lock and the write side of
   94         * rwlock. Readers must hold only rwlock. All writers must be in a
   95         * sleepable context (see the locking sketch below this struct).
   96         */
  97        struct mutex         lock;
  98        /* rwlock protects data_vec[ix]->props. */
  99        rwlock_t             rwlock;
 100        struct ib_gid_table_entry *data_vec;
 101};
 102
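/*
 * Locking sketch (illustrative only, not built into this file): a writer
 * updating data_vec follows the rule documented in struct ib_gid_table
 * above -- hold table->lock (the mutex) for the whole modification and
 * take the write side of table->rwlock only around the props update,
 * exactly as add_modify_gid() and del_gid() below do:
 *
 *	mutex_lock(&table->lock);		// sleepable writer context
 *	// ...modify gid/attr of data_vec[ix]...
 *	write_lock_irq(&table->rwlock);		// publish the validity change
 *	table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
 *	write_unlock_irq(&table->rwlock);
 *	mutex_unlock(&table->lock);
 *
 * Readers such as ib_get_cached_gid() take only read_lock_irqsave() on
 * table->rwlock.
 */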
 103static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
 104{
 105        struct ib_event event;
 106
 107        event.device            = ib_dev;
 108        event.element.port_num  = port;
 109        event.event             = IB_EVENT_GID_CHANGE;
 110
 111        ib_dispatch_event(&event);
 112}
 113
 114static const char * const gid_type_str[] = {
 115        [IB_GID_TYPE_IB]        = "IB/RoCE v1",
 116        [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
 117};
 118
 119const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
 120{
 121        if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
 122                return gid_type_str[gid_type];
 123
 124        return "Invalid GID type";
 125}
 126EXPORT_SYMBOL(ib_cache_gid_type_str);
 127
  128/** rdma_is_zero_gid - Check whether the given GID is zero.
  129 * @gid:        GID to check
  130 * Returns true if the given GID is zero, false otherwise.
 131 */
 132bool rdma_is_zero_gid(const union ib_gid *gid)
 133{
 134        return !memcmp(gid, &zgid, sizeof(*gid));
 135}
 136EXPORT_SYMBOL(rdma_is_zero_gid);
 137
 138int ib_cache_gid_parse_type_str(const char *buf)
 139{
 140        unsigned int i;
 141        size_t len;
 142        int err = -EINVAL;
 143
 144        len = strlen(buf);
 145        if (len == 0)
 146                return -EINVAL;
 147
 148        if (buf[len - 1] == '\n')
 149                len--;
 150
 151        for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
 152                if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
 153                    len == strlen(gid_type_str[i])) {
 154                        err = i;
 155                        break;
 156                }
 157
 158        return err;
 159}
 160EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
 161
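/*
 * Usage sketch for ib_cache_gid_parse_type_str() (hypothetical caller,
 * e.g. a sysfs store handler; not part of the original file): parse a
 * user-supplied GID type string, tolerating a trailing newline.
 *
 *	int gid_type = ib_cache_gid_parse_type_str("RoCE v2\n");
 *
 *	if (gid_type < 0)
 *		return -EINVAL;		// unknown GID type string
 *	// gid_type is now IB_GID_TYPE_ROCE_UDP_ENCAP
 */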
 162static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
 163{
 164        return device->cache.ports[port - rdma_start_port(device)].gid;
 165}
 166
 167static void del_roce_gid(struct ib_device *device, u8 port_num,
 168                         struct ib_gid_table *table, int ix)
 169{
 170        pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
 171                 device->name, port_num, ix,
 172                 table->data_vec[ix].gid.raw);
 173
 174        if (rdma_cap_roce_gid_table(device, port_num))
 175                device->del_gid(&table->data_vec[ix].attr,
 176                                &table->data_vec[ix].context);
 177        dev_put(table->data_vec[ix].attr.ndev);
 178}
 179
 180static int add_roce_gid(struct ib_gid_table *table,
 181                        const union ib_gid *gid,
 182                        const struct ib_gid_attr *attr)
 183{
 184        struct ib_gid_table_entry *entry;
 185        int ix = attr->index;
 186        int ret = 0;
 187
 188        if (!attr->ndev) {
 189                pr_err("%s NULL netdev device=%s port=%d index=%d\n",
 190                       __func__, attr->device->name, attr->port_num,
 191                       attr->index);
 192                return -EINVAL;
 193        }
 194
 195        entry = &table->data_vec[ix];
 196        if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
 197                WARN(1, "GID table corruption device=%s port=%d index=%d\n",
 198                     attr->device->name, attr->port_num,
 199                     attr->index);
 200                return -EINVAL;
 201        }
 202
 203        if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
 204                ret = attr->device->add_gid(gid, attr, &entry->context);
 205                if (ret) {
 206                        pr_err("%s GID add failed device=%s port=%d index=%d\n",
 207                               __func__, attr->device->name, attr->port_num,
 208                               attr->index);
 209                        goto add_err;
 210                }
 211        }
 212        dev_hold(attr->ndev);
 213
 214add_err:
 215        if (!ret)
 216                pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
 217                         attr->device->name, attr->port_num, ix, gid->raw);
 218        return ret;
 219}
 220
 221/**
 222 * add_modify_gid - Add or modify GID table entry
 223 *
  224 * @table:      GID table in which the GID is to be added or modified
  225 * @gid:        GID content
  226 * @attr:       Attributes of the GID
  227 *
  228 * Returns 0 on success or an appropriate error code. Zero GID
  229 * addition is accepted for non-RoCE ports whose HCAs report such
  230 * entries as valid GIDs, but such zero GIDs are not added to the cache.
 231 */
 232static int add_modify_gid(struct ib_gid_table *table,
 233                          const union ib_gid *gid,
 234                          const struct ib_gid_attr *attr)
 235{
 236        int ret;
 237
 238        if (rdma_protocol_roce(attr->device, attr->port_num)) {
 239                ret = add_roce_gid(table, gid, attr);
 240                if (ret)
 241                        return ret;
 242        } else {
 243                /*
  244                 * Some HCAs report multiple GID entries with only one
  245                 * valid GID, the rest being zero GIDs.
  246                 * Tolerate this on the IB link layer: don't fail the
  247                 * call, but don't add such an entry to the GID cache.
 248                 */
 249                if (rdma_is_zero_gid(gid))
 250                        return 0;
 251        }
 252
 253        lockdep_assert_held(&table->lock);
 254        memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
 255        memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
 256
 257        write_lock_irq(&table->rwlock);
 258        table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
 259        write_unlock_irq(&table->rwlock);
 260        return 0;
 261}
 262
 263/**
 264 * del_gid - Delete GID table entry
 265 *
 266 * @ib_dev:     IB device whose GID entry to be deleted
 267 * @port:       Port number of the IB device
 268 * @table:      GID table of the IB device for a port
 269 * @ix:         GID entry index to delete
 270 *
 271 */
 272static void del_gid(struct ib_device *ib_dev, u8 port,
 273                    struct ib_gid_table *table, int ix)
 274{
 275        lockdep_assert_held(&table->lock);
 276        write_lock_irq(&table->rwlock);
 277        table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
 278        write_unlock_irq(&table->rwlock);
 279
 280        if (rdma_protocol_roce(ib_dev, port))
 281                del_roce_gid(ib_dev, port, table, ix);
 282        memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
 283        memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
 284        table->data_vec[ix].context = NULL;
 285}
 286
  287/* rwlock should be read-locked, or table->lock should be held */
 288static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 289                    const struct ib_gid_attr *val, bool default_gid,
 290                    unsigned long mask, int *pempty)
 291{
 292        int i = 0;
 293        int found = -1;
 294        int empty = pempty ? -1 : 0;
 295
 296        while (i < table->sz && (found < 0 || empty < 0)) {
 297                struct ib_gid_table_entry *data = &table->data_vec[i];
 298                struct ib_gid_attr *attr = &data->attr;
 299                int curr_index = i;
 300
 301                i++;
 302
  303                /* find_gid() is used during GID addition, where it must
  304                 * return a free entry slot that is not a duplicate. A free
  305                 * slot is requested and returned only if pempty is set, so
  306                 * look for a free slot only when requested (see sketch below).
  307                 */
 308                if (pempty && empty < 0) {
 309                        if (data->props & GID_TABLE_ENTRY_INVALID &&
 310                            (default_gid ==
 311                             !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
 312                                /*
 313                                 * Found an invalid (free) entry; allocate it.
 314                                 * If default GID is requested, then our
 315                                 * found slot must be one of the DEFAULT
 316                                 * reserved slots or we fail.
 317                                 * This ensures that only DEFAULT reserved
 318                                 * slots are used for default property GIDs.
 319                                 */
 320                                empty = curr_index;
 321                        }
 322                }
 323
  324                /*
  325                 * Additionally, find_gid() is used to find a valid entry
  326                 * during lookup, where validity must be checked. The free-slot
  327                 * check above therefore runs first, so an INVALID entry can
  328                 * still be recorded as empty before being skipped here.
  329                 */
 330                if (data->props & GID_TABLE_ENTRY_INVALID)
 331                        continue;
 332
 333                if (found >= 0)
 334                        continue;
 335
 336                if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
 337                    attr->gid_type != val->gid_type)
 338                        continue;
 339
 340                if (mask & GID_ATTR_FIND_MASK_GID &&
 341                    memcmp(gid, &data->gid, sizeof(*gid)))
 342                        continue;
 343
 344                if (mask & GID_ATTR_FIND_MASK_NETDEV &&
 345                    attr->ndev != val->ndev)
 346                        continue;
 347
 348                if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
 349                    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
 350                    default_gid)
 351                        continue;
 352
 353                found = curr_index;
 354        }
 355
 356        if (pempty)
 357                *pempty = empty;
 358
 359        return found;
 360}
 361
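/*
 * Call-pattern sketch (illustrative): find_gid() serves both lookup and
 * free-slot allocation, depending on pempty.
 *
 *	int empty;
 *
 *	// addition path: also ask for a free slot of the matching kind
 *	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
 *
 *	// lookup/delete path: only search for a matching valid entry
 *	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
 *
 * __ib_cache_gid_add() and _ib_cache_gid_del() below use exactly these
 * two forms.
 */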
 362static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
 363{
 364        gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
 365        addrconf_ifid_eui48(&gid->raw[8], dev);
 366}
 367
 368static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 369                              union ib_gid *gid, struct ib_gid_attr *attr,
 370                              unsigned long mask, bool default_gid)
 371{
 372        struct ib_gid_table *table;
 373        int ret = 0;
 374        int empty;
 375        int ix;
 376
  377        /* Do not allow adding a zero GID, in accordance with
  378         * IB spec version 1.3 section 4.1.1 point (6),
  379         * section 12.7.10 and section 12.7.20.
  380         */
 381        if (rdma_is_zero_gid(gid))
 382                return -EINVAL;
 383
 384        table = rdma_gid_table(ib_dev, port);
 385
 386        mutex_lock(&table->lock);
 387
 388        ix = find_gid(table, gid, attr, default_gid, mask, &empty);
 389        if (ix >= 0)
 390                goto out_unlock;
 391
 392        if (empty < 0) {
 393                ret = -ENOSPC;
 394                goto out_unlock;
 395        }
 396        attr->device = ib_dev;
 397        attr->index = empty;
 398        attr->port_num = port;
 399        ret = add_modify_gid(table, gid, attr);
 400        if (!ret)
 401                dispatch_gid_change_event(ib_dev, port);
 402
 403out_unlock:
 404        mutex_unlock(&table->lock);
 405        if (ret)
 406                pr_warn("%s: unable to add gid %pI6 error=%d\n",
 407                        __func__, gid->raw, ret);
 408        return ret;
 409}
 410
 411int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 412                     union ib_gid *gid, struct ib_gid_attr *attr)
 413{
 414        struct net_device *idev;
 415        unsigned long mask;
 416        int ret;
 417
 418        if (ib_dev->get_netdev) {
 419                idev = ib_dev->get_netdev(ib_dev, port);
 420                if (idev && attr->ndev != idev) {
 421                        union ib_gid default_gid;
 422
  423                        /* Adding default GIDs is not permitted */
 424                        make_default_gid(idev, &default_gid);
 425                        if (!memcmp(gid, &default_gid, sizeof(*gid))) {
 426                                dev_put(idev);
 427                                return -EPERM;
 428                        }
 429                }
 430                if (idev)
 431                        dev_put(idev);
 432        }
 433
 434        mask = GID_ATTR_FIND_MASK_GID |
 435               GID_ATTR_FIND_MASK_GID_TYPE |
 436               GID_ATTR_FIND_MASK_NETDEV;
 437
 438        ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
 439        return ret;
 440}
 441
 442static int
 443_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
 444                  union ib_gid *gid, struct ib_gid_attr *attr,
 445                  unsigned long mask, bool default_gid)
 446{
 447        struct ib_gid_table *table;
 448        int ret = 0;
 449        int ix;
 450
 451        table = rdma_gid_table(ib_dev, port);
 452
 453        mutex_lock(&table->lock);
 454
 455        ix = find_gid(table, gid, attr, default_gid, mask, NULL);
 456        if (ix < 0) {
 457                ret = -EINVAL;
 458                goto out_unlock;
 459        }
 460
 461        del_gid(ib_dev, port, table, ix);
 462        dispatch_gid_change_event(ib_dev, port);
 463
 464out_unlock:
 465        mutex_unlock(&table->lock);
 466        if (ret)
 467                pr_debug("%s: can't delete gid %pI6 error=%d\n",
 468                         __func__, gid->raw, ret);
 469        return ret;
 470}
 471
 472int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
 473                     union ib_gid *gid, struct ib_gid_attr *attr)
 474{
 475        unsigned long mask = GID_ATTR_FIND_MASK_GID       |
 476                             GID_ATTR_FIND_MASK_GID_TYPE |
 477                             GID_ATTR_FIND_MASK_DEFAULT  |
 478                             GID_ATTR_FIND_MASK_NETDEV;
 479
 480        return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
 481}
 482
 483int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 484                                     struct net_device *ndev)
 485{
 486        struct ib_gid_table *table;
 487        int ix;
 488        bool deleted = false;
 489
 490        table = rdma_gid_table(ib_dev, port);
 491
 492        mutex_lock(&table->lock);
 493
 494        for (ix = 0; ix < table->sz; ix++) {
 495                if (table->data_vec[ix].attr.ndev == ndev) {
 496                        del_gid(ib_dev, port, table, ix);
 497                        deleted = true;
 498                }
 499        }
 500
 501        mutex_unlock(&table->lock);
 502
 503        if (deleted)
 504                dispatch_gid_change_event(ib_dev, port);
 505
 506        return 0;
 507}
 508
 509static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
 510                              union ib_gid *gid, struct ib_gid_attr *attr)
 511{
 512        struct ib_gid_table *table;
 513
 514        table = rdma_gid_table(ib_dev, port);
 515
 516        if (index < 0 || index >= table->sz)
 517                return -EINVAL;
 518
 519        if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
 520                return -EINVAL;
 521
 522        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
 523        if (attr) {
 524                memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
 525                if (attr->ndev)
 526                        dev_hold(attr->ndev);
 527        }
 528
 529        return 0;
 530}
 531
 532static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
 533                                    const union ib_gid *gid,
 534                                    const struct ib_gid_attr *val,
 535                                    unsigned long mask,
 536                                    u8 *port, u16 *index)
 537{
 538        struct ib_gid_table *table;
 539        u8 p;
 540        int local_index;
 541        unsigned long flags;
 542
 543        for (p = 0; p < ib_dev->phys_port_cnt; p++) {
 544                table = ib_dev->cache.ports[p].gid;
 545                read_lock_irqsave(&table->rwlock, flags);
 546                local_index = find_gid(table, gid, val, false, mask, NULL);
 547                if (local_index >= 0) {
 548                        if (index)
 549                                *index = local_index;
 550                        if (port)
 551                                *port = p + rdma_start_port(ib_dev);
 552                        read_unlock_irqrestore(&table->rwlock, flags);
 553                        return 0;
 554                }
 555                read_unlock_irqrestore(&table->rwlock, flags);
 556        }
 557
 558        return -ENOENT;
 559}
 560
 561static int ib_cache_gid_find(struct ib_device *ib_dev,
 562                             const union ib_gid *gid,
 563                             enum ib_gid_type gid_type,
 564                             struct net_device *ndev, u8 *port,
 565                             u16 *index)
 566{
 567        unsigned long mask = GID_ATTR_FIND_MASK_GID |
 568                             GID_ATTR_FIND_MASK_GID_TYPE;
 569        struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
 570
 571        if (ndev)
 572                mask |= GID_ATTR_FIND_MASK_NETDEV;
 573
 574        return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
 575                                        mask, port, index);
 576}
 577
 578/**
 579 * ib_find_cached_gid_by_port - Returns the GID table index where a specified
 580 * GID value occurs. It searches for the specified GID value in the local
 581 * software cache.
 582 * @device: The device to query.
 583 * @gid: The GID value to search for.
 584 * @gid_type: The GID type to search for.
 585 * @port_num: The port number of the device where the GID value should be
 586 *   searched.
  587 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 588 * @index: The index into the cached GID table where the GID was found. This
 589 *   parameter may be NULL.
 590 */
 591int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
 592                               const union ib_gid *gid,
 593                               enum ib_gid_type gid_type,
 594                               u8 port, struct net_device *ndev,
 595                               u16 *index)
 596{
 597        int local_index;
 598        struct ib_gid_table *table;
 599        unsigned long mask = GID_ATTR_FIND_MASK_GID |
 600                             GID_ATTR_FIND_MASK_GID_TYPE;
 601        struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
 602        unsigned long flags;
 603
 604        if (!rdma_is_port_valid(ib_dev, port))
 605                return -ENOENT;
 606
 607        table = rdma_gid_table(ib_dev, port);
 608
 609        if (ndev)
 610                mask |= GID_ATTR_FIND_MASK_NETDEV;
 611
 612        read_lock_irqsave(&table->rwlock, flags);
 613        local_index = find_gid(table, gid, &val, false, mask, NULL);
 614        if (local_index >= 0) {
 615                if (index)
 616                        *index = local_index;
 617                read_unlock_irqrestore(&table->rwlock, flags);
 618                return 0;
 619        }
 620
 621        read_unlock_irqrestore(&table->rwlock, flags);
 622        return -ENOENT;
 623}
 624EXPORT_SYMBOL(ib_find_cached_gid_by_port);
 625
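/*
 * Usage sketch for ib_find_cached_gid_by_port() (hypothetical caller;
 * device, gid and port_num are assumed to exist in the caller): look up
 * the table index of a GID on a known port, ignoring the netdev.
 *
 *	u16 gid_index;
 *	int err;
 *
 *	err = ib_find_cached_gid_by_port(device, &gid, IB_GID_TYPE_IB,
 *					 port_num, NULL, &gid_index);
 *	if (err)
 *		return err;	// -ENOENT if the GID is not in the cache
 */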
 626/**
 627 * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
 628 * GID value occurs
 629 * @device: The device to query.
 630 * @gid: The GID value to search for.
  631 * @port_num: The port number of the device where the GID value should be
  632 *   searched.
  633 * @filter: The filter function is executed on any matching GID in the table.
  634 *   If the filter function returns true, the corresponding index is returned;
  635 *   otherwise, the search of the GID table continues. It is guaranteed that
  636 *   while filter is executed, the ndev field is valid and the structure won't
  637 *   change. filter is executed in an atomic context. filter must not be NULL.
 638 * @index: The index into the cached GID table where the GID was found. This
 639 *   parameter may be NULL.
 640 *
  641 * ib_cache_gid_find_by_filter() searches the port's GID table for the
  642 * specified GID value for which the filter function returns true.
  643 * This function is only supported on RoCE ports.
 644 *
 645 */
 646static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
 647                                       const union ib_gid *gid,
 648                                       u8 port,
 649                                       bool (*filter)(const union ib_gid *,
 650                                                      const struct ib_gid_attr *,
 651                                                      void *),
 652                                       void *context,
 653                                       u16 *index)
 654{
 655        struct ib_gid_table *table;
 656        unsigned int i;
 657        unsigned long flags;
 658        bool found = false;
 659
 660
 661        if (!rdma_is_port_valid(ib_dev, port) ||
 662            !rdma_protocol_roce(ib_dev, port))
 663                return -EPROTONOSUPPORT;
 664
 665        table = rdma_gid_table(ib_dev, port);
 666
 667        read_lock_irqsave(&table->rwlock, flags);
 668        for (i = 0; i < table->sz; i++) {
 669                struct ib_gid_attr attr;
 670
 671                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
 672                        continue;
 673
 674                if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
 675                        continue;
 676
 677                memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
 678
 679                if (filter(gid, &attr, context)) {
 680                        found = true;
 681                        if (index)
 682                                *index = i;
 683                        break;
 684                }
 685        }
 686        read_unlock_irqrestore(&table->rwlock, flags);
 687
 688        if (!found)
 689                return -ENOENT;
 690        return 0;
 691}
 692
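/*
 * Filter sketch (hypothetical, for illustration only): a filter passed to
 * ib_cache_gid_find_by_filter() runs under the table rwlock in atomic
 * context, so it must not sleep. A minimal filter that matches entries
 * bound to a given net_device passed via @context could look like:
 *
 *	static bool match_ndev_filter(const union ib_gid *gid,
 *				      const struct ib_gid_attr *attr,
 *				      void *context)
 *	{
 *		return attr->ndev == context;	// no sleeping allowed here
 *	}
 */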
 693static struct ib_gid_table *alloc_gid_table(int sz)
 694{
 695        struct ib_gid_table *table =
 696                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
 697        int i;
 698
 699        if (!table)
 700                return NULL;
 701
 702        table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
 703        if (!table->data_vec)
 704                goto err_free_table;
 705
 706        mutex_init(&table->lock);
 707
 708        table->sz = sz;
 709        rwlock_init(&table->rwlock);
 710
  711        /* Mark all entries as invalid so that the allocator can allocate
  712         * one of the invalid (free) entries.
 713         */
 714        for (i = 0; i < sz; i++)
 715                table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
 716        return table;
 717
 718err_free_table:
 719        kfree(table);
 720        return NULL;
 721}
 722
 723static void release_gid_table(struct ib_gid_table *table)
 724{
 725        if (table) {
 726                kfree(table->data_vec);
 727                kfree(table);
 728        }
 729}
 730
 731static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
 732                                   struct ib_gid_table *table)
 733{
 734        int i;
 735        bool deleted = false;
 736
 737        if (!table)
 738                return;
 739
 740        mutex_lock(&table->lock);
 741        for (i = 0; i < table->sz; ++i) {
 742                if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
 743                        del_gid(ib_dev, port, table, i);
 744                        deleted = true;
 745                }
 746        }
 747        mutex_unlock(&table->lock);
 748
 749        if (deleted)
 750                dispatch_gid_change_event(ib_dev, port);
 751}
 752
 753void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
 754                                  struct net_device *ndev,
 755                                  unsigned long gid_type_mask,
 756                                  enum ib_cache_gid_default_mode mode)
 757{
 758        union ib_gid gid = { };
 759        struct ib_gid_attr gid_attr;
 760        struct ib_gid_table *table;
 761        unsigned int gid_type;
 762        unsigned long mask;
 763
 764        table = rdma_gid_table(ib_dev, port);
 765
 766        mask = GID_ATTR_FIND_MASK_GID_TYPE |
 767               GID_ATTR_FIND_MASK_DEFAULT |
 768               GID_ATTR_FIND_MASK_NETDEV;
 769        memset(&gid_attr, 0, sizeof(gid_attr));
 770        gid_attr.ndev = ndev;
 771
 772        for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
 773                if (1UL << gid_type & ~gid_type_mask)
 774                        continue;
 775
 776                gid_attr.gid_type = gid_type;
 777
 778                if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
 779                        make_default_gid(ndev, &gid);
 780                        __ib_cache_gid_add(ib_dev, port, &gid,
 781                                           &gid_attr, mask, true);
 782                } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
 783                        _ib_cache_gid_del(ib_dev, port, &gid,
 784                                          &gid_attr, mask, true);
 785                }
 786        }
 787}
 788
 789static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
 790                                      struct ib_gid_table *table)
 791{
 792        unsigned int i;
 793        unsigned long roce_gid_type_mask;
 794        unsigned int num_default_gids;
 795        unsigned int current_gid = 0;
 796
 797        roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
 798        num_default_gids = hweight_long(roce_gid_type_mask);
 799        for (i = 0; i < num_default_gids && i < table->sz; i++) {
 800                struct ib_gid_table_entry *entry = &table->data_vec[i];
 801
 802                entry->props |= GID_TABLE_ENTRY_DEFAULT;
 803                current_gid = find_next_bit(&roce_gid_type_mask,
 804                                            BITS_PER_LONG,
 805                                            current_gid);
 806                entry->attr.gid_type = current_gid++;
 807        }
 808}
 809
 810
 811static void gid_table_release_one(struct ib_device *ib_dev)
 812{
 813        struct ib_gid_table *table;
 814        u8 port;
 815
 816        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
 817                table = ib_dev->cache.ports[port].gid;
 818                release_gid_table(table);
 819                ib_dev->cache.ports[port].gid = NULL;
 820        }
 821}
 822
 823static int _gid_table_setup_one(struct ib_device *ib_dev)
 824{
 825        u8 port;
 826        struct ib_gid_table *table;
 827
 828        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
 829                u8 rdma_port = port + rdma_start_port(ib_dev);
 830
 831                table = alloc_gid_table(
 832                                ib_dev->port_immutable[rdma_port].gid_tbl_len);
 833                if (!table)
 834                        goto rollback_table_setup;
 835
 836                gid_table_reserve_default(ib_dev, rdma_port, table);
 837                ib_dev->cache.ports[port].gid = table;
 838        }
 839        return 0;
 840
 841rollback_table_setup:
 842        gid_table_release_one(ib_dev);
 843        return -ENOMEM;
 844}
 845
 846static void gid_table_cleanup_one(struct ib_device *ib_dev)
 847{
 848        struct ib_gid_table *table;
 849        u8 port;
 850
 851        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
 852                table = ib_dev->cache.ports[port].gid;
 853                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
 854                                       table);
 855        }
 856}
 857
 858static int gid_table_setup_one(struct ib_device *ib_dev)
 859{
 860        int err;
 861
 862        err = _gid_table_setup_one(ib_dev);
 863
 864        if (err)
 865                return err;
 866
 867        rdma_roce_rescan_device(ib_dev);
 868
 869        return err;
 870}
 871
 872int ib_get_cached_gid(struct ib_device *device,
 873                      u8                port_num,
 874                      int               index,
 875                      union ib_gid     *gid,
 876                      struct ib_gid_attr *gid_attr)
 877{
 878        int res;
 879        unsigned long flags;
 880        struct ib_gid_table *table;
 881
 882        if (!rdma_is_port_valid(device, port_num))
 883                return -EINVAL;
 884
 885        table = rdma_gid_table(device, port_num);
 886        read_lock_irqsave(&table->rwlock, flags);
 887        res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
 888        read_unlock_irqrestore(&table->rwlock, flags);
 889
 890        return res;
 891}
 892EXPORT_SYMBOL(ib_get_cached_gid);
 893
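/*
 * Note for callers (derived from __ib_cache_gid_get() above): when a
 * gid_attr is supplied and the entry has a netdev, a reference is taken
 * with dev_hold(), so the caller is expected to drop it. A minimal
 * (hypothetical) caller:
 *
 *	struct ib_gid_attr attr;
 *	union ib_gid gid;
 *
 *	if (!ib_get_cached_gid(device, port_num, 0, &gid, &attr) && attr.ndev)
 *		dev_put(attr.ndev);	// drop the reference taken by the cache
 */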
 894/**
 895 * ib_find_cached_gid - Returns the port number and GID table index where
 896 *   a specified GID value occurs.
 897 * @device: The device to query.
 898 * @gid: The GID value to search for.
 899 * @gid_type: The GID type to search for.
 900 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 901 * @port_num: The port number of the device where the GID value was found.
 902 * @index: The index into the cached GID table where the GID was found.  This
 903 *   parameter may be NULL.
 904 *
 905 * ib_find_cached_gid() searches for the specified GID value in
 906 * the local software cache.
 907 */
 908int ib_find_cached_gid(struct ib_device *device,
 909                       const union ib_gid *gid,
 910                       enum ib_gid_type gid_type,
 911                       struct net_device *ndev,
 912                       u8               *port_num,
 913                       u16              *index)
 914{
 915        return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
 916}
 917EXPORT_SYMBOL(ib_find_cached_gid);
 918
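/*
 * Usage sketch for ib_find_cached_gid() (hypothetical caller; device, gid
 * and ndev are assumed to exist): a device-wide search that returns both
 * the port and the table index.
 *
 *	u8 port_num;
 *	u16 gid_index;
 *
 *	if (!ib_find_cached_gid(device, &gid, IB_GID_TYPE_ROCE_UDP_ENCAP,
 *				ndev, &port_num, &gid_index))
 *		pr_debug("gid found on port %u index %u\n",
 *			 port_num, gid_index);
 */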
 919int ib_find_gid_by_filter(struct ib_device *device,
 920                          const union ib_gid *gid,
 921                          u8 port_num,
 922                          bool (*filter)(const union ib_gid *gid,
 923                                         const struct ib_gid_attr *,
 924                                         void *),
 925                          void *context, u16 *index)
 926{
  927        /* Only the RoCE GID table supports a filter function */
 928        if (!rdma_protocol_roce(device, port_num) && filter)
 929                return -EPROTONOSUPPORT;
 930
 931        return ib_cache_gid_find_by_filter(device, gid,
 932                                           port_num, filter,
 933                                           context, index);
 934}
 935
 936int ib_get_cached_pkey(struct ib_device *device,
 937                       u8                port_num,
 938                       int               index,
 939                       u16              *pkey)
 940{
 941        struct ib_pkey_cache *cache;
 942        unsigned long flags;
 943        int ret = 0;
 944
 945        if (!rdma_is_port_valid(device, port_num))
 946                return -EINVAL;
 947
 948        read_lock_irqsave(&device->cache.lock, flags);
 949
 950        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
 951
 952        if (index < 0 || index >= cache->table_len)
 953                ret = -EINVAL;
 954        else
 955                *pkey = cache->table[index];
 956
 957        read_unlock_irqrestore(&device->cache.lock, flags);
 958
 959        return ret;
 960}
 961EXPORT_SYMBOL(ib_get_cached_pkey);
 962
 963int ib_get_cached_subnet_prefix(struct ib_device *device,
 964                                u8                port_num,
 965                                u64              *sn_pfx)
 966{
 967        unsigned long flags;
 968        int p;
 969
 970        if (!rdma_is_port_valid(device, port_num))
 971                return -EINVAL;
 972
 973        p = port_num - rdma_start_port(device);
 974        read_lock_irqsave(&device->cache.lock, flags);
 975        *sn_pfx = device->cache.ports[p].subnet_prefix;
 976        read_unlock_irqrestore(&device->cache.lock, flags);
 977
 978        return 0;
 979}
 980EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
 981
 982int ib_find_cached_pkey(struct ib_device *device,
 983                        u8                port_num,
 984                        u16               pkey,
 985                        u16              *index)
 986{
 987        struct ib_pkey_cache *cache;
 988        unsigned long flags;
 989        int i;
 990        int ret = -ENOENT;
 991        int partial_ix = -1;
 992
 993        if (!rdma_is_port_valid(device, port_num))
 994                return -EINVAL;
 995
 996        read_lock_irqsave(&device->cache.lock, flags);
 997
 998        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
 999
1000        *index = -1;
1001
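        /*
         * The low 15 bits of a P_Key are the partition key base; bit 15
         * set indicates full membership. Prefer a full-membership match,
         * but remember a limited-membership match as a partial fallback.
         */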
1002        for (i = 0; i < cache->table_len; ++i)
1003                if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
1004                        if (cache->table[i] & 0x8000) {
1005                                *index = i;
1006                                ret = 0;
1007                                break;
1008                        } else
1009                                partial_ix = i;
1010                }
1011
1012        if (ret && partial_ix >= 0) {
1013                *index = partial_ix;
1014                ret = 0;
1015        }
1016
1017        read_unlock_irqrestore(&device->cache.lock, flags);
1018
1019        return ret;
1020}
1021EXPORT_SYMBOL(ib_find_cached_pkey);
1022
1023int ib_find_exact_cached_pkey(struct ib_device *device,
1024                              u8                port_num,
1025                              u16               pkey,
1026                              u16              *index)
1027{
1028        struct ib_pkey_cache *cache;
1029        unsigned long flags;
1030        int i;
1031        int ret = -ENOENT;
1032
1033        if (!rdma_is_port_valid(device, port_num))
1034                return -EINVAL;
1035
1036        read_lock_irqsave(&device->cache.lock, flags);
1037
1038        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
1039
1040        *index = -1;
1041
1042        for (i = 0; i < cache->table_len; ++i)
1043                if (cache->table[i] == pkey) {
1044                        *index = i;
1045                        ret = 0;
1046                        break;
1047                }
1048
1049        read_unlock_irqrestore(&device->cache.lock, flags);
1050
1051        return ret;
1052}
1053EXPORT_SYMBOL(ib_find_exact_cached_pkey);
1054
1055int ib_get_cached_lmc(struct ib_device *device,
1056                      u8                port_num,
1057                      u8                *lmc)
1058{
1059        unsigned long flags;
1060        int ret = 0;
1061
1062        if (!rdma_is_port_valid(device, port_num))
1063                return -EINVAL;
1064
1065        read_lock_irqsave(&device->cache.lock, flags);
1066        *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
1067        read_unlock_irqrestore(&device->cache.lock, flags);
1068
1069        return ret;
1070}
1071EXPORT_SYMBOL(ib_get_cached_lmc);
1072
1073int ib_get_cached_port_state(struct ib_device   *device,
1074                             u8                  port_num,
1075                             enum ib_port_state *port_state)
1076{
1077        unsigned long flags;
1078        int ret = 0;
1079
1080        if (!rdma_is_port_valid(device, port_num))
1081                return -EINVAL;
1082
1083        read_lock_irqsave(&device->cache.lock, flags);
1084        *port_state = device->cache.ports[port_num
1085                - rdma_start_port(device)].port_state;
1086        read_unlock_irqrestore(&device->cache.lock, flags);
1087
1088        return ret;
1089}
1090EXPORT_SYMBOL(ib_get_cached_port_state);
1091
1092static int config_non_roce_gid_cache(struct ib_device *device,
1093                                     u8 port, int gid_tbl_len)
1094{
1095        struct ib_gid_attr gid_attr = {};
1096        struct ib_gid_table *table;
1097        union ib_gid gid;
1098        int ret = 0;
1099        int i;
1100
1101        gid_attr.device = device;
1102        gid_attr.port_num = port;
1103        table = rdma_gid_table(device, port);
1104
1105        mutex_lock(&table->lock);
1106        for (i = 0; i < gid_tbl_len; ++i) {
1107                if (!device->query_gid)
1108                        continue;
1109                ret = device->query_gid(device, port, i, &gid);
1110                if (ret) {
1111                        pr_warn("query_gid failed (%d) for %s (index %d)\n",
1112                                ret, device->name, i);
1113                        goto err;
1114                }
1115                gid_attr.index = i;
1116                add_modify_gid(table, &gid, &gid_attr);
1117        }
1118err:
1119        mutex_unlock(&table->lock);
1120        return ret;
1121}
1122
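/*
 * ib_cache_update() refreshes the software cache for one port: it queries
 * the port attributes, refreshes the GID table for non-RoCE ports, rebuilds
 * the P_Key table, and then updates the cached lmc, port_state and
 * subnet_prefix under cache.lock. It runs in sleepable context, from
 * ib_cache_task() or ib_cache_setup_one().
 */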
1123static void ib_cache_update(struct ib_device *device,
1124                            u8                port,
1125                            bool              enforce_security)
1126{
1127        struct ib_port_attr       *tprops = NULL;
1128        struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
1129        int                        i;
1130        int                        ret;
1131        struct ib_gid_table       *table;
1132
1133        if (!rdma_is_port_valid(device, port))
1134                return;
1135
1136        table = rdma_gid_table(device, port);
1137
1138        tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
1139        if (!tprops)
1140                return;
1141
1142        ret = ib_query_port(device, port, tprops);
1143        if (ret) {
1144                pr_warn("ib_query_port failed (%d) for %s\n",
1145                        ret, device->name);
1146                goto err;
1147        }
1148
1149        if (!rdma_protocol_roce(device, port)) {
1150                ret = config_non_roce_gid_cache(device, port,
1151                                                tprops->gid_tbl_len);
1152                if (ret)
1153                        goto err;
1154        }
1155
1156        pkey_cache = kmalloc(struct_size(pkey_cache, table,
1157                                         tprops->pkey_tbl_len),
1158                             GFP_KERNEL);
1159        if (!pkey_cache)
1160                goto err;
1161
1162        pkey_cache->table_len = tprops->pkey_tbl_len;
1163
1164        for (i = 0; i < pkey_cache->table_len; ++i) {
1165                ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1166                if (ret) {
1167                        pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
1168                                ret, device->name, i);
1169                        goto err;
1170                }
1171        }
1172
1173        write_lock_irq(&device->cache.lock);
1174
1175        old_pkey_cache = device->cache.ports[port -
1176                rdma_start_port(device)].pkey;
1177
1178        device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
1179        device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
1180        device->cache.ports[port - rdma_start_port(device)].port_state =
1181                tprops->state;
1182
1183        device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
1184                                                        tprops->subnet_prefix;
1185        write_unlock_irq(&device->cache.lock);
1186
1187        if (enforce_security)
1188                ib_security_cache_change(device,
1189                                         port,
1190                                         tprops->subnet_prefix);
1191
1192        kfree(old_pkey_cache);
1193        kfree(tprops);
1194        return;
1195
1196err:
1197        kfree(pkey_cache);
1198        kfree(tprops);
1199}
1200
1201static void ib_cache_task(struct work_struct *_work)
1202{
1203        struct ib_update_work *work =
1204                container_of(_work, struct ib_update_work, work);
1205
1206        ib_cache_update(work->device,
1207                        work->port_num,
1208                        work->enforce_security);
1209        kfree(work);
1210}
1211
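/*
 * ib_cache_event() is called from the event notification path, which may
 * be atomic (hence the GFP_ATOMIC allocation), so the actual cache refresh
 * is deferred to ib_wq via ib_cache_task().
 */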
1212static void ib_cache_event(struct ib_event_handler *handler,
1213                           struct ib_event *event)
1214{
1215        struct ib_update_work *work;
1216
1217        if (event->event == IB_EVENT_PORT_ERR    ||
1218            event->event == IB_EVENT_PORT_ACTIVE ||
1219            event->event == IB_EVENT_LID_CHANGE  ||
1220            event->event == IB_EVENT_PKEY_CHANGE ||
1221            event->event == IB_EVENT_SM_CHANGE   ||
1222            event->event == IB_EVENT_CLIENT_REREGISTER ||
1223            event->event == IB_EVENT_GID_CHANGE) {
1224                work = kmalloc(sizeof *work, GFP_ATOMIC);
1225                if (work) {
1226                        INIT_WORK(&work->work, ib_cache_task);
1227                        work->device   = event->device;
1228                        work->port_num = event->element.port_num;
1229                        if (event->event == IB_EVENT_PKEY_CHANGE ||
1230                            event->event == IB_EVENT_GID_CHANGE)
1231                                work->enforce_security = true;
1232                        else
1233                                work->enforce_security = false;
1234
1235                        queue_work(ib_wq, &work->work);
1236                }
1237        }
1238}
1239
1240int ib_cache_setup_one(struct ib_device *device)
1241{
1242        int p;
1243        int err;
1244
1245        rwlock_init(&device->cache.lock);
1246
1247        device->cache.ports =
1248                kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
1249                        sizeof(*device->cache.ports),
1250                        GFP_KERNEL);
1251        if (!device->cache.ports)
1252                return -ENOMEM;
1253
1254        err = gid_table_setup_one(device);
1255        if (err) {
1256                kfree(device->cache.ports);
1257                device->cache.ports = NULL;
1258                return err;
1259        }
1260
1261        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1262                ib_cache_update(device, p + rdma_start_port(device), true);
1263
1264        INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
1265                              device, ib_cache_event);
1266        ib_register_event_handler(&device->cache.event_handler);
1267        return 0;
1268}
1269
1270void ib_cache_release_one(struct ib_device *device)
1271{
1272        int p;
1273
1274        /*
1275         * The release function frees all the cache elements.
1276         * This function should be called as part of freeing
 1277         * all the device's resources, when the cache can no
 1278         * longer be accessed.
1279         */
1280        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1281                kfree(device->cache.ports[p].pkey);
1282
1283        gid_table_release_one(device);
1284        kfree(device->cache.ports);
1285}
1286
1287void ib_cache_cleanup_one(struct ib_device *device)
1288{
1289        /* The cleanup function unregisters the event handler,
1290         * waits for all in-progress workqueue elements and cleans
1291         * up the GID cache. This function should be called after
1292         * the device was removed from the devices list and all
1293         * clients were removed, so the cache exists but is
1294         * non-functional and shouldn't be updated anymore.
1295         */
1296        ib_unregister_event_handler(&device->cache.event_handler);
1297        flush_workqueue(ib_wq);
1298        gid_table_cleanup_one(device);
1299}
1300