linux/drivers/infiniband/core/cache.c
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
        int             table_len;
        u16             table[0];
};

struct ib_update_work {
        struct work_struct work;
        struct ib_device  *device;
        u8                 port_num;
        bool               enforce_security;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

static const struct ib_gid_attr zattr;

enum gid_attr_find_mask {
        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
        GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
        GID_ATTR_FIND_MASK_GID_TYPE     = 1UL << 3,
};

enum gid_table_entry_props {
        GID_TABLE_ENTRY_INVALID         = 1UL << 0,
        GID_TABLE_ENTRY_DEFAULT         = 1UL << 1,
};

enum gid_table_write_action {
        GID_TABLE_WRITE_ACTION_ADD,
        GID_TABLE_WRITE_ACTION_DEL,
        /* MODIFY only updates the GID table. Currently only used by
         * ib_cache_update.
         */
        GID_TABLE_WRITE_ACTION_MODIFY
};

struct ib_gid_table_entry {
        unsigned long       props;
        union ib_gid        gid;
        struct ib_gid_attr  attr;
        void               *context;
};

struct ib_gid_table {
        int                  sz;
        /* In RoCE, adding a GID to the table requires:
         * (a) Find out if this GID already exists.
         * (b) Find a free space.
         * (c) Write the new GID.
         *
         * Delete requires a different set of operations:
         * (a) Find the GID.
         * (b) Delete it.
         *
         * Add/delete should be carried out atomically.
         * This is done by having all writers take this mutex.
         * We don't need this lock for IB, as the MAD
         * layer replaces all entries. All data_vec entries
         * are protected by this lock.
         */
        struct mutex         lock;
        /* This lock protects the table entries from being
         * read and written simultaneously.
         */
        rwlock_t             rwlock;
        struct ib_gid_table_entry *data_vec;
};
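
/*
 * Illustrative write-side locking order for the table above (a sketch
 * based on the callers in this file, not new functionality): writers
 * serialize on the mutex first and take the rwlock only around the
 * entry update itself, so readers are excluded for as short a time as
 * possible:
 *
 *      mutex_lock(&table->lock);
 *      write_lock_irq(&table->rwlock);
 *      ... look up and modify a data_vec entry ...
 *      write_unlock_irq(&table->rwlock);
 *      mutex_unlock(&table->lock);
 *
 * ib_cache_gid_add() and ib_cache_gid_del() below follow exactly this
 * pattern.
 */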

static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                struct ib_event event;

                event.device            = ib_dev;
                event.element.port_num  = port;
                event.event             = IB_EVENT_GID_CHANGE;

                ib_dispatch_event(&event);
        }
}

static const char * const gid_type_str[] = {
        [IB_GID_TYPE_IB]        = "IB/RoCE v1",
        [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
        if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
                return gid_type_str[gid_type];

        return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

int ib_cache_gid_parse_type_str(const char *buf)
{
        unsigned int i;
        size_t len;
        int err = -EINVAL;

        len = strlen(buf);
        if (len == 0)
                return -EINVAL;

        if (buf[len - 1] == '\n')
                len--;

        for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
                if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
                    len == strlen(gid_type_str[i])) {
                        err = i;
                        break;
                }

        return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
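
/*
 * Usage sketch (illustrative only): parsing a sysfs-style string. A
 * trailing newline, as produced by "echo" into a configuration file,
 * is accepted:
 *
 *      int type = ib_cache_gid_parse_type_str("RoCE v2\n");
 *
 * On success "type" holds the matching enum value (here
 * IB_GID_TYPE_ROCE_UDP_ENCAP); otherwise it is -EINVAL.
 */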

/* This function expects that rwlock will be write locked in all
 * scenarios and that lock will be locked in sleep-able (RoCE)
 * scenarios.
 */
static int write_gid(struct ib_device *ib_dev, u8 port,
                     struct ib_gid_table *table, int ix,
                     const union ib_gid *gid,
                     const struct ib_gid_attr *attr,
                     enum gid_table_write_action action,
                     bool  default_gid)
        __releases(&table->rwlock) __acquires(&table->rwlock)
{
        int ret = 0;
        struct net_device *old_net_dev;
        enum ib_gid_type old_gid_type;

        /* When rdma_cap_roce_gid_table() is true, this function must be
         * protected by a sleepable lock.
         */

        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
                write_unlock_irq(&table->rwlock);
                /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
                 * RoCE providers and thus only updates the cache.
                 */
                if (action == GID_TABLE_WRITE_ACTION_ADD)
                        ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
                                              &table->data_vec[ix].context);
                else if (action == GID_TABLE_WRITE_ACTION_DEL)
                        ret = ib_dev->del_gid(ib_dev, port, ix,
                                              &table->data_vec[ix].context);
                write_lock_irq(&table->rwlock);
        }

        old_net_dev = table->data_vec[ix].attr.ndev;
        old_gid_type = table->data_vec[ix].attr.gid_type;
        if (old_net_dev && old_net_dev != attr->ndev)
                dev_put(old_net_dev);
        /* if the provider add/del call failed, just write the zero GID */
        if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
                gid = &zgid;
                attr = &zattr;
                table->data_vec[ix].context = NULL;
        }

        memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
        memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
        if (default_gid) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
                if (action == GID_TABLE_WRITE_ACTION_DEL)
                        table->data_vec[ix].attr.gid_type = old_gid_type;
        }
        if (table->data_vec[ix].attr.ndev &&
            table->data_vec[ix].attr.ndev != old_net_dev)
                dev_hold(table->data_vec[ix].attr.ndev);

        table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;

        return ret;
}

static int add_gid(struct ib_device *ib_dev, u8 port,
                   struct ib_gid_table *table, int ix,
                   const union ib_gid *gid,
                   const struct ib_gid_attr *attr,
                   bool  default_gid)
{
        return write_gid(ib_dev, port, table, ix, gid, attr,
                         GID_TABLE_WRITE_ACTION_ADD, default_gid);
}

static int modify_gid(struct ib_device *ib_dev, u8 port,
                      struct ib_gid_table *table, int ix,
                      const union ib_gid *gid,
                      const struct ib_gid_attr *attr,
                      bool  default_gid)
{
        return write_gid(ib_dev, port, table, ix, gid, attr,
                         GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
}

static int del_gid(struct ib_device *ib_dev, u8 port,
                   struct ib_gid_table *table, int ix,
                   bool  default_gid)
{
        return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
                         GID_TABLE_WRITE_ACTION_DEL, default_gid);
}

/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                    const struct ib_gid_attr *val, bool default_gid,
                    unsigned long mask, int *pempty)
{
        int i = 0;
        int found = -1;
        int empty = pempty ? -1 : 0;

        while (i < table->sz && (found < 0 || empty < 0)) {
                struct ib_gid_table_entry *data = &table->data_vec[i];
                struct ib_gid_attr *attr = &data->attr;
                int curr_index = i;

                i++;

                if (data->props & GID_TABLE_ENTRY_INVALID)
                        continue;

                if (empty < 0)
                        if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
                            !memcmp(attr, &zattr, sizeof(*attr)) &&
                            !data->props)
                                empty = curr_index;

                if (found >= 0)
                        continue;

                if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
                    attr->gid_type != val->gid_type)
                        continue;

                if (mask & GID_ATTR_FIND_MASK_GID &&
                    memcmp(gid, &data->gid, sizeof(*gid)))
                        continue;

                if (mask & GID_ATTR_FIND_MASK_NETDEV &&
                    attr->ndev != val->ndev)
                        continue;

                if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
                    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
                    default_gid)
                        continue;

                found = curr_index;
        }

        if (pempty)
                *pempty = empty;

        return found;
}

static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
        gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
        addrconf_ifid_eui48(&gid->raw[8], dev);
}
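
/*
 * Worked example (with a hypothetical netdev MAC of 02:00:00:00:00:01):
 * the default GID combines the link-local prefix fe80::/64 with the
 * modified EUI-64 interface ID that addrconf_ifid_eui48() derives from
 * the MAC, giving fe80:0000:0000:0000:0000:00ff:fe00:0001.
 */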

int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
                     union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table *table;
        int ix;
        int ret = 0;
        struct net_device *idev;
        int empty;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        if (!memcmp(gid, &zgid, sizeof(*gid)))
                return -EINVAL;

        if (ib_dev->get_netdev) {
                idev = ib_dev->get_netdev(ib_dev, port);
                if (idev && attr->ndev != idev) {
                        union ib_gid default_gid;

                        /* Adding default GIDs is not permitted */
                        make_default_gid(idev, &default_gid);
                        if (!memcmp(gid, &default_gid, sizeof(*gid))) {
                                dev_put(idev);
                                return -EPERM;
                        }
                }
                if (idev)
                        dev_put(idev);
        }

        mutex_lock(&table->lock);
        write_lock_irq(&table->rwlock);

        ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
                      GID_ATTR_FIND_MASK_GID_TYPE |
                      GID_ATTR_FIND_MASK_NETDEV, &empty);
        if (ix >= 0)
                goto out_unlock;

        if (empty < 0) {
                ret = -ENOSPC;
                goto out_unlock;
        }

        ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
        if (!ret)
                dispatch_gid_change_event(ib_dev, port);

out_unlock:
        write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return ret;
}
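
/*
 * Usage sketch (illustrative; the in-tree callers live in
 * roce_gid_mgmt.c): cache an IP-derived GID for a hypothetical netdev
 * "ndev" on port 1.
 *
 *      struct ib_gid_attr attr = {
 *              .ndev           = ndev,
 *              .gid_type       = IB_GID_TYPE_ROCE_UDP_ENCAP,
 *      };
 *      int err = ib_cache_gid_add(ib_dev, 1, &gid, &attr);
 *
 * A duplicate (gid, gid_type, ndev) triple is absorbed silently:
 * find_gid() hits the existing entry and the function returns 0.
 */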

int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
                     union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table *table;
        int ix;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        mutex_lock(&table->lock);
        write_lock_irq(&table->rwlock);

        ix = find_gid(table, gid, attr, false,
                      GID_ATTR_FIND_MASK_GID      |
                      GID_ATTR_FIND_MASK_GID_TYPE |
                      GID_ATTR_FIND_MASK_NETDEV   |
                      GID_ATTR_FIND_MASK_DEFAULT,
                      NULL);
        if (ix < 0)
                goto out_unlock;

        if (!del_gid(ib_dev, port, table, ix, false))
                dispatch_gid_change_event(ib_dev, port);

out_unlock:
        write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return 0;
}

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
                                     struct net_device *ndev)
{
        struct ib_gid_table *table;
        int ix;
        bool deleted = false;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        mutex_lock(&table->lock);
        write_lock_irq(&table->rwlock);

        for (ix = 0; ix < table->sz; ix++)
                if (table->data_vec[ix].attr.ndev == ndev)
                        if (!del_gid(ib_dev, port, table, ix,
                                     !!(table->data_vec[ix].props &
                                        GID_TABLE_ENTRY_DEFAULT)))
                                deleted = true;

        write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);

        if (deleted)
                dispatch_gid_change_event(ib_dev, port);

        return 0;
}

static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
                              union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table *table;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        if (index < 0 || index >= table->sz)
                return -EINVAL;

        if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
                return -EAGAIN;

        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
        if (attr) {
                memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
                if (attr->ndev)
                        dev_hold(attr->ndev);
        }

        return 0;
}

static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
                                    const union ib_gid *gid,
                                    const struct ib_gid_attr *val,
                                    unsigned long mask,
                                    u8 *port, u16 *index)
{
        struct ib_gid_table *table;
        u8 p;
        int local_index;
        unsigned long flags;

        for (p = 0; p < ib_dev->phys_port_cnt; p++) {
                table = ib_dev->cache.ports[p].gid;
                read_lock_irqsave(&table->rwlock, flags);
                local_index = find_gid(table, gid, val, false, mask, NULL);
                if (local_index >= 0) {
                        if (index)
                                *index = local_index;
                        if (port)
                                *port = p + rdma_start_port(ib_dev);
                        read_unlock_irqrestore(&table->rwlock, flags);
                        return 0;
                }
                read_unlock_irqrestore(&table->rwlock, flags);
        }

        return -ENOENT;
}

static int ib_cache_gid_find(struct ib_device *ib_dev,
                             const union ib_gid *gid,
                             enum ib_gid_type gid_type,
                             struct net_device *ndev, u8 *port,
                             u16 *index)
{
        unsigned long mask = GID_ATTR_FIND_MASK_GID |
                             GID_ATTR_FIND_MASK_GID_TYPE;
        struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};

        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;

        return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
                                        mask, port, index);
}

int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
                               const union ib_gid *gid,
                               enum ib_gid_type gid_type,
                               u8 port, struct net_device *ndev,
                               u16 *index)
{
        int local_index;
        struct ib_gid_table *table;
        unsigned long mask = GID_ATTR_FIND_MASK_GID |
                             GID_ATTR_FIND_MASK_GID_TYPE;
        struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
        unsigned long flags;

        if (!rdma_is_port_valid(ib_dev, port))
                return -ENOENT;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;

        read_lock_irqsave(&table->rwlock, flags);
        local_index = find_gid(table, gid, &val, false, mask, NULL);
        if (local_index >= 0) {
                if (index)
                        *index = local_index;
                read_unlock_irqrestore(&table->rwlock, flags);
                return 0;
        }

        read_unlock_irqrestore(&table->rwlock, flags);
        return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
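
/*
 * Usage sketch (illustrative only; "gid" and "ndev" are hypothetical
 * caller-owned variables): look up the cache index of a RoCE v2 GID on
 * a known port.
 *
 *      u16 index;
 *      int err = ib_find_cached_gid_by_port(ib_dev, &gid,
 *                                           IB_GID_TYPE_ROCE_UDP_ENCAP,
 *                                           port, ndev, &index);
 *
 * Passing a NULL ndev skips the GID_ATTR_FIND_MASK_NETDEV comparison,
 * so the first (gid, gid_type) match on the port wins.
 */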

/**
 * ib_cache_gid_find_by_filter - Returns the GID table index where a
 * specified GID value occurs
 * @ib_dev: The device to query.
 * @gid: The GID value to search for.
 * @port: The port number of the device where the GID value should be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
 * @context: Opaque pointer passed to each invocation of @filter.
 * @index: The index into the cached GID table where the GID was found.  This
 *   parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * for which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
                                       const union ib_gid *gid,
                                       u8 port,
                                       bool (*filter)(const union ib_gid *,
                                                      const struct ib_gid_attr *,
                                                      void *),
                                       void *context,
                                       u16 *index)
{
        struct ib_gid_table *table;
        unsigned int i;
        unsigned long flags;
        bool found = false;

        if (!rdma_is_port_valid(ib_dev, port) ||
            !rdma_protocol_roce(ib_dev, port))
                return -EPROTONOSUPPORT;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        read_lock_irqsave(&table->rwlock, flags);
        for (i = 0; i < table->sz; i++) {
                struct ib_gid_attr attr;

                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
                        goto next;

                if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
                        goto next;

                memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

                if (filter(gid, &attr, context))
                        found = true;

next:
                if (found)
                        break;
        }
        read_unlock_irqrestore(&table->rwlock, flags);

        if (!found)
                return -ENOENT;

        if (index)
                *index = i;
        return 0;
}

static struct ib_gid_table *alloc_gid_table(int sz)
{
        struct ib_gid_table *table =
                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);

        if (!table)
                return NULL;

        table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
        if (!table->data_vec)
                goto err_free_table;

        mutex_init(&table->lock);

        table->sz = sz;
        rwlock_init(&table->rwlock);

        return table;

err_free_table:
        kfree(table);
        return NULL;
}

static void release_gid_table(struct ib_gid_table *table)
{
        if (table) {
                kfree(table->data_vec);
                kfree(table);
        }
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
                                   struct ib_gid_table *table)
{
        int i;
        bool deleted = false;

        if (!table)
                return;

        write_lock_irq(&table->rwlock);
        for (i = 0; i < table->sz; ++i) {
                if (memcmp(&table->data_vec[i].gid, &zgid,
                           sizeof(table->data_vec[i].gid)))
                        /* The default_gid argument must reflect the entry's
                         * GID_TABLE_ENTRY_DEFAULT property, not the lookup
                         * mask bit GID_ATTR_FIND_MASK_DEFAULT.
                         */
                        if (!del_gid(ib_dev, port, table, i,
                                     !!(table->data_vec[i].props &
                                        GID_TABLE_ENTRY_DEFAULT)))
                                deleted = true;
        }
        write_unlock_irq(&table->rwlock);

        if (deleted)
                dispatch_gid_change_event(ib_dev, port);
}

void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
                                  unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode)
{
        union ib_gid gid;
        struct ib_gid_attr gid_attr;
        struct ib_gid_attr zattr_type = zattr;
        struct ib_gid_table *table;
        unsigned int gid_type;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        make_default_gid(ndev, &gid);
        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;

        for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
                int ix;
                union ib_gid current_gid;
                struct ib_gid_attr current_gid_attr = {};

                if (1UL << gid_type & ~gid_type_mask)
                        continue;

                gid_attr.gid_type = gid_type;

                mutex_lock(&table->lock);
                write_lock_irq(&table->rwlock);
                ix = find_gid(table, NULL, &gid_attr, true,
                              GID_ATTR_FIND_MASK_GID_TYPE |
                              GID_ATTR_FIND_MASK_DEFAULT,
                              NULL);

                /* Couldn't find the default GID location */
                if (WARN_ON(ix < 0))
                        goto release;

                zattr_type.gid_type = gid_type;

                if (!__ib_cache_gid_get(ib_dev, port, ix,
                                        &current_gid, &current_gid_attr) &&
                    mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
                    !memcmp(&gid, &current_gid, sizeof(gid)) &&
                    !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
                        goto release;

                if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
                    memcmp(&current_gid_attr, &zattr_type,
                           sizeof(current_gid_attr))) {
                        if (del_gid(ib_dev, port, table, ix, true)) {
                                pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
                                        ix, gid.raw);
                                goto release;
                        } else {
                                dispatch_gid_change_event(ib_dev, port);
                        }
                }

                if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
                        if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
                                pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
                                        gid.raw);
                        else
                                dispatch_gid_change_event(ib_dev, port);
                }

release:
                if (current_gid_attr.ndev)
                        dev_put(current_gid_attr.ndev);
                write_unlock_irq(&table->rwlock);
                mutex_unlock(&table->lock);
        }
}

static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
                                     struct ib_gid_table *table)
{
        unsigned int i;
        unsigned long roce_gid_type_mask;
        unsigned int num_default_gids;
        unsigned int current_gid = 0;

        roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
        num_default_gids = hweight_long(roce_gid_type_mask);
        for (i = 0; i < num_default_gids && i < table->sz; i++) {
                struct ib_gid_table_entry *entry = &table->data_vec[i];

                entry->props |= GID_TABLE_ENTRY_DEFAULT;
                current_gid = find_next_bit(&roce_gid_type_mask,
                                            BITS_PER_LONG,
                                            current_gid);
                entry->attr.gid_type = current_gid++;
        }

        return 0;
}
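
/*
 * Worked example: if roce_gid_type_mask_support() reports both RoCE
 * flavours (mask 0b11), the loop above reserves the first two slots as
 * default entries: slot 0 with gid_type IB_GID_TYPE_IB (RoCE v1) and
 * slot 1 with IB_GID_TYPE_ROCE_UDP_ENCAP (RoCE v2).
 */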

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
        u8 port;
        struct ib_gid_table *table;
        int err = 0;

        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                u8 rdma_port = port + rdma_start_port(ib_dev);

                table = alloc_gid_table(
                                ib_dev->port_immutable[rdma_port].gid_tbl_len);
                if (!table) {
                        err = -ENOMEM;
                        goto rollback_table_setup;
                }

                err = gid_table_reserve_default(ib_dev, rdma_port, table);
                if (err)
                        goto rollback_table_setup;
                ib_dev->cache.ports[port].gid = table;
        }

        return 0;

rollback_table_setup:
        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                table = ib_dev->cache.ports[port].gid;

                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
                                       table);
                release_gid_table(table);
        }

        return err;
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
        struct ib_gid_table *table;
        u8 port;

        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                table = ib_dev->cache.ports[port].gid;
                release_gid_table(table);
                ib_dev->cache.ports[port].gid = NULL;
        }
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
        struct ib_gid_table *table;
        u8 port;

        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                table = ib_dev->cache.ports[port].gid;
                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
                                       table);
        }
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
        int err;

        err = _gid_table_setup_one(ib_dev);
        if (err)
                return err;

        err = roce_rescan_device(ib_dev);
        if (err) {
                gid_table_cleanup_one(ib_dev);
                gid_table_release_one(ib_dev);
        }

        return err;
}

int ib_get_cached_gid(struct ib_device *device,
                      u8                port_num,
                      int               index,
                      union ib_gid     *gid,
                      struct ib_gid_attr *gid_attr)
{
        int res;
        unsigned long flags;
        struct ib_gid_table *table;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        table = device->cache.ports[port_num - rdma_start_port(device)].gid;
        read_lock_irqsave(&table->rwlock, flags);
        res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
        read_unlock_irqrestore(&table->rwlock, flags);

        return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);
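
/*
 * Usage sketch (illustrative only): read entry 0 of port 1. When a
 * gid_attr is supplied and the entry has a netdev attached,
 * __ib_cache_gid_get() takes a reference on it, which the caller must
 * drop.
 *
 *      union ib_gid gid;
 *      struct ib_gid_attr attr;
 *
 *      if (!ib_get_cached_gid(device, 1, 0, &gid, &attr)) {
 *              ... use gid and attr ...
 *              if (attr.ndev)
 *                      dev_put(attr.ndev);
 *      }
 */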

int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
                       enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index)
{
        return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);

int ib_find_gid_by_filter(struct ib_device *device,
                          const union ib_gid *gid,
                          u8 port_num,
                          bool (*filter)(const union ib_gid *gid,
                                         const struct ib_gid_attr *,
                                         void *),
                          void *context, u16 *index)
{
        /* Only the RoCE GID table supports a filter function */
        if (!rdma_cap_roce_gid_table(device, port_num) && filter)
                return -EPROTONOSUPPORT;

        return ib_cache_gid_find_by_filter(device, gid,
                                           port_num, filter,
                                           context, index);
}
EXPORT_SYMBOL(ib_find_gid_by_filter);
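
/*
 * Example filter (a sketch, not code from this file): match only
 * entries attached to a specific netdev, passed through "context".
 * The filter runs under the table rwlock, so it must not sleep.
 *
 *      static bool ndev_filter(const union ib_gid *gid,
 *                              const struct ib_gid_attr *attr,
 *                              void *context)
 *      {
 *              return attr->ndev == context;
 *      }
 *
 *      err = ib_find_gid_by_filter(device, &gid, port_num,
 *                                  ndev_filter, wanted_ndev, &index);
 */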

int ib_get_cached_pkey(struct ib_device *device,
                       u8                port_num,
                       int               index,
                       u16              *pkey)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int ret = 0;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

        if (index < 0 || index >= cache->table_len)
                ret = -EINVAL;
        else
                *pkey = cache->table[index];

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

int ib_get_cached_subnet_prefix(struct ib_device *device,
                                u8                port_num,
                                u64              *sn_pfx)
{
        unsigned long flags;
        int p;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        p = port_num - rdma_start_port(device);
        read_lock_irqsave(&device->cache.lock, flags);
        *sn_pfx = device->cache.ports[p].subnet_prefix;
        read_unlock_irqrestore(&device->cache.lock, flags);

        return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);

int ib_find_cached_pkey(struct ib_device *device,
                        u8                port_num,
                        u16               pkey,
                        u16              *index)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int i;
        int ret = -ENOENT;
        int partial_ix = -1;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

        *index = -1;

        for (i = 0; i < cache->table_len; ++i)
                if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
                        if (cache->table[i] & 0x8000) {
                                *index = i;
                                ret = 0;
                                break;
                        } else {
                                partial_ix = i;
                        }
                }

        if (ret && partial_ix >= 0) {
                *index = partial_ix;
                ret = 0;
        }

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
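
/*
 * Worked example of the membership handling above: P_Keys 0x8001 (full
 * membership) and 0x0001 (limited membership) share the low 15 bits.
 * Searching for 0x0001 in a table holding { 0x0001, 0x8001 } records
 * index 0 as a partial match, keeps scanning, and returns index 1,
 * because bit 0x8000 marks the preferred full-membership entry.
 */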

int ib_find_exact_cached_pkey(struct ib_device *device,
                              u8                port_num,
                              u16               pkey,
                              u16              *index)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int i;
        int ret = -ENOENT;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

        *index = -1;

        for (i = 0; i < cache->table_len; ++i)
                if (cache->table[i] == pkey) {
                        *index = i;
                        ret = 0;
                        break;
                }

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

int ib_get_cached_lmc(struct ib_device *device,
                      u8                port_num,
                      u8                *lmc)
{
        unsigned long flags;
        int ret = 0;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);
        *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

int ib_get_cached_port_state(struct ib_device   *device,
                             u8                  port_num,
                             enum ib_port_state *port_state)
{
        unsigned long flags;
        int ret = 0;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);
        *port_state = device->cache.ports[port_num
                - rdma_start_port(device)].port_state;
        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_port_state);

static void ib_cache_update(struct ib_device *device,
                            u8                port,
                            bool              enforce_security)
{
        struct ib_port_attr       *tprops = NULL;
        struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
        struct ib_gid_cache {
                int             table_len;
                union ib_gid    table[0];
        }                         *gid_cache = NULL;
        int                        i;
        int                        ret;
        struct ib_gid_table       *table;
        bool                       use_roce_gid_table =
                                        rdma_cap_roce_gid_table(device, port);

        if (!rdma_is_port_valid(device, port))
                return;

        table = device->cache.ports[port - rdma_start_port(device)].gid;

        tprops = kmalloc(sizeof(*tprops), GFP_KERNEL);
        if (!tprops)
                return;

        ret = ib_query_port(device, port, tprops);
        if (ret) {
                pr_warn("ib_query_port failed (%d) for %s\n",
                        ret, device->name);
                goto err;
        }

        pkey_cache = kmalloc(sizeof(*pkey_cache) + tprops->pkey_tbl_len *
                             sizeof(*pkey_cache->table), GFP_KERNEL);
        if (!pkey_cache)
                goto err;

        pkey_cache->table_len = tprops->pkey_tbl_len;

        if (!use_roce_gid_table) {
                gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
                                    sizeof(*gid_cache->table), GFP_KERNEL);
                if (!gid_cache)
                        goto err;

                gid_cache->table_len = tprops->gid_tbl_len;
        }

        for (i = 0; i < pkey_cache->table_len; ++i) {
                ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
                if (ret) {
                        pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
                                ret, device->name, i);
                        goto err;
                }
        }

        if (!use_roce_gid_table) {
                for (i = 0; i < gid_cache->table_len; ++i) {
                        ret = ib_query_gid(device, port, i,
                                           gid_cache->table + i, NULL);
                        if (ret) {
                                pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
                                        ret, device->name, i);
                                goto err;
                        }
                }
        }

        write_lock_irq(&device->cache.lock);

        old_pkey_cache = device->cache.ports[port -
                rdma_start_port(device)].pkey;

        device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
        if (!use_roce_gid_table) {
                write_lock(&table->rwlock);
                for (i = 0; i < gid_cache->table_len; i++) {
                        modify_gid(device, port, table, i, gid_cache->table + i,
                                   &zattr, false);
                }
                write_unlock(&table->rwlock);
        }

        device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
        device->cache.ports[port - rdma_start_port(device)].port_state =
                tprops->state;

        device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
                                                        tprops->subnet_prefix;
        write_unlock_irq(&device->cache.lock);

        if (enforce_security)
                ib_security_cache_change(device,
                                         port,
                                         tprops->subnet_prefix);

        kfree(gid_cache);
        kfree(old_pkey_cache);
        kfree(tprops);
        return;

err:
        kfree(pkey_cache);
        kfree(gid_cache);
        kfree(tprops);
}

static void ib_cache_task(struct work_struct *_work)
{
        struct ib_update_work *work =
                container_of(_work, struct ib_update_work, work);

        ib_cache_update(work->device,
                        work->port_num,
                        work->enforce_security);
        kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
                           struct ib_event *event)
{
        struct ib_update_work *work;

        if (event->event == IB_EVENT_PORT_ERR    ||
            event->event == IB_EVENT_PORT_ACTIVE ||
            event->event == IB_EVENT_LID_CHANGE  ||
            event->event == IB_EVENT_PKEY_CHANGE ||
            event->event == IB_EVENT_SM_CHANGE   ||
            event->event == IB_EVENT_CLIENT_REREGISTER ||
            event->event == IB_EVENT_GID_CHANGE) {
                work = kmalloc(sizeof(*work), GFP_ATOMIC);
                if (work) {
                        INIT_WORK(&work->work, ib_cache_task);
                        work->device   = event->device;
                        work->port_num = event->element.port_num;
                        if (event->event == IB_EVENT_PKEY_CHANGE ||
                            event->event == IB_EVENT_GID_CHANGE)
                                work->enforce_security = true;
                        else
                                work->enforce_security = false;

                        queue_work(ib_wq, &work->work);
                }
        }
}

int ib_cache_setup_one(struct ib_device *device)
{
        int p;
        int err;

        rwlock_init(&device->cache.lock);

        device->cache.ports =
                kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
                        sizeof(*device->cache.ports), GFP_KERNEL);
        if (!device->cache.ports)
                return -ENOMEM;

        err = gid_table_setup_one(device);
        if (err) {
                kfree(device->cache.ports);
                device->cache.ports = NULL;
                return err;
        }

        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
                ib_cache_update(device, p + rdma_start_port(device), true);

        INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
                              device, ib_cache_event);
        ib_register_event_handler(&device->cache.event_handler);
        return 0;
}

void ib_cache_release_one(struct ib_device *device)
{
        int p;

        /*
         * The release function frees all the cache elements.
         * This function should be called as part of freeing
         * all the device's resources when the cache can no
         * longer be accessed.
         */
        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
                kfree(device->cache.ports[p].pkey);

        gid_table_release_one(device);
        kfree(device->cache.ports);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
        /* The cleanup function unregisters the event handler,
         * waits for all in-progress workqueue elements and cleans
         * up the GID cache. This function should be called after
         * the device was removed from the devices list and all
         * clients were removed, so the cache exists but is
         * non-functional and shouldn't be updated anymore.
         */
        ib_unregister_event_handler(&device->cache.event_handler);
        flush_workqueue(ib_wq);
        gid_table_cleanup_one(device);
}

void __init ib_cache_setup(void)
{
        roce_gid_mgmt_init();
}

void __exit ib_cache_cleanup(void)
{
        roce_gid_mgmt_cleanup();
}
1261