linux/drivers/infiniband/core/cache.c
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
        int             table_len;
        u16             table[0];
};

struct ib_update_work {
        struct work_struct work;
        struct ib_device  *device;
        u8                 port_num;
        bool               enforce_security;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

static const struct ib_gid_attr zattr;

enum gid_attr_find_mask {
        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
        GID_ATTR_FIND_MASK_DEFAULT      = 1UL << 2,
        GID_ATTR_FIND_MASK_GID_TYPE     = 1UL << 3,
};

enum gid_table_entry_props {
        GID_TABLE_ENTRY_INVALID         = 1UL << 0,
        GID_TABLE_ENTRY_DEFAULT         = 1UL << 1,
};

enum gid_table_write_action {
        GID_TABLE_WRITE_ACTION_ADD,
        GID_TABLE_WRITE_ACTION_DEL,
        /* MODIFY only updates the GID table. Currently only used by
         * ib_cache_update.
         */
        GID_TABLE_WRITE_ACTION_MODIFY
};

struct ib_gid_table_entry {
        unsigned long       props;
        union ib_gid        gid;
        struct ib_gid_attr  attr;
        void               *context;
};

struct ib_gid_table {
        int                  sz;
        /* In RoCE, adding a GID to the table requires:
         * (a) Find out if this GID already exists.
         * (b) Find a free slot.
         * (c) Write the new GID.
         *
         * Deleting requires a different set of operations:
         * (a) Find the GID.
         * (b) Delete it.
         *
         * Add/delete must be carried out atomically. This is
         * done by holding this mutex across all writers. We
         * don't need this lock for IB, as the MAD layer
         * replaces all entries at once. All data_vec entries
         * are protected by this lock.
         */
        struct mutex         lock;
        /* This lock protects the table entries from being
         * read and written simultaneously.
         */
        rwlock_t             rwlock;
        struct ib_gid_table_entry *data_vec;
};
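
/* Illustrative sketch (not part of the original file): the lock ordering
 * the comments above describe. RoCE writers take the sleepable mutex
 * first, then the rwlock; readers take only the rwlock. Assuming a
 * hypothetical "table" pointer:
 *
 *      mutex_lock(&table->lock);         // serialize writers (may sleep)
 *      write_lock_irq(&table->rwlock);   // exclude readers of data_vec
 *      ... find/modify a data_vec entry ...
 *      write_unlock_irq(&table->rwlock);
 *      mutex_unlock(&table->lock);
 */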

static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                struct ib_event event;

                event.device            = ib_dev;
                event.element.port_num  = port;
                event.event             = IB_EVENT_GID_CHANGE;

                ib_dispatch_event(&event);
        }
}

static const char * const gid_type_str[] = {
        [IB_GID_TYPE_IB]        = "IB/RoCE v1",
        [IB_GID_TYPE_ROCE_UDP_ENCAP]    = "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
        if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
                return gid_type_str[gid_type];

        return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

int ib_cache_gid_parse_type_str(const char *buf)
{
        unsigned int i;
        size_t len;
        int err = -EINVAL;

        len = strlen(buf);
        if (len == 0)
                return -EINVAL;

        if (buf[len - 1] == '\n')
                len--;

        for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
                if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
                    len == strlen(gid_type_str[i])) {
                        err = i;
                        break;
                }

        return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
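
/* Illustrative sketch (not part of the original file): how the two helpers
 * above round-trip a GID type through its string name, e.g. when a sysfs
 * write supplies "RoCE v2\n" (a trailing newline is tolerated):
 *
 *      int t = ib_cache_gid_parse_type_str("RoCE v2\n");
 *      // t == IB_GID_TYPE_ROCE_UDP_ENCAP
 *      // ib_cache_gid_type_str(t) returns "RoCE v2"
 *      // an unknown name yields -EINVAL
 */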

/* This function expects that rwlock will be write locked in all
 * scenarios and that the table's mutex will be held in sleepable
 * (RoCE) scenarios.
 */
static int write_gid(struct ib_device *ib_dev, u8 port,
                     struct ib_gid_table *table, int ix,
                     const union ib_gid *gid,
                     const struct ib_gid_attr *attr,
                     enum gid_table_write_action action,
                     bool  default_gid)
        __releases(&table->rwlock) __acquires(&table->rwlock)
{
        int ret = 0;
        struct net_device *old_net_dev;
        enum ib_gid_type old_gid_type;

        /* When rdma_cap_roce_gid_table() is true, this function must be
         * protected by a sleepable lock.
         */

        if (rdma_cap_roce_gid_table(ib_dev, port)) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
                write_unlock_irq(&table->rwlock);
                /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
                 * RoCE providers and thus only updates the cache.
                 */
                if (action == GID_TABLE_WRITE_ACTION_ADD)
                        ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
                                              &table->data_vec[ix].context);
                else if (action == GID_TABLE_WRITE_ACTION_DEL)
                        ret = ib_dev->del_gid(ib_dev, port, ix,
                                              &table->data_vec[ix].context);
                write_lock_irq(&table->rwlock);
        }

        old_net_dev = table->data_vec[ix].attr.ndev;
        old_gid_type = table->data_vec[ix].attr.gid_type;
        if (old_net_dev && old_net_dev != attr->ndev)
                dev_put(old_net_dev);
        /* if the provider call failed, just delete the old gid */
        if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
                gid = &zgid;
                attr = &zattr;
                table->data_vec[ix].context = NULL;
        }

        memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
        memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
        if (default_gid) {
                table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
                if (action == GID_TABLE_WRITE_ACTION_DEL)
                        table->data_vec[ix].attr.gid_type = old_gid_type;
        }
        if (table->data_vec[ix].attr.ndev &&
            table->data_vec[ix].attr.ndev != old_net_dev)
                dev_hold(table->data_vec[ix].attr.ndev);

        table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;

        return ret;
}

static int add_gid(struct ib_device *ib_dev, u8 port,
                   struct ib_gid_table *table, int ix,
                   const union ib_gid *gid,
                   const struct ib_gid_attr *attr,
                   bool default_gid)
{
        return write_gid(ib_dev, port, table, ix, gid, attr,
                         GID_TABLE_WRITE_ACTION_ADD, default_gid);
}

static int modify_gid(struct ib_device *ib_dev, u8 port,
                      struct ib_gid_table *table, int ix,
                      const union ib_gid *gid,
                      const struct ib_gid_attr *attr,
                      bool default_gid)
{
        return write_gid(ib_dev, port, table, ix, gid, attr,
                         GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
}

static int del_gid(struct ib_device *ib_dev, u8 port,
                   struct ib_gid_table *table, int ix,
                   bool default_gid)
{
        return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
                         GID_TABLE_WRITE_ACTION_DEL, default_gid);
}

/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                    const struct ib_gid_attr *val, bool default_gid,
                    unsigned long mask, int *pempty)
{
        int i = 0;
        int found = -1;
        int empty = pempty ? -1 : 0;

        while (i < table->sz && (found < 0 || empty < 0)) {
                struct ib_gid_table_entry *data = &table->data_vec[i];
                struct ib_gid_attr *attr = &data->attr;
                int curr_index = i;

                i++;

                if (data->props & GID_TABLE_ENTRY_INVALID)
                        continue;

                if (empty < 0)
                        if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
                            !memcmp(attr, &zattr, sizeof(*attr)) &&
                            !data->props)
                                empty = curr_index;

                if (found >= 0)
                        continue;

                if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
                    attr->gid_type != val->gid_type)
                        continue;

                if (mask & GID_ATTR_FIND_MASK_GID &&
                    memcmp(gid, &data->gid, sizeof(*gid)))
                        continue;

                if (mask & GID_ATTR_FIND_MASK_NETDEV &&
                    attr->ndev != val->ndev)
                        continue;

                if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
                    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
                    default_gid)
                        continue;

                found = curr_index;
        }

        if (pempty)
                *pempty = empty;

        return found;
}

static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
        gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
        addrconf_ifid_eui48(&gid->raw[8], dev);
}
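
/* Illustrative sketch (not part of the original file): what
 * make_default_gid() produces. The default GID is the link-local prefix
 * fe80::/64 plus a modified EUI-64 interface ID that addrconf_ifid_eui48()
 * derives from the netdevice's MAC (ff:fe inserted in the middle,
 * universal/local bit flipped). For a MAC of 00:11:22:33:44:55:
 *
 *      fe80:0000:0000:0000:0211:22ff:fe33:4455
 */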

int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
                     union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table *table;
        int ix;
        int ret = 0;
        struct net_device *idev;
        int empty;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        if (!memcmp(gid, &zgid, sizeof(*gid)))
                return -EINVAL;

        if (ib_dev->get_netdev) {
                idev = ib_dev->get_netdev(ib_dev, port);
                if (idev && attr->ndev != idev) {
                        union ib_gid default_gid;

                        /* Adding default GIDs is not permitted */
                        make_default_gid(idev, &default_gid);
                        if (!memcmp(gid, &default_gid, sizeof(*gid))) {
                                dev_put(idev);
                                return -EPERM;
                        }
                }
                if (idev)
                        dev_put(idev);
        }

        mutex_lock(&table->lock);
        write_lock_irq(&table->rwlock);

        ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
                      GID_ATTR_FIND_MASK_GID_TYPE |
                      GID_ATTR_FIND_MASK_NETDEV, &empty);
        if (ix >= 0)
                goto out_unlock;

        if (empty < 0) {
                ret = -ENOSPC;
                goto out_unlock;
        }

        ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
        if (!ret)
                dispatch_gid_change_event(ib_dev, port);

out_unlock:
        write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return ret;
}
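
/* Illustrative sketch (not part of the original file): how a RoCE GID
 * management path might add one entry. The gid value and ndev here are
 * hypothetical; real callers (e.g. roce_gid_mgmt.c) derive them from the
 * netdevice's IP addresses:
 *
 *      struct ib_gid_attr attr = {
 *              .ndev     = ndev,
 *              .gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP,
 *      };
 *
 *      ret = ib_cache_gid_add(ib_dev, port, &gid, &attr);
 *      // 0 on success or if an identical entry already exists,
 *      // -ENOSPC when the table is full, -EPERM for a default GID
 */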

int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
                     union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table *table;
        int ix;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        mutex_lock(&table->lock);
        write_lock_irq(&table->rwlock);

        ix = find_gid(table, gid, attr, false,
                      GID_ATTR_FIND_MASK_GID      |
                      GID_ATTR_FIND_MASK_GID_TYPE |
                      GID_ATTR_FIND_MASK_NETDEV   |
                      GID_ATTR_FIND_MASK_DEFAULT,
                      NULL);
        if (ix < 0)
                goto out_unlock;

        if (!del_gid(ib_dev, port, table, ix, false))
                dispatch_gid_change_event(ib_dev, port);

out_unlock:
        write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
        return 0;
}

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
                                     struct net_device *ndev)
{
        struct ib_gid_table *table;
        int ix;
        bool deleted = false;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        mutex_lock(&table->lock);
        write_lock_irq(&table->rwlock);

        for (ix = 0; ix < table->sz; ix++)
                if (table->data_vec[ix].attr.ndev == ndev)
                        if (!del_gid(ib_dev, port, table, ix,
                                     !!(table->data_vec[ix].props &
                                        GID_TABLE_ENTRY_DEFAULT)))
                                deleted = true;

        write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);

        if (deleted)
                dispatch_gid_change_event(ib_dev, port);

        return 0;
}

static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
                              union ib_gid *gid, struct ib_gid_attr *attr)
{
        struct ib_gid_table *table;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        if (index < 0 || index >= table->sz)
                return -EINVAL;

        if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
                return -EAGAIN;

        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
        if (attr) {
                memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
                if (attr->ndev)
                        dev_hold(attr->ndev);
        }

        return 0;
}

static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
                                    const union ib_gid *gid,
                                    const struct ib_gid_attr *val,
                                    unsigned long mask,
                                    u8 *port, u16 *index)
{
        struct ib_gid_table *table;
        u8 p;
        int local_index;
        unsigned long flags;

        for (p = 0; p < ib_dev->phys_port_cnt; p++) {
                table = ib_dev->cache.ports[p].gid;
                read_lock_irqsave(&table->rwlock, flags);
                local_index = find_gid(table, gid, val, false, mask, NULL);
                if (local_index >= 0) {
                        if (index)
                                *index = local_index;
                        if (port)
                                *port = p + rdma_start_port(ib_dev);
                        read_unlock_irqrestore(&table->rwlock, flags);
                        return 0;
                }
                read_unlock_irqrestore(&table->rwlock, flags);
        }

        return -ENOENT;
}

static int ib_cache_gid_find(struct ib_device *ib_dev,
                             const union ib_gid *gid,
                             enum ib_gid_type gid_type,
                             struct net_device *ndev, u8 *port,
                             u16 *index)
{
        unsigned long mask = GID_ATTR_FIND_MASK_GID |
                             GID_ATTR_FIND_MASK_GID_TYPE;
        struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};

        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;

        return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
                                        mask, port, index);
}

int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
                               const union ib_gid *gid,
                               enum ib_gid_type gid_type,
                               u8 port, struct net_device *ndev,
                               u16 *index)
{
        int local_index;
        struct ib_gid_table *table;
        unsigned long mask = GID_ATTR_FIND_MASK_GID |
                             GID_ATTR_FIND_MASK_GID_TYPE;
        struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
        unsigned long flags;

        if (!rdma_is_port_valid(ib_dev, port))
                return -ENOENT;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;

        read_lock_irqsave(&table->rwlock, flags);
        local_index = find_gid(table, gid, &val, false, mask, NULL);
        if (local_index >= 0) {
                if (index)
                        *index = local_index;
                read_unlock_irqrestore(&table->rwlock, flags);
                return 0;
        }

        read_unlock_irqrestore(&table->rwlock, flags);
        return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);

/**
 * ib_cache_gid_find_by_filter - Returns the GID table index where a
 * specified GID value occurs
 * @ib_dev: The device to query.
 * @gid: The GID value to search for.
 * @port: The port number of the device where the GID value should be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
 * @context: Opaque private data passed to the filter function.
 * @index: The index into the cached GID table where the GID was found.  This
 *   parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * for which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
                                       const union ib_gid *gid,
                                       u8 port,
                                       bool (*filter)(const union ib_gid *,
                                                      const struct ib_gid_attr *,
                                                      void *),
                                       void *context,
                                       u16 *index)
{
        struct ib_gid_table *table;
        unsigned int i;
        unsigned long flags;
        bool found = false;

        if (!rdma_is_port_valid(ib_dev, port) ||
            !rdma_protocol_roce(ib_dev, port))
                return -EPROTONOSUPPORT;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        read_lock_irqsave(&table->rwlock, flags);
        for (i = 0; i < table->sz; i++) {
                struct ib_gid_attr attr;

                if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
                        continue;

                if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
                        continue;

                memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

                if (filter(gid, &attr, context)) {
                        found = true;
                        if (index)
                                *index = i;
                        break;
                }
        }
        read_unlock_irqrestore(&table->rwlock, flags);

        if (!found)
                return -ENOENT;
        return 0;
}

static struct ib_gid_table *alloc_gid_table(int sz)
{
        struct ib_gid_table *table =
                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);

        if (!table)
                return NULL;

        table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
        if (!table->data_vec)
                goto err_free_table;

        mutex_init(&table->lock);

        table->sz = sz;
        rwlock_init(&table->rwlock);

        return table;

err_free_table:
        kfree(table);
        return NULL;
}

static void release_gid_table(struct ib_gid_table *table)
{
        if (table) {
                kfree(table->data_vec);
                kfree(table);
        }
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
                                   struct ib_gid_table *table)
{
        int i;
        bool deleted = false;

        if (!table)
                return;

        write_lock_irq(&table->rwlock);
        for (i = 0; i < table->sz; ++i) {
                if (memcmp(&table->data_vec[i].gid, &zgid,
                           sizeof(table->data_vec[i].gid)))
                        /* props stores GID_TABLE_ENTRY_* flags, so test the
                         * DEFAULT entry flag here, not the find-mask bit.
                         */
                        if (!del_gid(ib_dev, port, table, i,
                                     !!(table->data_vec[i].props &
                                        GID_TABLE_ENTRY_DEFAULT)))
                                deleted = true;
        }
        write_unlock_irq(&table->rwlock);

        if (deleted)
                dispatch_gid_change_event(ib_dev, port);
}

void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
                                  struct net_device *ndev,
                                  unsigned long gid_type_mask,
                                  enum ib_cache_gid_default_mode mode)
{
        union ib_gid gid;
        struct ib_gid_attr gid_attr;
        struct ib_gid_attr zattr_type = zattr;
        struct ib_gid_table *table;
        unsigned int gid_type;

        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;

        make_default_gid(ndev, &gid);
        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;

        for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
                int ix;
                union ib_gid current_gid;
                struct ib_gid_attr current_gid_attr = {};

                if (1UL << gid_type & ~gid_type_mask)
                        continue;

                gid_attr.gid_type = gid_type;

                mutex_lock(&table->lock);
                write_lock_irq(&table->rwlock);
                ix = find_gid(table, NULL, &gid_attr, true,
                              GID_ATTR_FIND_MASK_GID_TYPE |
                              GID_ATTR_FIND_MASK_DEFAULT,
                              NULL);

                /* Couldn't find the default GID location */
                if (WARN_ON(ix < 0))
                        goto release;

                zattr_type.gid_type = gid_type;

                if (!__ib_cache_gid_get(ib_dev, port, ix,
                                        &current_gid, &current_gid_attr) &&
                    mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
                    !memcmp(&gid, &current_gid, sizeof(gid)) &&
                    !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
                        goto release;

                if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
                    memcmp(&current_gid_attr, &zattr_type,
                           sizeof(current_gid_attr))) {
                        if (del_gid(ib_dev, port, table, ix, true)) {
                                pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
                                        ix, gid.raw);
                                goto release;
                        } else {
                                dispatch_gid_change_event(ib_dev, port);
                        }
                }

                if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
                        if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr,
                                    true))
                                pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
                                        gid.raw);
                        else
                                dispatch_gid_change_event(ib_dev, port);
                }

release:
                if (current_gid_attr.ndev)
                        dev_put(current_gid_attr.ndev);
                write_unlock_irq(&table->rwlock);
                mutex_unlock(&table->lock);
        }
}

static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
                                     struct ib_gid_table *table)
{
        unsigned int i;
        unsigned long roce_gid_type_mask;
        unsigned int num_default_gids;
        unsigned int current_gid = 0;

        roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
        num_default_gids = hweight_long(roce_gid_type_mask);
        for (i = 0; i < num_default_gids && i < table->sz; i++) {
                struct ib_gid_table_entry *entry =
                        &table->data_vec[i];

                entry->props |= GID_TABLE_ENTRY_DEFAULT;
                current_gid = find_next_bit(&roce_gid_type_mask,
                                            BITS_PER_LONG,
                                            current_gid);
                entry->attr.gid_type = current_gid++;
        }

        return 0;
}
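
/* Illustrative sketch (not part of the original file): how the loop above
 * walks the supported-type bitmask. If a port supports both RoCE v1 and
 * RoCE v2, roce_gid_type_mask_support() returns
 * BIT(IB_GID_TYPE_IB) | BIT(IB_GID_TYPE_ROCE_UDP_ENCAP) == 0x3, so two
 * table slots are reserved:
 *
 *      data_vec[0]: GID_TABLE_ENTRY_DEFAULT, gid_type = IB_GID_TYPE_IB
 *      data_vec[1]: GID_TABLE_ENTRY_DEFAULT, gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP
 */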

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
        u8 port;
        struct ib_gid_table *table;
        int err = 0;

        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                u8 rdma_port = port + rdma_start_port(ib_dev);

                table = alloc_gid_table(
                                ib_dev->port_immutable[rdma_port].gid_tbl_len);
                if (!table) {
                        err = -ENOMEM;
                        goto rollback_table_setup;
                }

                err = gid_table_reserve_default(ib_dev,
                                                port + rdma_start_port(ib_dev),
                                                table);
                if (err)
                        goto rollback_table_setup;
                ib_dev->cache.ports[port].gid = table;
        }

        return 0;

rollback_table_setup:
        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                table = ib_dev->cache.ports[port].gid;

                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
                                       table);
                release_gid_table(table);
        }

        return err;
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
        struct ib_gid_table *table;
        u8 port;

        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                table = ib_dev->cache.ports[port].gid;
                release_gid_table(table);
                ib_dev->cache.ports[port].gid = NULL;
        }
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
        struct ib_gid_table *table;
        u8 port;

        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                table = ib_dev->cache.ports[port].gid;
                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
                                       table);
        }
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
        int err;

        err = _gid_table_setup_one(ib_dev);
        if (err)
                return err;

        rdma_roce_rescan_device(ib_dev);

        return err;
}

int ib_get_cached_gid(struct ib_device *device,
                      u8                port_num,
                      int               index,
                      union ib_gid     *gid,
                      struct ib_gid_attr *gid_attr)
{
        int res;
        unsigned long flags;
        struct ib_gid_table *table;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        table = device->cache.ports[port_num - rdma_start_port(device)].gid;
        read_lock_irqsave(&table->rwlock, flags);
        res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
        read_unlock_irqrestore(&table->rwlock, flags);

        return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);
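
/* Illustrative sketch (not part of the original file): a caller reading
 * GID 0 of port 1 from the cache. __ib_cache_gid_get() takes a reference
 * on attr.ndev, so the caller must drop it:
 *
 *      union ib_gid gid;
 *      struct ib_gid_attr attr;
 *
 *      if (!ib_get_cached_gid(device, 1, 0, &gid, &attr)) {
 *              // use gid/attr; -EAGAIN means the entry is mid-update
 *              if (attr.ndev)
 *                      dev_put(attr.ndev);
 *      }
 */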

int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
                       enum ib_gid_type gid_type,
                       struct net_device *ndev,
                       u8               *port_num,
                       u16              *index)
{
        return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);

int ib_find_gid_by_filter(struct ib_device *device,
                          const union ib_gid *gid,
                          u8 port_num,
                          bool (*filter)(const union ib_gid *gid,
                                         const struct ib_gid_attr *,
                                         void *),
                          void *context, u16 *index)
{
        /* Only the RoCE GID table supports a filter function */
        if (!rdma_cap_roce_gid_table(device, port_num) && filter)
                return -EPROTONOSUPPORT;

        return ib_cache_gid_find_by_filter(device, gid,
                                           port_num, filter,
                                           context, index);
}
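
/* Illustrative sketch (not part of the original file): a filter that
 * matches only entries bound to a given VLAN netdevice, using
 * is_vlan_dev()/vlan_dev_vlan_id() from <linux/if_vlan.h>. The callback
 * runs atomically under the table rwlock, so it must not sleep:
 *
 *      static bool match_vlan(const union ib_gid *gid,
 *                             const struct ib_gid_attr *attr,
 *                             void *context)
 *      {
 *              return attr->ndev && is_vlan_dev(attr->ndev) &&
 *                     vlan_dev_vlan_id(attr->ndev) == *(u16 *)context;
 *      }
 *
 *      u16 vid = 100, index;
 *      ret = ib_find_gid_by_filter(device, &gid, port, match_vlan,
 *                                  &vid, &index);
 */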

int ib_get_cached_pkey(struct ib_device *device,
                       u8                port_num,
                       int               index,
                       u16              *pkey)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int ret = 0;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

        if (index < 0 || index >= cache->table_len)
                ret = -EINVAL;
        else
                *pkey = cache->table[index];

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

int ib_get_cached_subnet_prefix(struct ib_device *device,
                                u8                port_num,
                                u64              *sn_pfx)
{
        unsigned long flags;
        int p;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        p = port_num - rdma_start_port(device);
        read_lock_irqsave(&device->cache.lock, flags);
        *sn_pfx = device->cache.ports[p].subnet_prefix;
        read_unlock_irqrestore(&device->cache.lock, flags);

        return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);

int ib_find_cached_pkey(struct ib_device *device,
                        u8                port_num,
                        u16               pkey,
                        u16              *index)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int i;
        int ret = -ENOENT;
        int partial_ix = -1;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

        *index = -1;

        for (i = 0; i < cache->table_len; ++i)
                if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
                        if (cache->table[i] & 0x8000) {
                                *index = i;
                                ret = 0;
                                break;
                        } else {
                                partial_ix = i;
                        }
                }

        if (ret && partial_ix >= 0) {
                *index = partial_ix;
                ret = 0;
        }

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
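
/* Illustrative note (not part of the original file): bit 15 of a P_Key is
 * the full-membership bit; the low 15 bits are the key itself. The search
 * above prefers a full-member match and falls back to a partial one. For
 * example, with a cached table of { 0x7fff, 0xffff }, a lookup of 0xffff
 * returns index 1 (full member 0xffff) even though index 0 (partial
 * member 0x7fff) matches the same 15-bit key.
 */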

int ib_find_exact_cached_pkey(struct ib_device *device,
                              u8                port_num,
                              u16               pkey,
                              u16              *index)
{
        struct ib_pkey_cache *cache;
        unsigned long flags;
        int i;
        int ret = -ENOENT;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);

        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;

        *index = -1;

        for (i = 0; i < cache->table_len; ++i)
                if (cache->table[i] == pkey) {
                        *index = i;
                        ret = 0;
                        break;
                }

        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

int ib_get_cached_lmc(struct ib_device *device,
                      u8                port_num,
                      u8                *lmc)
{
        unsigned long flags;
        int ret = 0;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);
        *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);

int ib_get_cached_port_state(struct ib_device   *device,
                             u8                  port_num,
                             enum ib_port_state *port_state)
{
        unsigned long flags;
        int ret = 0;

        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;

        read_lock_irqsave(&device->cache.lock, flags);
        *port_state = device->cache.ports[port_num
                - rdma_start_port(device)].port_state;
        read_unlock_irqrestore(&device->cache.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_get_cached_port_state);

static void ib_cache_update(struct ib_device *device,
                            u8                port,
                            bool              enforce_security)
{
        struct ib_port_attr       *tprops = NULL;
        struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
        struct ib_gid_cache {
                int             table_len;
                union ib_gid    table[0];
        }                         *gid_cache = NULL;
        int                        i;
        int                        ret;
        struct ib_gid_table       *table;
        bool                       use_roce_gid_table =
                                        rdma_cap_roce_gid_table(device, port);

        if (!rdma_is_port_valid(device, port))
                return;

        table = device->cache.ports[port - rdma_start_port(device)].gid;

        tprops = kmalloc(sizeof(*tprops), GFP_KERNEL);
        if (!tprops)
                return;

        ret = ib_query_port(device, port, tprops);
        if (ret) {
                pr_warn("ib_query_port failed (%d) for %s\n",
                        ret, device->name);
                goto err;
        }

        pkey_cache = kmalloc(sizeof(*pkey_cache) + tprops->pkey_tbl_len *
                             sizeof(*pkey_cache->table), GFP_KERNEL);
        if (!pkey_cache)
                goto err;

        pkey_cache->table_len = tprops->pkey_tbl_len;

        if (!use_roce_gid_table) {
                gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
                                    sizeof(*gid_cache->table), GFP_KERNEL);
                if (!gid_cache)
                        goto err;

                gid_cache->table_len = tprops->gid_tbl_len;
        }

        for (i = 0; i < pkey_cache->table_len; ++i) {
                ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
                if (ret) {
                        pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
                                ret, device->name, i);
                        goto err;
                }
        }

        if (!use_roce_gid_table) {
                for (i = 0; i < gid_cache->table_len; ++i) {
                        ret = ib_query_gid(device, port, i,
                                           gid_cache->table + i, NULL);
                        if (ret) {
                                pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
                                        ret, device->name, i);
                                goto err;
                        }
                }
        }

        write_lock_irq(&device->cache.lock);

        old_pkey_cache = device->cache.ports[port -
                rdma_start_port(device)].pkey;

        device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
        if (!use_roce_gid_table) {
                write_lock(&table->rwlock);
                for (i = 0; i < gid_cache->table_len; i++) {
                        modify_gid(device, port, table, i, gid_cache->table + i,
                                   &zattr, false);
                }
                write_unlock(&table->rwlock);
        }

        device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
        device->cache.ports[port - rdma_start_port(device)].port_state =
                tprops->state;

        device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
                                                        tprops->subnet_prefix;
        write_unlock_irq(&device->cache.lock);

        if (enforce_security)
                ib_security_cache_change(device,
                                         port,
                                         tprops->subnet_prefix);

        kfree(gid_cache);
        kfree(old_pkey_cache);
        kfree(tprops);
        return;

err:
        kfree(pkey_cache);
        kfree(gid_cache);
        kfree(tprops);
}

static void ib_cache_task(struct work_struct *_work)
{
        struct ib_update_work *work =
                container_of(_work, struct ib_update_work, work);

        ib_cache_update(work->device,
                        work->port_num,
                        work->enforce_security);
        kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
                           struct ib_event *event)
{
        struct ib_update_work *work;

        if (event->event == IB_EVENT_PORT_ERR    ||
            event->event == IB_EVENT_PORT_ACTIVE ||
            event->event == IB_EVENT_LID_CHANGE  ||
            event->event == IB_EVENT_PKEY_CHANGE ||
            event->event == IB_EVENT_SM_CHANGE   ||
            event->event == IB_EVENT_CLIENT_REREGISTER ||
            event->event == IB_EVENT_GID_CHANGE) {
                work = kmalloc(sizeof(*work), GFP_ATOMIC);
                if (work) {
                        INIT_WORK(&work->work, ib_cache_task);
                        work->device   = event->device;
                        work->port_num = event->element.port_num;
                        if (event->event == IB_EVENT_PKEY_CHANGE ||
                            event->event == IB_EVENT_GID_CHANGE)
                                work->enforce_security = true;
                        else
                                work->enforce_security = false;

                        queue_work(ib_wq, &work->work);
                }
        }
}

int ib_cache_setup_one(struct ib_device *device)
{
        int p;
        int err;

        rwlock_init(&device->cache.lock);

        device->cache.ports =
                kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
                        sizeof(*device->cache.ports), GFP_KERNEL);
        if (!device->cache.ports)
                return -ENOMEM;

        err = gid_table_setup_one(device);
        if (err) {
                kfree(device->cache.ports);
                device->cache.ports = NULL;
                return err;
        }

        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
                ib_cache_update(device, p + rdma_start_port(device), true);

        INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
                              device, ib_cache_event);
        ib_register_event_handler(&device->cache.event_handler);
        return 0;
}

void ib_cache_release_one(struct ib_device *device)
{
        int p;

        /*
         * The release function frees all the cache elements.
         * This function should be called as part of freeing
         * all the device's resources when the cache could no
         * longer be accessed.
         */
        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
                kfree(device->cache.ports[p].pkey);

        gid_table_release_one(device);
        kfree(device->cache.ports);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
        /* The cleanup function unregisters the event handler,
         * waits for all in-progress workqueue elements and cleans
         * up the GID cache. This function should be called after
         * the device was removed from the devices list and all
         * clients were removed, so the cache exists but is
         * non-functional and shouldn't be updated anymore.
         */
        ib_unregister_event_handler(&device->cache.event_handler);
        flush_workqueue(ib_wq);
        gid_table_cleanup_one(device);
}

void __init ib_cache_setup(void)
{
        roce_gid_mgmt_init();
}

void __exit ib_cache_cleanup(void)
{
        roce_gid_mgmt_cleanup();
}