linux/drivers/infiniband/hw/mlx5/main.c
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <asm-generic/kmap_types.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include "user.h"
#include "mlx5_ib.h"

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "2.2-1"
#define DRIVER_RELDATE  "Feb 2014"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);

static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");

static char mlx5_version[] =
        DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
        DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

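/*
 * Report the port link layer based on the device's port_type general
 * capability: native InfiniBand, Ethernet, or unspecified.
 */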
static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device)
{
        struct mlx5_ib_dev *dev = to_mdev(device);

        switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
        case MLX5_CAP_PORT_TYPE_IB:
                return IB_LINK_LAYER_INFINIBAND;
        case MLX5_CAP_PORT_TYPE_ETH:
                return IB_LINK_LAYER_ETHERNET;
        default:
                return IB_LINK_LAYER_UNSPECIFIED;
        }
}

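/*
 * While the ISSI (Interface Step Sequence ID) is still 0, the device
 * does not serve the vport query commands used below, so attribute
 * queries have to go through the MAD interface instead.
 */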
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
        return !dev->mdev->issi;
}

enum {
        MLX5_VPORT_ACCESS_METHOD_MAD,
        MLX5_VPORT_ACCESS_METHOD_HCA,
        MLX5_VPORT_ACCESS_METHOD_NIC,
};

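/*
 * Choose how vport attributes are queried: MADs when ISSI is not
 * negotiated, the NIC vport context on an Ethernet link layer, and
 * the HCA vport context otherwise.
 */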
static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
        if (mlx5_use_mad_ifc(to_mdev(ibdev)))
                return MLX5_VPORT_ACCESS_METHOD_MAD;

        if (mlx5_ib_port_link_layer(ibdev) ==
            IB_LINK_LAYER_ETHERNET)
                return MLX5_VPORT_ACCESS_METHOD_NIC;

        return MLX5_VPORT_ACCESS_METHOD_HCA;
}

static int mlx5_query_system_image_guid(struct ib_device *ibdev,
                                        __be64 *sys_image_guid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;
        u64 tmp;
        int err;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_mad_ifc_system_image_guid(ibdev,
                                                            sys_image_guid);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
                if (!err)
                        *sys_image_guid = cpu_to_be64(tmp);
                return err;

        default:
                return -EINVAL;
        }
}

static int mlx5_query_max_pkeys(struct ib_device *ibdev,
                                u16 *max_pkeys)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
        case MLX5_VPORT_ACCESS_METHOD_NIC:
                *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
                                                pkey_table_size));
                return 0;

        default:
                return -EINVAL;
        }
}

static int mlx5_query_vendor_id(struct ib_device *ibdev,
                                u32 *vendor_id)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
        case MLX5_VPORT_ACCESS_METHOD_NIC:
                return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

        default:
                return -EINVAL;
        }
}

static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
                                __be64 *node_guid)
{
        u64 tmp;
        int err;

        switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_mad_ifc_node_guid(dev, node_guid);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
                if (!err)
                        *node_guid = cpu_to_be64(tmp);
                return err;

        default:
                return -EINVAL;
        }
}

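/* Payload of the NODE_DESC access register: a 64-byte IB NodeDescription. */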
struct mlx5_reg_node_desc {
        u8      desc[64];
};

static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
        struct mlx5_reg_node_desc in;

        if (mlx5_use_mad_ifc(dev))
                return mlx5_query_mad_ifc_node_desc(dev, node_desc);

        memset(&in, 0, sizeof(in));

        return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
                                    sizeof(struct mlx5_reg_node_desc),
                                    MLX5_REG_NODE_DESC, 0, 0);
}

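/*
 * Fill struct ib_device_attr from the general device capabilities.
 * No vendor-specific channel is defined for this verb, so any udata
 * content is rejected.
 */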
static int mlx5_ib_query_device(struct ib_device *ibdev,
                                struct ib_device_attr *props,
                                struct ib_udata *uhw)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;
        int err = -ENOMEM;
        int max_rq_sg;
        int max_sq_sg;
        u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        memset(props, 0, sizeof(*props));
        err = mlx5_query_system_image_guid(ibdev,
                                           &props->sys_image_guid);
        if (err)
                return err;

        err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
        if (err)
                return err;

        err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
        if (err)
                return err;

        props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
                (fw_rev_min(dev->mdev) << 16) |
                fw_rev_sub(dev->mdev);
        props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
                IB_DEVICE_PORT_ACTIVE_EVENT             |
                IB_DEVICE_SYS_IMAGE_GUID                |
                IB_DEVICE_RC_RNR_NAK_GEN;

        if (MLX5_CAP_GEN(mdev, pkv))
                props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
        if (MLX5_CAP_GEN(mdev, qkv))
                props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
        if (MLX5_CAP_GEN(mdev, apm))
                props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
        if (MLX5_CAP_GEN(mdev, xrc))
                props->device_cap_flags |= IB_DEVICE_XRC;
        props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
        if (MLX5_CAP_GEN(mdev, sho)) {
                props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
                /* At this stage no support for signature handover */
                props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
                                      IB_PROT_T10DIF_TYPE_2 |
                                      IB_PROT_T10DIF_TYPE_3;
                props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
                                       IB_GUARD_T10DIF_CSUM;
        }
        if (MLX5_CAP_GEN(mdev, block_lb_mc))
                props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

        props->vendor_part_id      = mdev->pdev->device;
        props->hw_ver              = mdev->pdev->revision;

        props->max_mr_size         = ~0ull;
        props->page_size_cap       = ~(min_page_size - 1);
        props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
        props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
        max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
                     sizeof(struct mlx5_wqe_data_seg);
        max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
                     sizeof(struct mlx5_wqe_ctrl_seg)) /
                     sizeof(struct mlx5_wqe_data_seg);
        props->max_sge = min(max_rq_sg, max_sq_sg);
        props->max_sge_rd = props->max_sge;
        props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
        props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
        props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
        props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
        props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
        props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
        props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
        props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
        props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq_sge         = max_rq_sg - 1;
        props->max_fast_reg_page_list_len = (unsigned int)-1;
        props->atomic_cap          = IB_ATOMIC_NONE;
        props->masked_atomic_cap   = IB_ATOMIC_NONE;
        props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
        props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        if (MLX5_CAP_GEN(mdev, pg))
                props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
        props->odp_caps = dev->odp_caps;
#endif

        return 0;
}

enum mlx5_ib_width {
        MLX5_IB_WIDTH_1X        = 1 << 0,
        MLX5_IB_WIDTH_2X        = 1 << 1,
        MLX5_IB_WIDTH_4X        = 1 << 2,
        MLX5_IB_WIDTH_8X        = 1 << 3,
        MLX5_IB_WIDTH_12X       = 1 << 4
};

static int translate_active_width(struct ib_device *ibdev, u8 active_width,
                                  u8 *ib_width)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        int err = 0;

        if (active_width & MLX5_IB_WIDTH_1X) {
                *ib_width = IB_WIDTH_1X;
        } else if (active_width & MLX5_IB_WIDTH_2X) {
                mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
                            (int)active_width);
                err = -EINVAL;
        } else if (active_width & MLX5_IB_WIDTH_4X) {
                *ib_width = IB_WIDTH_4X;
        } else if (active_width & MLX5_IB_WIDTH_8X) {
                *ib_width = IB_WIDTH_8X;
        } else if (active_width & MLX5_IB_WIDTH_12X) {
                *ib_width = IB_WIDTH_12X;
        } else {
                mlx5_ib_dbg(dev, "Invalid active_width %d\n",
                            (int)active_width);
                err = -EINVAL;
        }

        return err;
}

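/*
 * Convert an MTU in bytes to the IB encoding used in struct
 * ib_port_attr (IB_MTU_256 == 1 through IB_MTU_4096 == 5).
 */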
static int mlx5_mtu_to_ib_mtu(int mtu)
{
        switch (mtu) {
        case 256: return 1;
        case 512: return 2;
        case 1024: return 3;
        case 2048: return 4;
        case 4096: return 5;
        default:
                pr_warn("invalid mtu\n");
                return -1;
        }
}

enum ib_max_vl_num {
        __IB_MAX_VL_0           = 1,
        __IB_MAX_VL_0_1         = 2,
        __IB_MAX_VL_0_3         = 3,
        __IB_MAX_VL_0_7         = 4,
        __IB_MAX_VL_0_14        = 5,
};

enum mlx5_vl_hw_cap {
        MLX5_VL_HW_0    = 1,
        MLX5_VL_HW_0_1  = 2,
        MLX5_VL_HW_0_2  = 3,
        MLX5_VL_HW_0_3  = 4,
        MLX5_VL_HW_0_4  = 5,
        MLX5_VL_HW_0_5  = 6,
        MLX5_VL_HW_0_6  = 7,
        MLX5_VL_HW_0_7  = 8,
        MLX5_VL_HW_0_14 = 15
};

static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
                                u8 *max_vl_num)
{
        switch (vl_hw_cap) {
        case MLX5_VL_HW_0:
                *max_vl_num = __IB_MAX_VL_0;
                break;
        case MLX5_VL_HW_0_1:
                *max_vl_num = __IB_MAX_VL_0_1;
                break;
        case MLX5_VL_HW_0_3:
                *max_vl_num = __IB_MAX_VL_0_3;
                break;
        case MLX5_VL_HW_0_7:
                *max_vl_num = __IB_MAX_VL_0_7;
                break;
        case MLX5_VL_HW_0_14:
                *max_vl_num = __IB_MAX_VL_0_14;
                break;

        default:
                return -EINVAL;
        }

        return 0;
}

static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
                               struct ib_port_attr *props)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_hca_vport_context *rep;
        int max_mtu;
        int oper_mtu;
        int err;
        u8 ib_link_width_oper;
        u8 vl_hw_cap;

        rep = kzalloc(sizeof(*rep), GFP_KERNEL);
        if (!rep) {
                err = -ENOMEM;
                goto out;
        }

        memset(props, 0, sizeof(*props));

        err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
        if (err)
                goto out;

        props->lid              = rep->lid;
        props->lmc              = rep->lmc;
        props->sm_lid           = rep->sm_lid;
        props->sm_sl            = rep->sm_sl;
        props->state            = rep->vport_state;
        props->phys_state       = rep->port_physical_state;
        props->port_cap_flags   = rep->cap_mask1;
        props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
        props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
        props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
        props->bad_pkey_cntr    = rep->pkey_violation_counter;
        props->qkey_viol_cntr   = rep->qkey_violation_counter;
        props->subnet_timeout   = rep->subnet_timeout;
        props->init_type_reply  = rep->init_type_reply;

        err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
        if (err)
                goto out;

        err = translate_active_width(ibdev, ib_link_width_oper,
                                     &props->active_width);
        if (err)
                goto out;
        err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
                                         port);
        if (err)
                goto out;

        mlx5_query_port_max_mtu(mdev, &max_mtu, port);

        props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);

        mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);

        props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);

        err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
        if (err)
                goto out;

        err = translate_max_vl_num(ibdev, vl_hw_cap,
                                   &props->max_vl_num);
out:
        kfree(rep);
        return err;
}

int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
                       struct ib_port_attr *props)
{
        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_mad_ifc_port(ibdev, port, props);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                return mlx5_query_hca_port(ibdev, port, props);

        default:
                return -EINVAL;
        }
}

static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
                return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);

        default:
                return -EINVAL;
        }
}

static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                              u16 *pkey)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_core_dev *mdev = dev->mdev;

        switch (mlx5_get_vport_access_method(ibdev)) {
        case MLX5_VPORT_ACCESS_METHOD_MAD:
                return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);

        case MLX5_VPORT_ACCESS_METHOD_HCA:
        case MLX5_VPORT_ACCESS_METHOD_NIC:
                return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
                                                 pkey);
        default:
                return -EINVAL;
        }
}

static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
                                 struct ib_device_modify *props)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_reg_node_desc in;
        struct mlx5_reg_node_desc out;
        int err;

        if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
                return -EOPNOTSUPP;

        if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
                return 0;

        /*
         * If possible, pass the node desc to FW so that it can generate
         * a trap 144 (local node description changed) notification.
         * If the command fails, just ignore the error.
         */
        memcpy(&in, props->node_desc, 64);
        err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
                                   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
        if (err)
                return err;

        memcpy(ibdev->node_desc, props->node_desc, 64);

        return err;
}

static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
                               struct ib_port_modify *props)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct ib_port_attr attr;
        u32 tmp;
        int err;

        mutex_lock(&dev->cap_mask_mutex);

        err = mlx5_ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;

        tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;

        err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
        mutex_unlock(&dev->cap_mask_mutex);
        return err;
}

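/*
 * Two user request layouts are supported and are told apart by their
 * size: the original mlx5_ib_alloc_ucontext_req (ver 0) and the
 * larger _v2 layout (ver 2).  The UUAR bookkeeping below rounds the
 * request up to whole UAR pages.
 */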
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_alloc_ucontext_req_v2 req;
        struct mlx5_ib_alloc_ucontext_resp resp;
        struct mlx5_ib_ucontext *context;
        struct mlx5_uuar_info *uuari;
        struct mlx5_uar *uars;
        int gross_uuars;
        int num_uars;
        int ver;
        int uuarn;
        int err;
        int i;
        size_t reqlen;

        if (!dev->ib_active)
                return ERR_PTR(-EAGAIN);

        memset(&req, 0, sizeof(req));
        reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
        if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
                ver = 0;
        else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
                ver = 2;
        else
                return ERR_PTR(-EINVAL);

        err = ib_copy_from_udata(&req, udata, reqlen);
        if (err)
                return ERR_PTR(err);

        if (req.flags || req.reserved)
                return ERR_PTR(-EINVAL);

        if (req.total_num_uuars > MLX5_MAX_UUARS)
                return ERR_PTR(-ENOMEM);

        if (req.total_num_uuars == 0)
                return ERR_PTR(-EINVAL);

        req.total_num_uuars = ALIGN(req.total_num_uuars,
                                    MLX5_NON_FP_BF_REGS_PER_PAGE);
        if (req.num_low_latency_uuars > req.total_num_uuars - 1)
                return ERR_PTR(-EINVAL);

        num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
        gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
        resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
        resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
        resp.cache_line_size = L1_CACHE_BYTES;
        resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
        resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
        resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
        resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);

        context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        uuari = &context->uuari;
        mutex_init(&uuari->lock);
        uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
        if (!uars) {
                err = -ENOMEM;
                goto out_ctx;
        }

        uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
                                sizeof(*uuari->bitmap),
                                GFP_KERNEL);
        if (!uuari->bitmap) {
                err = -ENOMEM;
                goto out_uar_ctx;
        }
        /*
         * Mark all fast path uuars as taken: the last two registers
         * (indices 2 and 3) of every group of four are reserved for
         * low-latency use and are never handed out by the generic
         * allocator.
         */
        for (i = 0; i < gross_uuars; i++) {
                uuarn = i & 3;
                if (uuarn == 2 || uuarn == 3)
                        set_bit(i, uuari->bitmap);
        }

        uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
        if (!uuari->count) {
                err = -ENOMEM;
                goto out_bitmap;
        }

        for (i = 0; i < num_uars; i++) {
                err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
                if (err)
                        goto out_count;
        }

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif

        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);

        resp.tot_uuars = req.total_num_uuars;
        resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
        err = ib_copy_to_udata(udata, &resp,
                               sizeof(resp) - sizeof(resp.reserved));
        if (err)
                goto out_uars;

        uuari->ver = ver;
        uuari->num_low_latency_uuars = req.num_low_latency_uuars;
        uuari->uars = uars;
        uuari->num_uars = num_uars;
        return &context->ibucontext;

out_uars:
        for (i--; i >= 0; i--)
                mlx5_cmd_free_uar(dev->mdev, uars[i].index);
out_count:
        kfree(uuari->count);

out_bitmap:
        kfree(uuari->bitmap);

out_uar_ctx:
        kfree(uars);

out_ctx:
        kfree(context);
        return ERR_PTR(err);
}

static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        struct mlx5_uuar_info *uuari = &context->uuari;
        int i;

        for (i = 0; i < uuari->num_uars; i++) {
                if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
                        mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
        }

        kfree(uuari->count);
        kfree(uuari->bitmap);
        kfree(uuari->uars);
        kfree(context);

        return 0;
}

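/*
 * UAR pages live at the start of BAR 0; translate a UAR index to the
 * page frame number handed to io_remap_pfn_range() at mmap time.
 */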
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
        return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
}

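/*
 * The mmap offset (vm_pgoff) encodes a command in the bits at and
 * above MLX5_IB_MMAP_CMD_SHIFT and a command argument, e.g. a UAR
 * index, in the bits below it.
 */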
static int get_command(unsigned long offset)
{
        return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static int get_arg(unsigned long offset)
{
        return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}

static int get_index(unsigned long offset)
{
        return get_arg(offset);
}

static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        struct mlx5_uuar_info *uuari = &context->uuari;
        unsigned long command;
        unsigned long idx;
        phys_addr_t pfn;

        command = get_command(vma->vm_pgoff);
        switch (command) {
        case MLX5_IB_MMAP_REGULAR_PAGE:
                if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                        return -EINVAL;

                idx = get_index(vma->vm_pgoff);
                if (idx >= uuari->num_uars)
                        return -EINVAL;

                pfn = uar_index2pfn(dev, uuari->uars[idx].index);
                mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
                            (unsigned long long)pfn);

                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
                if (io_remap_pfn_range(vma, vma->vm_start, pfn,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;

                mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
                            vma->vm_start,
                            (unsigned long long)pfn << PAGE_SHIFT);
                break;

        case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
                return -ENOSYS;

        default:
                return -EINVAL;
        }

        return 0;
}

static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
                                      struct ib_ucontext *context,
                                      struct ib_udata *udata)
{
        struct mlx5_ib_alloc_pd_resp resp;
        struct mlx5_ib_pd *pd;
        int err;

        pd = kmalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
        if (err) {
                kfree(pd);
                return ERR_PTR(err);
        }

        if (context) {
                resp.pdn = pd->pdn;
                if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
                        mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
                        kfree(pd);
                        return ERR_PTR(-EFAULT);
                }
        }

        return &pd->ibpd;
}

static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
        struct mlx5_ib_dev *mdev = to_mdev(pd->device);
        struct mlx5_ib_pd *mpd = to_mpd(pd);

        mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
        kfree(mpd);

        return 0;
}

static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        int err;

        err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
        if (err)
                mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
                             ibqp->qp_num, gid->raw);

        return err;
}

static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        int err;

        err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
        if (err)
                mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
                             ibqp->qp_num, gid->raw);

        return err;
}

static int init_node_data(struct mlx5_ib_dev *dev)
{
        int err;

        err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
        if (err)
                return err;

        dev->mdev->rev_id = dev->mdev->pdev->revision;

        return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}

static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
                             char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);

        return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
}

static ssize_t show_reg_pages(struct device *device,
                              struct device_attribute *attr, char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);

        return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
                           char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev),
                       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "%x\n", dev->mdev->rev_id);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
                          char *buf)
{
        struct mlx5_ib_dev *dev =
                container_of(device, struct mlx5_ib_dev, ib_dev.dev);
        return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
                       dev->mdev->board_id);
}

static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);

static struct device_attribute *mlx5_class_attributes[] = {
        &dev_attr_hw_rev,
        &dev_attr_fw_ver,
        &dev_attr_hca_type,
        &dev_attr_board_id,
        &dev_attr_fw_pages,
        &dev_attr_reg_pages,
};

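/*
 * Fan core device events out to the IB core as ib_events.  For port
 * scoped events, 'param' carries the one-based port number.
 */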
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
                          enum mlx5_dev_event event, unsigned long param)
{
        struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
        struct ib_event ibev;
        bool fatal = false;
        u8 port = 0;

        switch (event) {
        case MLX5_DEV_EVENT_SYS_ERROR:
                ibev.event = IB_EVENT_DEVICE_FATAL;
                fatal = true;
                break;

        case MLX5_DEV_EVENT_PORT_UP:
                ibev.event = IB_EVENT_PORT_ACTIVE;
                port = (u8)param;
                break;

        case MLX5_DEV_EVENT_PORT_DOWN:
                ibev.event = IB_EVENT_PORT_ERR;
                port = (u8)param;
                break;

        case MLX5_DEV_EVENT_PORT_INITIALIZED:
                /* not used by ULPs */
                return;

        case MLX5_DEV_EVENT_LID_CHANGE:
                ibev.event = IB_EVENT_LID_CHANGE;
                port = (u8)param;
                break;

        case MLX5_DEV_EVENT_PKEY_CHANGE:
                ibev.event = IB_EVENT_PKEY_CHANGE;
                port = (u8)param;
                break;

        case MLX5_DEV_EVENT_GUID_CHANGE:
                ibev.event = IB_EVENT_GID_CHANGE;
                port = (u8)param;
                break;

        case MLX5_DEV_EVENT_CLIENT_REREG:
                ibev.event = IB_EVENT_CLIENT_REREGISTER;
                port = (u8)param;
                break;
        }

        ibev.device           = &ibdev->ib_dev;
        ibev.element.port_num = port;

        /*
         * A system error carries no port number; validate the port only
         * for port-scoped events so that fatal events still get
         * dispatched below.
         */
        if (event != MLX5_DEV_EVENT_SYS_ERROR &&
            (port < 1 || port > ibdev->num_ports)) {
                mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
                return;
        }

        if (ibdev->ib_active)
                ib_dispatch_event(&ibev);

        /* Mark the device inactive only after the fatal event is delivered */
        if (fatal)
                ibdev->ib_active = false;
}

static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
        int port;

        for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
                mlx5_query_ext_port_caps(dev, port);
}

static int get_port_caps(struct mlx5_ib_dev *dev)
{
        struct ib_device_attr *dprops = NULL;
        struct ib_port_attr *pprops = NULL;
        int err = -ENOMEM;
        int port;
        struct ib_udata uhw = {.inlen = 0, .outlen = 0};

        pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
        if (!pprops)
                goto out;

        dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
        if (!dprops)
                goto out;

        err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
        if (err) {
                mlx5_ib_warn(dev, "query_device failed %d\n", err);
                goto out;
        }

        for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
                err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
                if (err) {
                        mlx5_ib_warn(dev, "query_port %d failed %d\n",
                                     port, err);
                        break;
                }
                dev->mdev->port_caps[port - 1].pkey_table_len =
                                                dprops->max_pkeys;
                dev->mdev->port_caps[port - 1].gid_table_len =
                                                pprops->gid_tbl_len;
                mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
                            dprops->max_pkeys, pprops->gid_tbl_len);
        }

out:
        kfree(pprops);
        kfree(dprops);

        return err;
}

static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
        int err;

        err = mlx5_mr_cache_cleanup(dev);
        if (err)
                mlx5_ib_warn(dev, "mr cache cleanup failed\n");

        mlx5_ib_destroy_qp(dev->umrc.qp);
        ib_destroy_cq(dev->umrc.cq);
        ib_dealloc_pd(dev->umrc.pd);
}

enum {
        MAX_UMR_WR = 128,
};

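/*
 * Create the resources needed to post UMR (user memory registration)
 * work requests: a dedicated PD, CQ and a QP of the internal
 * MLX5_IB_QPT_REG_UMR type, driven through INIT->RTR->RTS by hand,
 * plus the MR cache that feeds registrations through this QP.
 */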
static int create_umr_res(struct mlx5_ib_dev *dev)
{
        struct ib_qp_init_attr *init_attr = NULL;
        struct ib_qp_attr *attr = NULL;
        struct ib_pd *pd;
        struct ib_cq *cq;
        struct ib_qp *qp;
        struct ib_cq_init_attr cq_attr = {};
        int ret;

        attr = kzalloc(sizeof(*attr), GFP_KERNEL);
        init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
        if (!attr || !init_attr) {
                ret = -ENOMEM;
                goto error_0;
        }

        pd = ib_alloc_pd(&dev->ib_dev);
        if (IS_ERR(pd)) {
                mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
                ret = PTR_ERR(pd);
                goto error_0;
        }

        cq_attr.cqe = 128;
        cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
                          &cq_attr);
        if (IS_ERR(cq)) {
                mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
                ret = PTR_ERR(cq);
                goto error_2;
        }
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

        init_attr->send_cq = cq;
        init_attr->recv_cq = cq;
        init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
        init_attr->cap.max_send_wr = MAX_UMR_WR;
        init_attr->cap.max_send_sge = 1;
        init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
        init_attr->port_num = 1;
        qp = mlx5_ib_create_qp(pd, init_attr, NULL);
        if (IS_ERR(qp)) {
                mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
                ret = PTR_ERR(qp);
                goto error_3;
        }
        qp->device     = &dev->ib_dev;
        qp->real_qp    = qp;
        qp->uobject    = NULL;
        qp->qp_type    = MLX5_IB_QPT_REG_UMR;

        attr->qp_state = IB_QPS_INIT;
        attr->port_num = 1;
        ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
                                IB_QP_PORT, NULL);
        if (ret) {
                mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
                goto error_4;
        }

        memset(attr, 0, sizeof(*attr));
        attr->qp_state = IB_QPS_RTR;
        attr->path_mtu = IB_MTU_256;

        ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
        if (ret) {
                mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
                goto error_4;
        }

        memset(attr, 0, sizeof(*attr));
        attr->qp_state = IB_QPS_RTS;
        ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
        if (ret) {
                mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
                goto error_4;
        }

        dev->umrc.qp = qp;
        dev->umrc.cq = cq;
        dev->umrc.pd = pd;

        sema_init(&dev->umrc.sem, MAX_UMR_WR);
        ret = mlx5_mr_cache_init(dev);
        if (ret) {
                mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
                goto error_4;
        }

        kfree(attr);
        kfree(init_attr);

        return 0;

error_4:
        mlx5_ib_destroy_qp(qp);

error_3:
        ib_destroy_cq(cq);

error_2:
        ib_dealloc_pd(pd);

error_0:
        kfree(attr);
        kfree(init_attr);
        return ret;
}

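/*
 * Create the device-wide verbs objects (PD p0, CQ c0, XRCDs x0/x1 and
 * SRQs s0/s1) that the driver keeps around for internal use, e.g. as
 * placeholder resources when constructing XRC QPs.
 */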
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
        struct ib_srq_init_attr attr;
        struct mlx5_ib_dev *dev;
        struct ib_cq_init_attr cq_attr = {.cqe = 1};
        int ret = 0;

        dev = container_of(devr, struct mlx5_ib_dev, devr);

        devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
        if (IS_ERR(devr->p0)) {
                ret = PTR_ERR(devr->p0);
                goto error0;
        }
        devr->p0->device  = &dev->ib_dev;
        devr->p0->uobject = NULL;
        atomic_set(&devr->p0->usecnt, 0);

        devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
        if (IS_ERR(devr->c0)) {
                ret = PTR_ERR(devr->c0);
                goto error1;
        }
        devr->c0->device        = &dev->ib_dev;
        devr->c0->uobject       = NULL;
        devr->c0->comp_handler  = NULL;
        devr->c0->event_handler = NULL;
        devr->c0->cq_context    = NULL;
        atomic_set(&devr->c0->usecnt, 0);

        devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
        if (IS_ERR(devr->x0)) {
                ret = PTR_ERR(devr->x0);
                goto error2;
        }
        devr->x0->device = &dev->ib_dev;
        devr->x0->inode = NULL;
        atomic_set(&devr->x0->usecnt, 0);
        mutex_init(&devr->x0->tgt_qp_mutex);
        INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

        devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
        if (IS_ERR(devr->x1)) {
                ret = PTR_ERR(devr->x1);
                goto error3;
        }
        devr->x1->device = &dev->ib_dev;
        devr->x1->inode = NULL;
        atomic_set(&devr->x1->usecnt, 0);
        mutex_init(&devr->x1->tgt_qp_mutex);
        INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

        memset(&attr, 0, sizeof(attr));
        attr.attr.max_sge = 1;
        attr.attr.max_wr = 1;
        attr.srq_type = IB_SRQT_XRC;
        attr.ext.xrc.cq = devr->c0;
        attr.ext.xrc.xrcd = devr->x0;

        devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
        if (IS_ERR(devr->s0)) {
                ret = PTR_ERR(devr->s0);
                goto error4;
        }
        devr->s0->device        = &dev->ib_dev;
        devr->s0->pd            = devr->p0;
        devr->s0->uobject       = NULL;
        devr->s0->event_handler = NULL;
        devr->s0->srq_context   = NULL;
        devr->s0->srq_type      = IB_SRQT_XRC;
        devr->s0->ext.xrc.xrcd  = devr->x0;
        devr->s0->ext.xrc.cq    = devr->c0;
        atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
        atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
        atomic_inc(&devr->p0->usecnt);
        atomic_set(&devr->s0->usecnt, 0);

        memset(&attr, 0, sizeof(attr));
        attr.attr.max_sge = 1;
        attr.attr.max_wr = 1;
        attr.srq_type = IB_SRQT_BASIC;
        devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
        if (IS_ERR(devr->s1)) {
                ret = PTR_ERR(devr->s1);
                goto error5;
        }
        devr->s1->device        = &dev->ib_dev;
        devr->s1->pd            = devr->p0;
        devr->s1->uobject       = NULL;
        devr->s1->event_handler = NULL;
        devr->s1->srq_context   = NULL;
        devr->s1->srq_type      = IB_SRQT_BASIC;
        devr->s1->ext.xrc.cq    = devr->c0;
        atomic_inc(&devr->p0->usecnt);
        atomic_set(&devr->s1->usecnt, 0);

        return 0;

error5:
        mlx5_ib_destroy_srq(devr->s0);
error4:
        mlx5_ib_dealloc_xrcd(devr->x1);
error3:
        mlx5_ib_dealloc_xrcd(devr->x0);
error2:
        mlx5_ib_destroy_cq(devr->c0);
error1:
        mlx5_ib_dealloc_pd(devr->p0);
error0:
        return ret;
}

static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
        mlx5_ib_destroy_srq(devr->s1);
        mlx5_ib_destroy_srq(devr->s0);
        mlx5_ib_dealloc_xrcd(devr->x0);
        mlx5_ib_dealloc_xrcd(devr->x1);
        mlx5_ib_destroy_cq(devr->c0);
        mlx5_ib_dealloc_pd(devr->p0);
}

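/*
 * Cache the per-port attributes that the IB core treats as immutable
 * over the lifetime of the device.
 */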
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
                               struct ib_port_immutable *immutable)
{
        struct ib_port_attr attr;
        int err;

        err = mlx5_ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}

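/*
 * mlx5_core interface hooks: mlx5_ib_add() is called for each core
 * device that comes up and builds the ib_device on top of it;
 * mlx5_ib_remove() tears everything down in reverse order.
 */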
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
        struct mlx5_ib_dev *dev;
        int err;
        int i;

        /* don't create IB instance over Eth ports, no RoCE yet! */
        if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
                return NULL;

        printk_once(KERN_INFO "%s", mlx5_version);

        dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
        if (!dev)
                return NULL;

        dev->mdev = mdev;

        err = get_port_caps(dev);
        if (err)
                goto err_dealloc;

        if (mlx5_use_mad_ifc(dev))
                get_ext_port_caps(dev);

        MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);

        strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner               = THIS_MODULE;
        dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
        dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
        dev->num_ports          = MLX5_CAP_GEN(mdev, num_ports);
        dev->ib_dev.phys_port_cnt     = dev->num_ports;
        dev->ib_dev.num_comp_vectors    =
                dev->mdev->priv.eq_table.num_comp_vectors;
        dev->ib_dev.dma_device  = &mdev->pdev->dev;

        dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask     =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
                (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
                (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
                (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
                (1ull << IB_USER_VERBS_CMD_OPEN_QP);
        dev->ib_dev.uverbs_ex_cmd_mask =
                (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);

        dev->ib_dev.query_device        = mlx5_ib_query_device;
        dev->ib_dev.query_port          = mlx5_ib_query_port;
        dev->ib_dev.query_gid           = mlx5_ib_query_gid;
        dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
        dev->ib_dev.modify_device       = mlx5_ib_modify_device;
        dev->ib_dev.modify_port         = mlx5_ib_modify_port;
        dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
        dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
        dev->ib_dev.mmap                = mlx5_ib_mmap;
        dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
        dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
        dev->ib_dev.create_ah           = mlx5_ib_create_ah;
        dev->ib_dev.query_ah            = mlx5_ib_query_ah;
        dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
        dev->ib_dev.create_srq          = mlx5_ib_create_srq;
        dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
        dev->ib_dev.query_srq           = mlx5_ib_query_srq;
        dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
        dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
        dev->ib_dev.create_qp           = mlx5_ib_create_qp;
        dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
        dev->ib_dev.query_qp            = mlx5_ib_query_qp;
        dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
        dev->ib_dev.post_send           = mlx5_ib_post_send;
        dev->ib_dev.post_recv           = mlx5_ib_post_recv;
        dev->ib_dev.create_cq           = mlx5_ib_create_cq;
        dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
        dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
        dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
        dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
        dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
        dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
        dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
        dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
        dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
        dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
        dev->ib_dev.process_mad         = mlx5_ib_process_mad;
        dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
        dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
        dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
        dev->ib_dev.get_port_immutable  = mlx5_port_immutable;

        mlx5_ib_internal_fill_odp_caps(dev);

        if (MLX5_CAP_GEN(mdev, xrc)) {
                dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
                dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
                dev->ib_dev.uverbs_cmd_mask |=
                        (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
                        (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
        }

        err = init_node_data(dev);
        if (err)
                goto err_dealloc;

        mutex_init(&dev->cap_mask_mutex);

        err = create_dev_resources(&dev->devr);
        if (err)
                goto err_dealloc;

        err = mlx5_ib_odp_init_one(dev);
        if (err)
                goto err_rsrc;

        err = ib_register_device(&dev->ib_dev, NULL);
        if (err)
                goto err_odp;

        err = create_umr_res(dev);
        if (err)
                goto err_dev;

        for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
                err = device_create_file(&dev->ib_dev.dev,
                                         mlx5_class_attributes[i]);
                if (err)
                        goto err_umrc;
        }

        dev->ib_active = true;

        return dev;

err_umrc:
        destroy_umrc_res(dev);

err_dev:
        ib_unregister_device(&dev->ib_dev);

err_odp:
        mlx5_ib_odp_remove_one(dev);

err_rsrc:
        destroy_dev_resources(&dev->devr);

err_dealloc:
        ib_dealloc_device((struct ib_device *)dev);

        return NULL;
}

static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
        struct mlx5_ib_dev *dev = context;

        ib_unregister_device(&dev->ib_dev);
        destroy_umrc_res(dev);
        mlx5_ib_odp_remove_one(dev);
        destroy_dev_resources(&dev->devr);
        ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
        .add            = mlx5_ib_add,
        .remove         = mlx5_ib_remove,
        .event          = mlx5_ib_event,
        .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
};

static int __init mlx5_ib_init(void)
{
        int err;

        if (deprecated_prof_sel != 2)
                pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

        err = mlx5_ib_odp_init();
        if (err)
                return err;

        err = mlx5_register_interface(&mlx5_ib_interface);
        if (err)
                goto clean_odp;

        return err;

clean_odp:
        mlx5_ib_odp_cleanup();
        return err;
}

static void __exit mlx5_ib_cleanup(void)
{
        mlx5_unregister_interface(&mlx5_ib_interface);
        mlx5_ib_odp_cleanup();
}

module_init(mlx5_ib_init);
module_exit(mlx5_ib_cleanup);