linux/drivers/infiniband/core/nldev.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
   3 *
   4 * Redistribution and use in source and binary forms, with or without
   5 * modification, are permitted provided that the following conditions are met:
   6 *
   7 * 1. Redistributions of source code must retain the above copyright
   8 *    notice, this list of conditions and the following disclaimer.
   9 * 2. Redistributions in binary form must reproduce the above copyright
  10 *    notice, this list of conditions and the following disclaimer in the
  11 *    documentation and/or other materials provided with the distribution.
  12 * 3. Neither the names of the copyright holders nor the names of its
  13 *    contributors may be used to endorse or promote products derived from
  14 *    this software without specific prior written permission.
  15 *
  16 * Alternatively, this software may be distributed under the terms of the
  17 * GNU General Public License ("GPL") version 2 as published by the Free
  18 * Software Foundation.
  19 *
  20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30 * POSSIBILITY OF SUCH DAMAGE.
  31 */
  32
  33#include <linux/module.h>
  34#include <linux/pid.h>
  35#include <linux/pid_namespace.h>
  36#include <net/netlink.h>
  37#include <rdma/rdma_cm.h>
  38#include <rdma/rdma_netlink.h>
  39
  40#include "core_priv.h"
  41#include "cma_priv.h"
  42
  43static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
  44        [RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
  45        [RDMA_NLDEV_ATTR_DEV_NAME]      = { .type = NLA_NUL_STRING,
  46                                            .len = IB_DEVICE_NAME_MAX - 1},
  47        [RDMA_NLDEV_ATTR_PORT_INDEX]    = { .type = NLA_U32 },
  48        [RDMA_NLDEV_ATTR_FW_VERSION]    = { .type = NLA_NUL_STRING,
  49                                            .len = IB_FW_VERSION_NAME_MAX - 1},
  50        [RDMA_NLDEV_ATTR_NODE_GUID]     = { .type = NLA_U64 },
  51        [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
  52        [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
  53        [RDMA_NLDEV_ATTR_LID]           = { .type = NLA_U32 },
  54        [RDMA_NLDEV_ATTR_SM_LID]        = { .type = NLA_U32 },
  55        [RDMA_NLDEV_ATTR_LMC]           = { .type = NLA_U8 },
  56        [RDMA_NLDEV_ATTR_PORT_STATE]    = { .type = NLA_U8 },
  57        [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
  58        [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
  59        [RDMA_NLDEV_ATTR_RES_SUMMARY]   = { .type = NLA_NESTED },
  60        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]     = { .type = NLA_NESTED },
  61        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
  62                                             .len = 16 },
  63        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
  64        [RDMA_NLDEV_ATTR_RES_QP]                = { .type = NLA_NESTED },
  65        [RDMA_NLDEV_ATTR_RES_QP_ENTRY]          = { .type = NLA_NESTED },
  66        [RDMA_NLDEV_ATTR_RES_LQPN]              = { .type = NLA_U32 },
  67        [RDMA_NLDEV_ATTR_RES_RQPN]              = { .type = NLA_U32 },
  68        [RDMA_NLDEV_ATTR_RES_RQ_PSN]            = { .type = NLA_U32 },
  69        [RDMA_NLDEV_ATTR_RES_SQ_PSN]            = { .type = NLA_U32 },
  70        [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
  71        [RDMA_NLDEV_ATTR_RES_TYPE]              = { .type = NLA_U8 },
  72        [RDMA_NLDEV_ATTR_RES_STATE]             = { .type = NLA_U8 },
  73        [RDMA_NLDEV_ATTR_RES_PID]               = { .type = NLA_U32 },
  74        [RDMA_NLDEV_ATTR_RES_KERN_NAME]         = { .type = NLA_NUL_STRING,
  75                                                    .len = TASK_COMM_LEN },
  76        [RDMA_NLDEV_ATTR_RES_CM_ID]             = { .type = NLA_NESTED },
  77        [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]       = { .type = NLA_NESTED },
  78        [RDMA_NLDEV_ATTR_RES_PS]                = { .type = NLA_U32 },
  79        [RDMA_NLDEV_ATTR_RES_SRC_ADDR]  = {
  80                        .len = sizeof(struct __kernel_sockaddr_storage) },
  81        [RDMA_NLDEV_ATTR_RES_DST_ADDR]  = {
  82                        .len = sizeof(struct __kernel_sockaddr_storage) },
  83        [RDMA_NLDEV_ATTR_RES_CQ]                = { .type = NLA_NESTED },
  84        [RDMA_NLDEV_ATTR_RES_CQ_ENTRY]          = { .type = NLA_NESTED },
  85        [RDMA_NLDEV_ATTR_RES_CQE]               = { .type = NLA_U32 },
  86        [RDMA_NLDEV_ATTR_RES_USECNT]            = { .type = NLA_U64 },
  87        [RDMA_NLDEV_ATTR_RES_POLL_CTX]          = { .type = NLA_U8 },
  88        [RDMA_NLDEV_ATTR_RES_MR]                = { .type = NLA_NESTED },
  89        [RDMA_NLDEV_ATTR_RES_MR_ENTRY]          = { .type = NLA_NESTED },
  90        [RDMA_NLDEV_ATTR_RES_RKEY]              = { .type = NLA_U32 },
  91        [RDMA_NLDEV_ATTR_RES_LKEY]              = { .type = NLA_U32 },
  92        [RDMA_NLDEV_ATTR_RES_IOVA]              = { .type = NLA_U64 },
  93        [RDMA_NLDEV_ATTR_RES_MRLEN]             = { .type = NLA_U64 },
  94        [RDMA_NLDEV_ATTR_RES_PD]                = { .type = NLA_NESTED },
  95        [RDMA_NLDEV_ATTR_RES_PD_ENTRY]          = { .type = NLA_NESTED },
  96        [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]    = { .type = NLA_U32 },
  97        [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
  98        [RDMA_NLDEV_ATTR_NDEV_INDEX]            = { .type = NLA_U32 },
  99        [RDMA_NLDEV_ATTR_NDEV_NAME]             = { .type = NLA_NUL_STRING,
 100                                                    .len = IFNAMSIZ },
 101};
 102
 103static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
 104{
 105        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
 106                return -EMSGSIZE;
 107        if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
 108                return -EMSGSIZE;
 109
 110        return 0;
 111}
 112
 113static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
 114{
 115        char fw[IB_FW_VERSION_NAME_MAX];
 116
 117        if (fill_nldev_handle(msg, device))
 118                return -EMSGSIZE;
 119
 120        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
 121                return -EMSGSIZE;
 122
 123        BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
 124        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
 125                              device->attrs.device_cap_flags, 0))
 126                return -EMSGSIZE;
 127
 128        ib_get_device_fw_str(device, fw);
 129        /* Device without FW has strlen(fw) = 0 */
 130        if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
 131                return -EMSGSIZE;
 132
 133        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
 134                              be64_to_cpu(device->node_guid), 0))
 135                return -EMSGSIZE;
 136        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
 137                              be64_to_cpu(device->attrs.sys_image_guid), 0))
 138                return -EMSGSIZE;
 139        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
 140                return -EMSGSIZE;
 141        return 0;
 142}
 143
 144static int fill_port_info(struct sk_buff *msg,
 145                          struct ib_device *device, u32 port,
 146                          const struct net *net)
 147{
 148        struct net_device *netdev = NULL;
 149        struct ib_port_attr attr;
 150        int ret;
 151
 152        if (fill_nldev_handle(msg, device))
 153                return -EMSGSIZE;
 154
 155        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
 156                return -EMSGSIZE;
 157
 158        ret = ib_query_port(device, port, &attr);
 159        if (ret)
 160                return ret;
 161
 162        BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
 163        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
 164                              (u64)attr.port_cap_flags, 0))
 165                return -EMSGSIZE;
 166        if (rdma_protocol_ib(device, port) &&
 167            nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
 168                              attr.subnet_prefix, 0))
 169                return -EMSGSIZE;
 170        if (rdma_protocol_ib(device, port)) {
 171                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
 172                        return -EMSGSIZE;
 173                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
 174                        return -EMSGSIZE;
 175                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
 176                        return -EMSGSIZE;
 177        }
 178        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
 179                return -EMSGSIZE;
 180        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
 181                return -EMSGSIZE;
 182
 183        if (device->get_netdev)
 184                netdev = device->get_netdev(device, port);
 185
 186        if (netdev && net_eq(dev_net(netdev), net)) {
 187                ret = nla_put_u32(msg,
 188                                  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
 189                if (ret)
 190                        goto out;
 191                ret = nla_put_string(msg,
 192                                     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
 193        }
 194
 195out:
 196        if (netdev)
 197                dev_put(netdev);
 198        return ret;
 199}
 200
 201static int fill_res_info_entry(struct sk_buff *msg,
 202                               const char *name, u64 curr)
 203{
 204        struct nlattr *entry_attr;
 205
 206        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
 207        if (!entry_attr)
 208                return -EMSGSIZE;
 209
 210        if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
 211                goto err;
 212        if (nla_put_u64_64bit(msg,
 213                              RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
 214                goto err;
 215
 216        nla_nest_end(msg, entry_attr);
 217        return 0;
 218
 219err:
 220        nla_nest_cancel(msg, entry_attr);
 221        return -EMSGSIZE;
 222}
 223
 224static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
 225{
 226        static const char * const names[RDMA_RESTRACK_MAX] = {
 227                [RDMA_RESTRACK_PD] = "pd",
 228                [RDMA_RESTRACK_CQ] = "cq",
 229                [RDMA_RESTRACK_QP] = "qp",
 230                [RDMA_RESTRACK_CM_ID] = "cm_id",
 231                [RDMA_RESTRACK_MR] = "mr",
 232        };
 233
 234        struct rdma_restrack_root *res = &device->res;
 235        struct nlattr *table_attr;
 236        int ret, i, curr;
 237
 238        if (fill_nldev_handle(msg, device))
 239                return -EMSGSIZE;
 240
 241        table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
 242        if (!table_attr)
 243                return -EMSGSIZE;
 244
 245        for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
 246                if (!names[i])
 247                        continue;
 248                curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
 249                ret = fill_res_info_entry(msg, names[i], curr);
 250                if (ret)
 251                        goto err;
 252        }
 253
 254        nla_nest_end(msg, table_attr);
 255        return 0;
 256
 257err:
 258        nla_nest_cancel(msg, table_attr);
 259        return ret;
 260}
 261
 262static int fill_res_name_pid(struct sk_buff *msg,
 263                             struct rdma_restrack_entry *res)
 264{
 265        /*
 266         * For user resources, user is should read /proc/PID/comm to get the
 267         * name of the task file.
 268         */
 269        if (rdma_is_kernel_res(res)) {
 270                if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
 271                    res->kern_name))
 272                        return -EMSGSIZE;
 273        } else {
 274                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
 275                    task_pid_vnr(res->task)))
 276                        return -EMSGSIZE;
 277        }
 278        return 0;
 279}
 280
 281static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
 282                             struct rdma_restrack_entry *res, uint32_t port)
 283{
 284        struct ib_qp *qp = container_of(res, struct ib_qp, res);
 285        struct ib_qp_init_attr qp_init_attr;
 286        struct nlattr *entry_attr;
 287        struct ib_qp_attr qp_attr;
 288        int ret;
 289
 290        ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
 291        if (ret)
 292                return ret;
 293
 294        if (port && port != qp_attr.port_num)
 295                return 0;
 296
 297        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
 298        if (!entry_attr)
 299                goto out;
 300
 301        /* In create_qp() port is not set yet */
 302        if (qp_attr.port_num &&
 303            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
 304                goto err;
 305
 306        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
 307                goto err;
 308        if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
 309                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
 310                                qp_attr.dest_qp_num))
 311                        goto err;
 312                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
 313                                qp_attr.rq_psn))
 314                        goto err;
 315        }
 316
 317        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
 318                goto err;
 319
 320        if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
 321            qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
 322                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
 323                               qp_attr.path_mig_state))
 324                        goto err;
 325        }
 326        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
 327                goto err;
 328        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
 329                goto err;
 330
 331        if (fill_res_name_pid(msg, res))
 332                goto err;
 333
 334        nla_nest_end(msg, entry_attr);
 335        return 0;
 336
 337err:
 338        nla_nest_cancel(msg, entry_attr);
 339out:
 340        return -EMSGSIZE;
 341}
 342
 343static int fill_res_cm_id_entry(struct sk_buff *msg,
 344                                struct netlink_callback *cb,
 345                                struct rdma_restrack_entry *res, uint32_t port)
 346{
 347        struct rdma_id_private *id_priv =
 348                                container_of(res, struct rdma_id_private, res);
 349        struct rdma_cm_id *cm_id = &id_priv->id;
 350        struct nlattr *entry_attr;
 351
 352        if (port && port != cm_id->port_num)
 353                return 0;
 354
 355        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
 356        if (!entry_attr)
 357                goto out;
 358
 359        if (cm_id->port_num &&
 360            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
 361                goto err;
 362
 363        if (id_priv->qp_num) {
 364                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
 365                        goto err;
 366                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
 367                        goto err;
 368        }
 369
 370        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
 371                goto err;
 372
 373        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
 374                goto err;
 375
 376        if (cm_id->route.addr.src_addr.ss_family &&
 377            nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
 378                    sizeof(cm_id->route.addr.src_addr),
 379                    &cm_id->route.addr.src_addr))
 380                goto err;
 381        if (cm_id->route.addr.dst_addr.ss_family &&
 382            nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
 383                    sizeof(cm_id->route.addr.dst_addr),
 384                    &cm_id->route.addr.dst_addr))
 385                goto err;
 386
 387        if (fill_res_name_pid(msg, res))
 388                goto err;
 389
 390        nla_nest_end(msg, entry_attr);
 391        return 0;
 392
 393err:
 394        nla_nest_cancel(msg, entry_attr);
 395out:
 396        return -EMSGSIZE;
 397}
 398
 399static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
 400                             struct rdma_restrack_entry *res, uint32_t port)
 401{
 402        struct ib_cq *cq = container_of(res, struct ib_cq, res);
 403        struct nlattr *entry_attr;
 404
 405        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
 406        if (!entry_attr)
 407                goto out;
 408
 409        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
 410                goto err;
 411        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
 412                              atomic_read(&cq->usecnt), 0))
 413                goto err;
 414
 415        /* Poll context is only valid for kernel CQs */
 416        if (rdma_is_kernel_res(res) &&
 417            nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
 418                goto err;
 419
 420        if (fill_res_name_pid(msg, res))
 421                goto err;
 422
 423        nla_nest_end(msg, entry_attr);
 424        return 0;
 425
 426err:
 427        nla_nest_cancel(msg, entry_attr);
 428out:
 429        return -EMSGSIZE;
 430}
 431
 432static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
 433                             struct rdma_restrack_entry *res, uint32_t port)
 434{
 435        struct ib_mr *mr = container_of(res, struct ib_mr, res);
 436        struct nlattr *entry_attr;
 437
 438        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
 439        if (!entry_attr)
 440                goto out;
 441
 442        if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
 443                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
 444                        goto err;
 445                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
 446                        goto err;
 447                if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
 448                                      mr->iova, 0))
 449                        goto err;
 450        }
 451
 452        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
 453                goto err;
 454
 455        if (fill_res_name_pid(msg, res))
 456                goto err;
 457
 458        nla_nest_end(msg, entry_attr);
 459        return 0;
 460
 461err:
 462        nla_nest_cancel(msg, entry_attr);
 463out:
 464        return -EMSGSIZE;
 465}
 466
 467static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
 468                             struct rdma_restrack_entry *res, uint32_t port)
 469{
 470        struct ib_pd *pd = container_of(res, struct ib_pd, res);
 471        struct nlattr *entry_attr;
 472
 473        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
 474        if (!entry_attr)
 475                goto out;
 476
 477        if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
 478                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
 479                                pd->local_dma_lkey))
 480                        goto err;
 481                if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
 482                    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
 483                                pd->unsafe_global_rkey))
 484                        goto err;
 485        }
 486        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
 487                              atomic_read(&pd->usecnt), 0))
 488                goto err;
 489        if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
 490            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
 491                        pd->unsafe_global_rkey))
 492                goto err;
 493
 494        if (fill_res_name_pid(msg, res))
 495                goto err;
 496
 497        nla_nest_end(msg, entry_attr);
 498        return 0;
 499
 500err:
 501        nla_nest_cancel(msg, entry_attr);
 502out:
 503        return -EMSGSIZE;
 504}
 505
 506static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 507                          struct netlink_ext_ack *extack)
 508{
 509        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 510        struct ib_device *device;
 511        struct sk_buff *msg;
 512        u32 index;
 513        int err;
 514
 515        err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 516                          nldev_policy, extack);
 517        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
 518                return -EINVAL;
 519
 520        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 521
 522        device = ib_device_get_by_index(index);
 523        if (!device)
 524                return -EINVAL;
 525
 526        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 527        if (!msg) {
 528                err = -ENOMEM;
 529                goto err;
 530        }
 531
 532        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 533                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 534                        0, 0);
 535
 536        err = fill_dev_info(msg, device);
 537        if (err)
 538                goto err_free;
 539
 540        nlmsg_end(msg, nlh);
 541
 542        put_device(&device->dev);
 543        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
 544
 545err_free:
 546        nlmsg_free(msg);
 547err:
 548        put_device(&device->dev);
 549        return err;
 550}
 551
 552static int _nldev_get_dumpit(struct ib_device *device,
 553                             struct sk_buff *skb,
 554                             struct netlink_callback *cb,
 555                             unsigned int idx)
 556{
 557        int start = cb->args[0];
 558        struct nlmsghdr *nlh;
 559
 560        if (idx < start)
 561                return 0;
 562
 563        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 564                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 565                        0, NLM_F_MULTI);
 566
 567        if (fill_dev_info(skb, device)) {
 568                nlmsg_cancel(skb, nlh);
 569                goto out;
 570        }
 571
 572        nlmsg_end(skb, nlh);
 573
 574        idx++;
 575
 576out:    cb->args[0] = idx;
 577        return skb->len;
 578}
 579
 580static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 581{
 582        /*
 583         * There is no need to take lock, because
 584         * we are relying on ib_core's lists_rwsem
 585         */
 586        return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
 587}
 588
 589static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 590                               struct netlink_ext_ack *extack)
 591{
 592        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 593        struct ib_device *device;
 594        struct sk_buff *msg;
 595        u32 index;
 596        u32 port;
 597        int err;
 598
 599        err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 600                          nldev_policy, extack);
 601        if (err ||
 602            !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
 603            !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
 604                return -EINVAL;
 605
 606        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 607        device = ib_device_get_by_index(index);
 608        if (!device)
 609                return -EINVAL;
 610
 611        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
 612        if (!rdma_is_port_valid(device, port)) {
 613                err = -EINVAL;
 614                goto err;
 615        }
 616
 617        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 618        if (!msg) {
 619                err = -ENOMEM;
 620                goto err;
 621        }
 622
 623        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 624                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 625                        0, 0);
 626
 627        err = fill_port_info(msg, device, port, sock_net(skb->sk));
 628        if (err)
 629                goto err_free;
 630
 631        nlmsg_end(msg, nlh);
 632        put_device(&device->dev);
 633
 634        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
 635
 636err_free:
 637        nlmsg_free(msg);
 638err:
 639        put_device(&device->dev);
 640        return err;
 641}
 642
 643static int nldev_port_get_dumpit(struct sk_buff *skb,
 644                                 struct netlink_callback *cb)
 645{
 646        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 647        struct ib_device *device;
 648        int start = cb->args[0];
 649        struct nlmsghdr *nlh;
 650        u32 idx = 0;
 651        u32 ifindex;
 652        int err;
 653        u32 p;
 654
 655        err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 656                          nldev_policy, NULL);
 657        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
 658                return -EINVAL;
 659
 660        ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 661        device = ib_device_get_by_index(ifindex);
 662        if (!device)
 663                return -EINVAL;
 664
 665        for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
 666                /*
 667                 * The dumpit function returns all information from specific
 668                 * index. This specific index is taken from the netlink
 669                 * messages request sent by user and it is available
 670                 * in cb->args[0].
 671                 *
 672                 * Usually, the user doesn't fill this field and it causes
 673                 * to return everything.
 674                 *
 675                 */
 676                if (idx < start) {
 677                        idx++;
 678                        continue;
 679                }
 680
 681                nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
 682                                cb->nlh->nlmsg_seq,
 683                                RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 684                                                 RDMA_NLDEV_CMD_PORT_GET),
 685                                0, NLM_F_MULTI);
 686
 687                if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
 688                        nlmsg_cancel(skb, nlh);
 689                        goto out;
 690                }
 691                idx++;
 692                nlmsg_end(skb, nlh);
 693        }
 694
 695out:
 696        put_device(&device->dev);
 697        cb->args[0] = idx;
 698        return skb->len;
 699}
 700
 701static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 702                              struct netlink_ext_ack *extack)
 703{
 704        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 705        struct ib_device *device;
 706        struct sk_buff *msg;
 707        u32 index;
 708        int ret;
 709
 710        ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 711                          nldev_policy, extack);
 712        if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
 713                return -EINVAL;
 714
 715        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 716        device = ib_device_get_by_index(index);
 717        if (!device)
 718                return -EINVAL;
 719
 720        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 721        if (!msg) {
 722                ret = -ENOMEM;
 723                goto err;
 724        }
 725
 726        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 727                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
 728                        0, 0);
 729
 730        ret = fill_res_info(msg, device);
 731        if (ret)
 732                goto err_free;
 733
 734        nlmsg_end(msg, nlh);
 735        put_device(&device->dev);
 736        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
 737
 738err_free:
 739        nlmsg_free(msg);
 740err:
 741        put_device(&device->dev);
 742        return ret;
 743}
 744
 745static int _nldev_res_get_dumpit(struct ib_device *device,
 746                                 struct sk_buff *skb,
 747                                 struct netlink_callback *cb,
 748                                 unsigned int idx)
 749{
 750        int start = cb->args[0];
 751        struct nlmsghdr *nlh;
 752
 753        if (idx < start)
 754                return 0;
 755
 756        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 757                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
 758                        0, NLM_F_MULTI);
 759
 760        if (fill_res_info(skb, device)) {
 761                nlmsg_cancel(skb, nlh);
 762                goto out;
 763        }
 764
 765        nlmsg_end(skb, nlh);
 766
 767        idx++;
 768
 769out:
 770        cb->args[0] = idx;
 771        return skb->len;
 772}
 773
 774static int nldev_res_get_dumpit(struct sk_buff *skb,
 775                                struct netlink_callback *cb)
 776{
 777        return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
 778}
 779
 780struct nldev_fill_res_entry {
 781        int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
 782                             struct rdma_restrack_entry *res, u32 port);
 783        enum rdma_nldev_attr nldev_attr;
 784        enum rdma_nldev_command nldev_cmd;
 785};
 786
 787static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 788        [RDMA_RESTRACK_QP] = {
 789                .fill_res_func = fill_res_qp_entry,
 790                .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
 791                .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
 792        },
 793        [RDMA_RESTRACK_CM_ID] = {
 794                .fill_res_func = fill_res_cm_id_entry,
 795                .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
 796                .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
 797        },
 798        [RDMA_RESTRACK_CQ] = {
 799                .fill_res_func = fill_res_cq_entry,
 800                .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
 801                .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
 802        },
 803        [RDMA_RESTRACK_MR] = {
 804                .fill_res_func = fill_res_mr_entry,
 805                .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
 806                .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
 807        },
 808        [RDMA_RESTRACK_PD] = {
 809                .fill_res_func = fill_res_pd_entry,
 810                .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
 811                .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
 812        },
 813};
 814
 815static int res_get_common_dumpit(struct sk_buff *skb,
 816                                 struct netlink_callback *cb,
 817                                 enum rdma_restrack_type res_type)
 818{
 819        const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
 820        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 821        struct rdma_restrack_entry *res;
 822        int err, ret = 0, idx = 0;
 823        struct nlattr *table_attr;
 824        struct ib_device *device;
 825        int start = cb->args[0];
 826        struct nlmsghdr *nlh;
 827        u32 index, port = 0;
 828        bool filled = false;
 829
 830        err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 831                          nldev_policy, NULL);
 832        /*
 833         * Right now, we are expecting the device index to get res information,
 834         * but it is possible to extend this code to return all devices in
 835         * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
 836         * if it doesn't exist, we will iterate over all devices.
 837         *
 838         * But it is not needed for now.
 839         */
 840        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
 841                return -EINVAL;
 842
 843        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 844        device = ib_device_get_by_index(index);
 845        if (!device)
 846                return -EINVAL;
 847
 848        /*
 849         * If no PORT_INDEX is supplied, we will return all QPs from that device
 850         */
 851        if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
 852                port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
 853                if (!rdma_is_port_valid(device, port)) {
 854                        ret = -EINVAL;
 855                        goto err_index;
 856                }
 857        }
 858
 859        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 860                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
 861                        0, NLM_F_MULTI);
 862
 863        if (fill_nldev_handle(skb, device)) {
 864                ret = -EMSGSIZE;
 865                goto err;
 866        }
 867
 868        table_attr = nla_nest_start(skb, fe->nldev_attr);
 869        if (!table_attr) {
 870                ret = -EMSGSIZE;
 871                goto err;
 872        }
 873
 874        down_read(&device->res.rwsem);
 875        hash_for_each_possible(device->res.hash, res, node, res_type) {
 876                if (idx < start)
 877                        goto next;
 878
 879                if ((rdma_is_kernel_res(res) &&
 880                     task_active_pid_ns(current) != &init_pid_ns) ||
 881                    (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
 882                     task_active_pid_ns(res->task)))
 883                        /*
 884                         * 1. Kern resources should be visible in init
 885                         *    namspace only
 886                         * 2. Present only resources visible in the current
 887                         *    namespace
 888                         */
 889                        goto next;
 890
 891                if (!rdma_restrack_get(res))
 892                        /*
 893                         * Resource is under release now, but we are not
 894                         * relesing lock now, so it will be released in
 895                         * our next pass, once we will get ->next pointer.
 896                         */
 897                        goto next;
 898
 899                filled = true;
 900
 901                up_read(&device->res.rwsem);
 902                ret = fe->fill_res_func(skb, cb, res, port);
 903                down_read(&device->res.rwsem);
 904                /*
 905                 * Return resource back, but it won't be released till
 906                 * the &device->res.rwsem will be released for write.
 907                 */
 908                rdma_restrack_put(res);
 909
 910                if (ret == -EMSGSIZE)
 911                        /*
 912                         * There is a chance to optimize here.
 913                         * It can be done by using list_prepare_entry
 914                         * and list_for_each_entry_continue afterwards.
 915                         */
 916                        break;
 917                if (ret)
 918                        goto res_err;
 919next:           idx++;
 920        }
 921        up_read(&device->res.rwsem);
 922
 923        nla_nest_end(skb, table_attr);
 924        nlmsg_end(skb, nlh);
 925        cb->args[0] = idx;
 926
 927        /*
 928         * No more entries to fill, cancel the message and
 929         * return 0 to mark end of dumpit.
 930         */
 931        if (!filled)
 932                goto err;
 933
 934        put_device(&device->dev);
 935        return skb->len;
 936
 937res_err:
 938        nla_nest_cancel(skb, table_attr);
 939        up_read(&device->res.rwsem);
 940
 941err:
 942        nlmsg_cancel(skb, nlh);
 943
 944err_index:
 945        put_device(&device->dev);
 946        return ret;
 947}
 948
 949static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 950                                   struct netlink_callback *cb)
 951{
 952        return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
 953}
 954
 955static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
 956                                      struct netlink_callback *cb)
 957{
 958        return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
 959}
 960
 961static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
 962                                   struct netlink_callback *cb)
 963{
 964        return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
 965}
 966
 967static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
 968                                   struct netlink_callback *cb)
 969{
 970        return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
 971}
 972
 973static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
 974                                   struct netlink_callback *cb)
 975{
 976        return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
 977}
 978
 979static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 980        [RDMA_NLDEV_CMD_GET] = {
 981                .doit = nldev_get_doit,
 982                .dump = nldev_get_dumpit,
 983        },
 984        [RDMA_NLDEV_CMD_PORT_GET] = {
 985                .doit = nldev_port_get_doit,
 986                .dump = nldev_port_get_dumpit,
 987        },
 988        [RDMA_NLDEV_CMD_RES_GET] = {
 989                .doit = nldev_res_get_doit,
 990                .dump = nldev_res_get_dumpit,
 991        },
 992        [RDMA_NLDEV_CMD_RES_QP_GET] = {
 993                .dump = nldev_res_get_qp_dumpit,
 994                /*
 995                 * .doit is not implemented yet for two reasons:
 996                 * 1. It is not needed yet.
 997                 * 2. There is a need to provide identifier, while it is easy
 998                 * for the QPs (device index + port index + LQPN), it is not
 999                 * the case for the rest of resources (PD and CQ). Because it
1000                 * is better to provide similar interface for all resources,
1001                 * let's wait till we will have other resources implemented
1002                 * too.
1003                 */
1004        },
1005        [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
1006                .dump = nldev_res_get_cm_id_dumpit,
1007        },
1008        [RDMA_NLDEV_CMD_RES_CQ_GET] = {
1009                .dump = nldev_res_get_cq_dumpit,
1010        },
1011        [RDMA_NLDEV_CMD_RES_MR_GET] = {
1012                .dump = nldev_res_get_mr_dumpit,
1013        },
1014        [RDMA_NLDEV_CMD_RES_PD_GET] = {
1015                .dump = nldev_res_get_pd_dumpit,
1016        },
1017};
1018
1019void __init nldev_init(void)
1020{
1021        rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
1022}
1023
1024void __exit nldev_exit(void)
1025{
1026        rdma_nl_unregister(RDMA_NL_NLDEV);
1027}
1028
1029MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
1030