linux/drivers/infiniband/core/ucma.c
   1/*
   2 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *      copyright notice, this list of conditions and the following
  16 *      disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *      copyright notice, this list of conditions and the following
  20 *      disclaimer in the documentation and/or other materials
  21 *      provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include <linux/completion.h>
  34#include <linux/file.h>
  35#include <linux/mutex.h>
  36#include <linux/poll.h>
  37#include <linux/sched.h>
  38#include <linux/idr.h>
  39#include <linux/in.h>
  40#include <linux/in6.h>
  41#include <linux/miscdevice.h>
  42#include <linux/slab.h>
  43#include <linux/sysctl.h>
  44#include <linux/module.h>
  45#include <linux/nsproxy.h>
  46
  47#include <linux/nospec.h>
  48
  49#include <rdma/rdma_user_cm.h>
  50#include <rdma/ib_marshall.h>
  51#include <rdma/rdma_cm.h>
  52#include <rdma/rdma_cm_ib.h>
  53#include <rdma/ib_addr.h>
  54#include <rdma/ib.h>
  55#include <rdma/ib_cm.h>
  56#include <rdma/rdma_netlink.h>
  57#include "core_priv.h"
  58
  59MODULE_AUTHOR("Sean Hefty");
  60MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
  61MODULE_LICENSE("Dual BSD/GPL");
  62
  63static unsigned int max_backlog = 1024;
  64
  65static struct ctl_table_header *ucma_ctl_table_hdr;
  66static struct ctl_table ucma_ctl_table[] = {
  67        {
  68                .procname       = "max_backlog",
  69                .data           = &max_backlog,
  70                .maxlen         = sizeof max_backlog,
  71                .mode           = 0644,
  72                .proc_handler   = proc_dointvec,
  73        },
  74        { }
  75};
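     /*
      * max_backlog is exposed through this sysctl table; the registration via
      * ucma_ctl_table_hdr happens in the module init code, which is outside
      * this excerpt (upstream it appears under net/rdma_ucm/).
      */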
  76
  77struct ucma_file {
  78        struct mutex            mut;
  79        struct file             *filp;
  80        struct list_head        ctx_list;
  81        struct list_head        event_list;
  82        wait_queue_head_t       poll_wait;
  83};
  84
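     /*
      * One ucma_context wraps one rdma_cm_id on behalf of the owning file.
      * ref counts in-flight users (the xarray entry plus any callers that
      * took it via ucma_get_ctx()); the final ucma_put_ctx() completes comp,
      * which ucma_close_id() waits on before destroying the cm_id.
      */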
  85struct ucma_context {
  86        u32                     id;
  87        struct completion       comp;
  88        refcount_t              ref;
  89        int                     events_reported;
  90        atomic_t                backlog;
  91
  92        struct ucma_file        *file;
  93        struct rdma_cm_id       *cm_id;
  94        struct mutex            mutex;
  95        u64                     uid;
  96
  97        struct list_head        list;
  98        struct work_struct      close_work;
  99};
 100
 101struct ucma_multicast {
 102        struct ucma_context     *ctx;
 103        u32                     id;
 104        int                     events_reported;
 105
 106        u64                     uid;
 107        u8                      join_state;
 108        struct sockaddr_storage addr;
 109};
 110
 111struct ucma_event {
 112        struct ucma_context     *ctx;
 113        struct ucma_context     *conn_req_ctx;
 114        struct ucma_multicast   *mc;
 115        struct list_head        list;
 116        struct rdma_ucm_event_resp resp;
 117};
 118
 119static DEFINE_XARRAY_ALLOC(ctx_table);
 120static DEFINE_XARRAY_ALLOC(multicast_table);
 121
 122static const struct file_operations ucma_fops;
 123static int ucma_destroy_private_ctx(struct ucma_context *ctx);
 124
 125static inline struct ucma_context *_ucma_find_context(int id,
 126                                                      struct ucma_file *file)
 127{
 128        struct ucma_context *ctx;
 129
 130        ctx = xa_load(&ctx_table, id);
 131        if (!ctx)
 132                ctx = ERR_PTR(-ENOENT);
 133        else if (ctx->file != file)
 134                ctx = ERR_PTR(-EINVAL);
 135        return ctx;
 136}
 137
 138static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
 139{
 140        struct ucma_context *ctx;
 141
 142        xa_lock(&ctx_table);
 143        ctx = _ucma_find_context(id, file);
 144        if (!IS_ERR(ctx))
 145                if (!refcount_inc_not_zero(&ctx->ref))
 146                        ctx = ERR_PTR(-ENXIO);
 147        xa_unlock(&ctx_table);
 148        return ctx;
 149}
 150
 151static void ucma_put_ctx(struct ucma_context *ctx)
 152{
 153        if (refcount_dec_and_test(&ctx->ref))
 154                complete(&ctx->comp);
 155}
 156
  157/*
  158 * Same as ucma_get_ctx() but additionally requires that ->cm_id->device is
  159 * valid, i.e. that the CM_ID is bound to a device.
  160 */
 161static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
 162{
 163        struct ucma_context *ctx = ucma_get_ctx(file, id);
 164
 165        if (IS_ERR(ctx))
 166                return ctx;
 167        if (!ctx->cm_id->device) {
 168                ucma_put_ctx(ctx);
 169                return ERR_PTR(-EINVAL);
 170        }
 171        return ctx;
 172}
 173
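     /*
      * Runs from system_unbound_wq after a device removal event, and is also
      * called directly by ucma_destroy_private_ctx() when a reference is
      * still outstanding.
      */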
 174static void ucma_close_id(struct work_struct *work)
 175{
  176        struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
 177
  178        /* Once all inflight tasks are finished, close all underlying
  179         * resources. The context stays alive until it is explicitly
  180         * destroyed by its creator. This puts back the xarray's reference.
  181         */
 182        ucma_put_ctx(ctx);
 183        wait_for_completion(&ctx->comp);
 184        /* No new events will be generated after destroying the id. */
 185        rdma_destroy_id(ctx->cm_id);
 186
 187        /* Reading the cm_id without holding a positive ref is not allowed */
 188        ctx->cm_id = NULL;
 189}
 190
 191static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 192{
 193        struct ucma_context *ctx;
 194
 195        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 196        if (!ctx)
 197                return NULL;
 198
 199        INIT_WORK(&ctx->close_work, ucma_close_id);
 200        init_completion(&ctx->comp);
 201        /* So list_del() will work if we don't do ucma_finish_ctx() */
 202        INIT_LIST_HEAD(&ctx->list);
 203        ctx->file = file;
 204        mutex_init(&ctx->mutex);
 205
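             /*
              * Reserve an id but store NULL, so lookups fail until
              * ucma_finish_ctx() publishes the pointer.
              */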
 206        if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
 207                kfree(ctx);
 208                return NULL;
 209        }
 210        return ctx;
 211}
 212
 213static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
 214                               struct rdma_cm_id *cm_id)
 215{
 216        refcount_set(&ctx->ref, 1);
 217        ctx->cm_id = cm_id;
 218}
 219
 220static void ucma_finish_ctx(struct ucma_context *ctx)
 221{
 222        lockdep_assert_held(&ctx->file->mut);
 223        list_add_tail(&ctx->list, &ctx->file->ctx_list);
 224        xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
 225}
 226
 227static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
 228                                 struct rdma_conn_param *src)
 229{
 230        if (src->private_data_len)
 231                memcpy(dst->private_data, src->private_data,
 232                       src->private_data_len);
 233        dst->private_data_len = src->private_data_len;
 234        dst->responder_resources = src->responder_resources;
 235        dst->initiator_depth = src->initiator_depth;
 236        dst->flow_control = src->flow_control;
 237        dst->retry_count = src->retry_count;
 238        dst->rnr_retry_count = src->rnr_retry_count;
 239        dst->srq = src->srq;
 240        dst->qp_num = src->qp_num;
 241}
 242
 243static void ucma_copy_ud_event(struct ib_device *device,
 244                               struct rdma_ucm_ud_param *dst,
 245                               struct rdma_ud_param *src)
 246{
 247        if (src->private_data_len)
 248                memcpy(dst->private_data, src->private_data,
 249                       src->private_data_len);
 250        dst->private_data_len = src->private_data_len;
 251        ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
 252        dst->qp_num = src->qp_num;
 253        dst->qkey = src->qkey;
 254}
 255
 256static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
 257                                             struct rdma_cm_event *event)
 258{
 259        struct ucma_event *uevent;
 260
 261        uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
 262        if (!uevent)
 263                return NULL;
 264
 265        uevent->ctx = ctx;
 266        switch (event->event) {
 267        case RDMA_CM_EVENT_MULTICAST_JOIN:
 268        case RDMA_CM_EVENT_MULTICAST_ERROR:
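                     /* The mc was passed as the context to rdma_join_multicast() */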
 269                uevent->mc = (struct ucma_multicast *)
 270                             event->param.ud.private_data;
 271                uevent->resp.uid = uevent->mc->uid;
 272                uevent->resp.id = uevent->mc->id;
 273                break;
 274        default:
 275                uevent->resp.uid = ctx->uid;
 276                uevent->resp.id = ctx->id;
 277                break;
 278        }
 279        uevent->resp.event = event->event;
 280        uevent->resp.status = event->status;
 281        if (ctx->cm_id->qp_type == IB_QPT_UD)
 282                ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
 283                                   &event->param.ud);
 284        else
 285                ucma_copy_conn_event(&uevent->resp.param.conn,
 286                                     &event->param.conn);
 287
 288        uevent->resp.ece.vendor_id = event->ece.vendor_id;
 289        uevent->resp.ece.attr_mod = event->ece.attr_mod;
 290        return uevent;
 291}
 292
 293static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
 294                                      struct rdma_cm_event *event)
 295{
 296        struct ucma_context *listen_ctx = cm_id->context;
 297        struct ucma_context *ctx;
 298        struct ucma_event *uevent;
 299
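             /*
              * Consume one backlog slot; it is handed back when userspace
              * reads the CONNECT_REQUEST event, or on the error path below.
              */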
 300        if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
 301                return -ENOMEM;
 302        ctx = ucma_alloc_ctx(listen_ctx->file);
 303        if (!ctx)
 304                goto err_backlog;
 305        ucma_set_ctx_cm_id(ctx, cm_id);
 306
 307        uevent = ucma_create_uevent(listen_ctx, event);
 308        if (!uevent)
 309                goto err_alloc;
 310        uevent->conn_req_ctx = ctx;
 311        uevent->resp.id = ctx->id;
 312
 313        ctx->cm_id->context = ctx;
 314
 315        mutex_lock(&ctx->file->mut);
 316        ucma_finish_ctx(ctx);
 317        list_add_tail(&uevent->list, &ctx->file->event_list);
 318        mutex_unlock(&ctx->file->mut);
 319        wake_up_interruptible(&ctx->file->poll_wait);
 320        return 0;
 321
 322err_alloc:
 323        ucma_destroy_private_ctx(ctx);
 324err_backlog:
 325        atomic_inc(&listen_ctx->backlog);
 326        /* Returning error causes the new ID to be destroyed */
 327        return -ENOMEM;
 328}
 329
 330static int ucma_event_handler(struct rdma_cm_id *cm_id,
 331                              struct rdma_cm_event *event)
 332{
 333        struct ucma_event *uevent;
 334        struct ucma_context *ctx = cm_id->context;
 335
 336        if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
 337                return ucma_connect_event_handler(cm_id, event);
 338
 339        /*
 340         * We ignore events for new connections until userspace has set their
 341         * context.  This can only happen if an error occurs on a new connection
 342         * before the user accepts it.  This is okay, since the accept will just
 343         * fail later. However, we do need to release the underlying HW
 344         * resources in case of a device removal event.
 345         */
 346        if (ctx->uid) {
 347                uevent = ucma_create_uevent(ctx, event);
 348                if (!uevent)
 349                        return 0;
 350
 351                mutex_lock(&ctx->file->mut);
 352                list_add_tail(&uevent->list, &ctx->file->event_list);
 353                mutex_unlock(&ctx->file->mut);
 354                wake_up_interruptible(&ctx->file->poll_wait);
 355        }
 356
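             /*
              * On device removal, queue the deferred close only if the ctx is
              * still published in the xarray; otherwise a destroy is already
              * in progress and owns the teardown.
              */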
 357        if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
 358                xa_lock(&ctx_table);
 359                if (xa_load(&ctx_table, ctx->id) == ctx)
 360                        queue_work(system_unbound_wq, &ctx->close_work);
 361                xa_unlock(&ctx_table);
 362        }
 363        return 0;
 364}
 365
 366static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
 367                              int in_len, int out_len)
 368{
 369        struct rdma_ucm_get_event cmd;
 370        struct ucma_event *uevent;
 371
 372        /*
 373         * Old 32 bit user space does not send the 4 byte padding in the
 374         * reserved field. We don't care, allow it to keep working.
 375         */
 376        if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
 377                              sizeof(uevent->resp.ece))
 378                return -ENOSPC;
 379
 380        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 381                return -EFAULT;
 382
 383        mutex_lock(&file->mut);
 384        while (list_empty(&file->event_list)) {
 385                mutex_unlock(&file->mut);
 386
 387                if (file->filp->f_flags & O_NONBLOCK)
 388                        return -EAGAIN;
 389
 390                if (wait_event_interruptible(file->poll_wait,
 391                                             !list_empty(&file->event_list)))
 392                        return -ERESTARTSYS;
 393
 394                mutex_lock(&file->mut);
 395        }
 396
 397        uevent = list_first_entry(&file->event_list, struct ucma_event, list);
 398
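             /* Dequeue the event only after the copy to userspace succeeds */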
 399        if (copy_to_user(u64_to_user_ptr(cmd.response),
 400                         &uevent->resp,
 401                         min_t(size_t, out_len, sizeof(uevent->resp)))) {
 402                mutex_unlock(&file->mut);
 403                return -EFAULT;
 404        }
 405
 406        list_del(&uevent->list);
 407        uevent->ctx->events_reported++;
 408        if (uevent->mc)
 409                uevent->mc->events_reported++;
 410        if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
 411                atomic_inc(&uevent->ctx->backlog);
 412        mutex_unlock(&file->mut);
 413
 414        kfree(uevent);
 415        return 0;
 416}
 417
 418static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
 419{
 420        switch (cmd->ps) {
 421        case RDMA_PS_TCP:
 422                *qp_type = IB_QPT_RC;
 423                return 0;
 424        case RDMA_PS_UDP:
 425        case RDMA_PS_IPOIB:
 426                *qp_type = IB_QPT_UD;
 427                return 0;
 428        case RDMA_PS_IB:
 429                *qp_type = cmd->qp_type;
 430                return 0;
 431        default:
 432                return -EINVAL;
 433        }
 434}
 435
 436static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 437                              int in_len, int out_len)
 438{
 439        struct rdma_ucm_create_id cmd;
 440        struct rdma_ucm_create_id_resp resp;
 441        struct ucma_context *ctx;
 442        struct rdma_cm_id *cm_id;
 443        enum ib_qp_type qp_type;
 444        int ret;
 445
 446        if (out_len < sizeof(resp))
 447                return -ENOSPC;
 448
 449        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 450                return -EFAULT;
 451
 452        ret = ucma_get_qp_type(&cmd, &qp_type);
 453        if (ret)
 454                return ret;
 455
 456        ctx = ucma_alloc_ctx(file);
 457        if (!ctx)
 458                return -ENOMEM;
 459
 460        ctx->uid = cmd.uid;
 461        cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
 462        if (IS_ERR(cm_id)) {
 463                ret = PTR_ERR(cm_id);
 464                goto err1;
 465        }
 466        ucma_set_ctx_cm_id(ctx, cm_id);
 467
 468        resp.id = ctx->id;
 469        if (copy_to_user(u64_to_user_ptr(cmd.response),
 470                         &resp, sizeof(resp))) {
 471                ret = -EFAULT;
 472                goto err1;
 473        }
 474
 475        mutex_lock(&file->mut);
 476        ucma_finish_ctx(ctx);
 477        mutex_unlock(&file->mut);
 478        return 0;
 479
 480err1:
 481        ucma_destroy_private_ctx(ctx);
 482        return ret;
 483}
 484
 485static void ucma_cleanup_multicast(struct ucma_context *ctx)
 486{
 487        struct ucma_multicast *mc;
 488        unsigned long index;
 489
 490        xa_for_each(&multicast_table, index, mc) {
 491                if (mc->ctx != ctx)
 492                        continue;
  493                /*
  494                 * At this point mc->ctx->ref is 0, so no event handler can still
  495                 * touch the mc; iterating the xarray is enough serialization.
  496                 */
 497                xa_erase(&multicast_table, index);
 498                kfree(mc);
 499        }
 500}
 501
 502static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 503{
 504        struct ucma_event *uevent, *tmp;
 505
 506        rdma_lock_handler(mc->ctx->cm_id);
 507        mutex_lock(&mc->ctx->file->mut);
 508        list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
 509                if (uevent->mc != mc)
 510                        continue;
 511
 512                list_del(&uevent->list);
 513                kfree(uevent);
 514        }
 515        mutex_unlock(&mc->ctx->file->mut);
 516        rdma_unlock_handler(mc->ctx->cm_id);
 517}
 518
 519static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
 520{
 521        int events_reported;
 522        struct ucma_event *uevent, *tmp;
 523        LIST_HEAD(list);
 524
  525        /* Clean up events not yet reported to the user. */
 526        mutex_lock(&ctx->file->mut);
 527        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
 528                if (uevent->ctx != ctx)
 529                        continue;
 530
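                     /*
                      * A connect request carries a child ctx that was never
                      * delivered to userspace; make it private here so it can
                      * be destroyed outside the lock below.
                      */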
 531                if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
 532                    xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
 533                               uevent->conn_req_ctx, XA_ZERO_ENTRY,
 534                               GFP_KERNEL) == uevent->conn_req_ctx) {
 535                        list_move_tail(&uevent->list, &list);
 536                        continue;
 537                }
 538                list_del(&uevent->list);
 539                kfree(uevent);
 540        }
 541        list_del(&ctx->list);
 542        events_reported = ctx->events_reported;
 543        mutex_unlock(&ctx->file->mut);
 544
 545        /*
 546         * If this was a listening ID then any connections spawned from it that
 547         * have not been delivered to userspace are cleaned up too. Must be done
 548         * outside any locks.
 549         */
 550        list_for_each_entry_safe(uevent, tmp, &list, list) {
 551                ucma_destroy_private_ctx(uevent->conn_req_ctx);
 552                kfree(uevent);
 553        }
 554        return events_reported;
 555}
 556
  557/*
  558 * When this is called the xarray must hold an XA_ZERO_ENTRY at ctx->id (i.e.
  559 * the ctx is not visible to the user). This is either because:
  560 *  - ucma_finish_ctx() hasn't been called, or
  561 *  - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
  562 */
 563static int ucma_destroy_private_ctx(struct ucma_context *ctx)
 564{
 565        int events_reported;
 566
  567        /*
  568         * Destroy the underlying cm_id. New work queuing is prevented now by
  569         * the removal from the xarray. Once the work is cancelled, ref will
  570         * either be 0 because the work ran to completion and consumed the ref
  571         * from the xarray, or it will be positive because we still hold the
  572         * xarray's ref. It can also be 0 if cm_id was never set.
  573         */
 574        cancel_work_sync(&ctx->close_work);
 575        if (refcount_read(&ctx->ref))
 576                ucma_close_id(&ctx->close_work);
 577
 578        events_reported = ucma_cleanup_ctx_events(ctx);
 579        ucma_cleanup_multicast(ctx);
 580
 581        WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
 582                           GFP_KERNEL) != NULL);
 583        mutex_destroy(&ctx->mutex);
 584        kfree(ctx);
 585        return events_reported;
 586}
 587
 588static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 589                               int in_len, int out_len)
 590{
 591        struct rdma_ucm_destroy_id cmd;
 592        struct rdma_ucm_destroy_id_resp resp;
 593        struct ucma_context *ctx;
 594        int ret = 0;
 595
 596        if (out_len < sizeof(resp))
 597                return -ENOSPC;
 598
 599        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 600                return -EFAULT;
 601
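             /*
              * Swap the entry for XA_ZERO_ENTRY so that lookups fail but the
              * id stays reserved; only one thread can win this exchange and
              * proceed with the destroy.
              */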
 602        xa_lock(&ctx_table);
 603        ctx = _ucma_find_context(cmd.id, file);
 604        if (!IS_ERR(ctx)) {
 605                if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
 606                                 GFP_KERNEL) != ctx)
 607                        ctx = ERR_PTR(-ENOENT);
 608        }
 609        xa_unlock(&ctx_table);
 610
 611        if (IS_ERR(ctx))
 612                return PTR_ERR(ctx);
 613
 614        resp.events_reported = ucma_destroy_private_ctx(ctx);
 615        if (copy_to_user(u64_to_user_ptr(cmd.response),
 616                         &resp, sizeof(resp)))
 617                ret = -EFAULT;
 618
 619        return ret;
 620}
 621
 622static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
 623                              int in_len, int out_len)
 624{
 625        struct rdma_ucm_bind_ip cmd;
 626        struct ucma_context *ctx;
 627        int ret;
 628
 629        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 630                return -EFAULT;
 631
 632        if (!rdma_addr_size_in6(&cmd.addr))
 633                return -EINVAL;
 634
 635        ctx = ucma_get_ctx(file, cmd.id);
 636        if (IS_ERR(ctx))
 637                return PTR_ERR(ctx);
 638
 639        mutex_lock(&ctx->mutex);
 640        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
 641        mutex_unlock(&ctx->mutex);
 642
 643        ucma_put_ctx(ctx);
 644        return ret;
 645}
 646
 647static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
 648                         int in_len, int out_len)
 649{
 650        struct rdma_ucm_bind cmd;
 651        struct ucma_context *ctx;
 652        int ret;
 653
 654        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 655                return -EFAULT;
 656
 657        if (cmd.reserved || !cmd.addr_size ||
 658            cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
 659                return -EINVAL;
 660
 661        ctx = ucma_get_ctx(file, cmd.id);
 662        if (IS_ERR(ctx))
 663                return PTR_ERR(ctx);
 664
 665        mutex_lock(&ctx->mutex);
 666        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
 667        mutex_unlock(&ctx->mutex);
 668        ucma_put_ctx(ctx);
 669        return ret;
 670}
 671
 672static ssize_t ucma_resolve_ip(struct ucma_file *file,
 673                               const char __user *inbuf,
 674                               int in_len, int out_len)
 675{
 676        struct rdma_ucm_resolve_ip cmd;
 677        struct ucma_context *ctx;
 678        int ret;
 679
 680        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 681                return -EFAULT;
 682
 683        if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
 684            !rdma_addr_size_in6(&cmd.dst_addr))
 685                return -EINVAL;
 686
 687        ctx = ucma_get_ctx(file, cmd.id);
 688        if (IS_ERR(ctx))
 689                return PTR_ERR(ctx);
 690
 691        mutex_lock(&ctx->mutex);
 692        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
 693                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
 694        mutex_unlock(&ctx->mutex);
 695        ucma_put_ctx(ctx);
 696        return ret;
 697}
 698
 699static ssize_t ucma_resolve_addr(struct ucma_file *file,
 700                                 const char __user *inbuf,
 701                                 int in_len, int out_len)
 702{
 703        struct rdma_ucm_resolve_addr cmd;
 704        struct ucma_context *ctx;
 705        int ret;
 706
 707        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 708                return -EFAULT;
 709
 710        if (cmd.reserved ||
 711            (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
 712            !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
 713                return -EINVAL;
 714
 715        ctx = ucma_get_ctx(file, cmd.id);
 716        if (IS_ERR(ctx))
 717                return PTR_ERR(ctx);
 718
 719        mutex_lock(&ctx->mutex);
 720        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
 721                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
 722        mutex_unlock(&ctx->mutex);
 723        ucma_put_ctx(ctx);
 724        return ret;
 725}
 726
 727static ssize_t ucma_resolve_route(struct ucma_file *file,
 728                                  const char __user *inbuf,
 729                                  int in_len, int out_len)
 730{
 731        struct rdma_ucm_resolve_route cmd;
 732        struct ucma_context *ctx;
 733        int ret;
 734
 735        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 736                return -EFAULT;
 737
 738        ctx = ucma_get_ctx_dev(file, cmd.id);
 739        if (IS_ERR(ctx))
 740                return PTR_ERR(ctx);
 741
 742        mutex_lock(&ctx->mutex);
 743        ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
 744        mutex_unlock(&ctx->mutex);
 745        ucma_put_ctx(ctx);
 746        return ret;
 747}
 748
 749static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
 750                               struct rdma_route *route)
 751{
 752        struct rdma_dev_addr *dev_addr;
 753
 754        resp->num_paths = route->num_paths;
 755        switch (route->num_paths) {
 756        case 0:
 757                dev_addr = &route->addr.dev_addr;
 758                rdma_addr_get_dgid(dev_addr,
 759                                   (union ib_gid *) &resp->ib_route[0].dgid);
 760                rdma_addr_get_sgid(dev_addr,
 761                                   (union ib_gid *) &resp->ib_route[0].sgid);
 762                resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 763                break;
 764        case 2:
 765                ib_copy_path_rec_to_user(&resp->ib_route[1],
 766                                         &route->path_rec[1]);
 767                fallthrough;
 768        case 1:
 769                ib_copy_path_rec_to_user(&resp->ib_route[0],
 770                                         &route->path_rec[0]);
 771                break;
 772        default:
 773                break;
 774        }
 775}
 776
 777static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
 778                                 struct rdma_route *route)
 779{
 780
 781        resp->num_paths = route->num_paths;
 782        switch (route->num_paths) {
 783        case 0:
 784                rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
 785                            (union ib_gid *)&resp->ib_route[0].dgid);
 786                rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
 787                            (union ib_gid *)&resp->ib_route[0].sgid);
 788                resp->ib_route[0].pkey = cpu_to_be16(0xffff);
 789                break;
 790        case 2:
 791                ib_copy_path_rec_to_user(&resp->ib_route[1],
 792                                         &route->path_rec[1]);
 793                fallthrough;
 794        case 1:
 795                ib_copy_path_rec_to_user(&resp->ib_route[0],
 796                                         &route->path_rec[0]);
 797                break;
 798        default:
 799                break;
 800        }
 801}
 802
 803static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
 804                               struct rdma_route *route)
 805{
 806        struct rdma_dev_addr *dev_addr;
 807
 808        dev_addr = &route->addr.dev_addr;
 809        rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
 810        rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
 811}
 812
 813static ssize_t ucma_query_route(struct ucma_file *file,
 814                                const char __user *inbuf,
 815                                int in_len, int out_len)
 816{
 817        struct rdma_ucm_query cmd;
 818        struct rdma_ucm_query_route_resp resp;
 819        struct ucma_context *ctx;
 820        struct sockaddr *addr;
 821        int ret = 0;
 822
 823        if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
 824                return -ENOSPC;
 825
 826        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 827                return -EFAULT;
 828
 829        ctx = ucma_get_ctx(file, cmd.id);
 830        if (IS_ERR(ctx))
 831                return PTR_ERR(ctx);
 832
 833        mutex_lock(&ctx->mutex);
 834        memset(&resp, 0, sizeof resp);
 835        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
 836        memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
 837                                     sizeof(struct sockaddr_in) :
 838                                     sizeof(struct sockaddr_in6));
 839        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
 840        memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
 841                                     sizeof(struct sockaddr_in) :
 842                                     sizeof(struct sockaddr_in6));
 843        if (!ctx->cm_id->device)
 844                goto out;
 845
 846        resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
 847        resp.ibdev_index = ctx->cm_id->device->index;
 848        resp.port_num = ctx->cm_id->port_num;
 849
 850        if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
 851                ucma_copy_ib_route(&resp, &ctx->cm_id->route);
 852        else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
 853                ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
 854        else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
 855                ucma_copy_iw_route(&resp, &ctx->cm_id->route);
 856
 857out:
 858        mutex_unlock(&ctx->mutex);
 859        if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
 860                         min_t(size_t, out_len, sizeof(resp))))
 861                ret = -EFAULT;
 862
 863        ucma_put_ctx(ctx);
 864        return ret;
 865}
 866
 867static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
 868                                   struct rdma_ucm_query_addr_resp *resp)
 869{
 870        if (!cm_id->device)
 871                return;
 872
 873        resp->node_guid = (__force __u64) cm_id->device->node_guid;
 874        resp->ibdev_index = cm_id->device->index;
 875        resp->port_num = cm_id->port_num;
 876        resp->pkey = (__force __u16) cpu_to_be16(
 877                     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
 878}
 879
 880static ssize_t ucma_query_addr(struct ucma_context *ctx,
 881                               void __user *response, int out_len)
 882{
 883        struct rdma_ucm_query_addr_resp resp;
 884        struct sockaddr *addr;
 885        int ret = 0;
 886
 887        if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
 888                return -ENOSPC;
 889
 890        memset(&resp, 0, sizeof resp);
 891
 892        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
 893        resp.src_size = rdma_addr_size(addr);
 894        memcpy(&resp.src_addr, addr, resp.src_size);
 895
 896        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
 897        resp.dst_size = rdma_addr_size(addr);
 898        memcpy(&resp.dst_addr, addr, resp.dst_size);
 899
 900        ucma_query_device_addr(ctx->cm_id, &resp);
 901
 902        if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
 903                ret = -EFAULT;
 904
 905        return ret;
 906}
 907
 908static ssize_t ucma_query_path(struct ucma_context *ctx,
 909                               void __user *response, int out_len)
 910{
 911        struct rdma_ucm_query_path_resp *resp;
 912        int i, ret = 0;
 913
 914        if (out_len < sizeof(*resp))
 915                return -ENOSPC;
 916
 917        resp = kzalloc(out_len, GFP_KERNEL);
 918        if (!resp)
 919                return -ENOMEM;
 920
 921        resp->num_paths = ctx->cm_id->route.num_paths;
 922        for (i = 0, out_len -= sizeof(*resp);
 923             i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
 924             i++, out_len -= sizeof(struct ib_path_rec_data)) {
 925                struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];
 926
 927                resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
 928                                           IB_PATH_BIDIRECTIONAL;
 929                if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
 930                        struct sa_path_rec ib;
 931
 932                        sa_convert_path_opa_to_ib(&ib, rec);
 933                        ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
 934
 935                } else {
 936                        ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
 937                }
 938        }
 939
 940        if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
 941                ret = -EFAULT;
 942
 943        kfree(resp);
 944        return ret;
 945}
 946
 947static ssize_t ucma_query_gid(struct ucma_context *ctx,
 948                              void __user *response, int out_len)
 949{
 950        struct rdma_ucm_query_addr_resp resp;
 951        struct sockaddr_ib *addr;
 952        int ret = 0;
 953
 954        if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
 955                return -ENOSPC;
 956
 957        memset(&resp, 0, sizeof resp);
 958
 959        ucma_query_device_addr(ctx->cm_id, &resp);
 960
 961        addr = (struct sockaddr_ib *) &resp.src_addr;
 962        resp.src_size = sizeof(*addr);
 963        if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
 964                memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
 965        } else {
 966                addr->sib_family = AF_IB;
 967                addr->sib_pkey = (__force __be16) resp.pkey;
 968                rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
 969                               NULL);
 970                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
 971                                                    &ctx->cm_id->route.addr.src_addr);
 972        }
 973
 974        addr = (struct sockaddr_ib *) &resp.dst_addr;
 975        resp.dst_size = sizeof(*addr);
 976        if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
 977                memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
 978        } else {
 979                addr->sib_family = AF_IB;
 980                addr->sib_pkey = (__force __be16) resp.pkey;
 981                rdma_read_gids(ctx->cm_id, NULL,
 982                               (union ib_gid *)&addr->sib_addr);
 983                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
 984                                                    &ctx->cm_id->route.addr.dst_addr);
 985        }
 986
 987        if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
 988                ret = -EFAULT;
 989
 990        return ret;
 991}
 992
 993static ssize_t ucma_query(struct ucma_file *file,
 994                          const char __user *inbuf,
 995                          int in_len, int out_len)
 996{
 997        struct rdma_ucm_query cmd;
 998        struct ucma_context *ctx;
 999        void __user *response;
1000        int ret;
1001
1002        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1003                return -EFAULT;
1004
1005        response = u64_to_user_ptr(cmd.response);
1006        ctx = ucma_get_ctx(file, cmd.id);
1007        if (IS_ERR(ctx))
1008                return PTR_ERR(ctx);
1009
1010        mutex_lock(&ctx->mutex);
1011        switch (cmd.option) {
1012        case RDMA_USER_CM_QUERY_ADDR:
1013                ret = ucma_query_addr(ctx, response, out_len);
1014                break;
1015        case RDMA_USER_CM_QUERY_PATH:
1016                ret = ucma_query_path(ctx, response, out_len);
1017                break;
1018        case RDMA_USER_CM_QUERY_GID:
1019                ret = ucma_query_gid(ctx, response, out_len);
1020                break;
1021        default:
1022                ret = -ENOSYS;
1023                break;
1024        }
1025        mutex_unlock(&ctx->mutex);
1026
1027        ucma_put_ctx(ctx);
1028        return ret;
1029}
1030
1031static void ucma_copy_conn_param(struct rdma_cm_id *id,
1032                                 struct rdma_conn_param *dst,
1033                                 struct rdma_ucm_conn_param *src)
1034{
1035        dst->private_data = src->private_data;
1036        dst->private_data_len = src->private_data_len;
1037        dst->responder_resources = src->responder_resources;
1038        dst->initiator_depth = src->initiator_depth;
1039        dst->flow_control = src->flow_control;
1040        dst->retry_count = src->retry_count;
1041        dst->rnr_retry_count = src->rnr_retry_count;
1042        dst->srq = src->srq;
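             /* QP numbers are only 24 bits wide; mask off the high byte */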
1043        dst->qp_num = src->qp_num & 0xFFFFFF;
1044        dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
1045}
1046
1047static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
1048                            int in_len, int out_len)
1049{
1050        struct rdma_conn_param conn_param;
1051        struct rdma_ucm_ece ece = {};
1052        struct rdma_ucm_connect cmd;
1053        struct ucma_context *ctx;
1054        size_t in_size;
1055        int ret;
1056
1057        if (in_len < offsetofend(typeof(cmd), reserved))
1058                return -EINVAL;
1059        in_size = min_t(size_t, in_len, sizeof(cmd));
1060        if (copy_from_user(&cmd, inbuf, in_size))
1061                return -EFAULT;
1062
1063        if (!cmd.conn_param.valid)
1064                return -EINVAL;
1065
1066        ctx = ucma_get_ctx_dev(file, cmd.id);
1067        if (IS_ERR(ctx))
1068                return PTR_ERR(ctx);
1069
1070        ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
1071        if (offsetofend(typeof(cmd), ece) <= in_size) {
1072                ece.vendor_id = cmd.ece.vendor_id;
1073                ece.attr_mod = cmd.ece.attr_mod;
1074        }
1075
1076        mutex_lock(&ctx->mutex);
1077        ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
1078        mutex_unlock(&ctx->mutex);
1079        ucma_put_ctx(ctx);
1080        return ret;
1081}
1082
1083static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
1084                           int in_len, int out_len)
1085{
1086        struct rdma_ucm_listen cmd;
1087        struct ucma_context *ctx;
1088        int ret;
1089
1090        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1091                return -EFAULT;
1092
1093        ctx = ucma_get_ctx(file, cmd.id);
1094        if (IS_ERR(ctx))
1095                return PTR_ERR(ctx);
1096
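             /* Clamp the requested backlog to the sysctl-controlled maximum */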
1097        if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
1098                cmd.backlog = max_backlog;
1099        atomic_set(&ctx->backlog, cmd.backlog);
1100
1101        mutex_lock(&ctx->mutex);
1102        ret = rdma_listen(ctx->cm_id, cmd.backlog);
1103        mutex_unlock(&ctx->mutex);
1104        ucma_put_ctx(ctx);
1105        return ret;
1106}
1107
1108static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
1109                           int in_len, int out_len)
1110{
1111        struct rdma_ucm_accept cmd;
1112        struct rdma_conn_param conn_param;
1113        struct rdma_ucm_ece ece = {};
1114        struct ucma_context *ctx;
1115        size_t in_size;
1116        int ret;
1117
1118        if (in_len < offsetofend(typeof(cmd), reserved))
1119                return -EINVAL;
1120        in_size = min_t(size_t, in_len, sizeof(cmd));
1121        if (copy_from_user(&cmd, inbuf, in_size))
1122                return -EFAULT;
1123
1124        ctx = ucma_get_ctx_dev(file, cmd.id);
1125        if (IS_ERR(ctx))
1126                return PTR_ERR(ctx);
1127
1128        if (offsetofend(typeof(cmd), ece) <= in_size) {
1129                ece.vendor_id = cmd.ece.vendor_id;
1130                ece.attr_mod = cmd.ece.attr_mod;
1131        }
1132
1133        if (cmd.conn_param.valid) {
1134                ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
1135                mutex_lock(&ctx->mutex);
1136                rdma_lock_handler(ctx->cm_id);
1137                ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
1138                if (!ret) {
1139                        /* The uid must be set atomically with the handler */
1140                        ctx->uid = cmd.uid;
1141                }
1142                rdma_unlock_handler(ctx->cm_id);
1143                mutex_unlock(&ctx->mutex);
1144        } else {
1145                mutex_lock(&ctx->mutex);
1146                rdma_lock_handler(ctx->cm_id);
1147                ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
1148                rdma_unlock_handler(ctx->cm_id);
1149                mutex_unlock(&ctx->mutex);
1150        }
1151        ucma_put_ctx(ctx);
1152        return ret;
1153}
1154
1155static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
1156                           int in_len, int out_len)
1157{
1158        struct rdma_ucm_reject cmd;
1159        struct ucma_context *ctx;
1160        int ret;
1161
1162        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1163                return -EFAULT;
1164
1165        if (!cmd.reason)
1166                cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;
1167
1168        switch (cmd.reason) {
1169        case IB_CM_REJ_CONSUMER_DEFINED:
1170        case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
1171                break;
1172        default:
1173                return -EINVAL;
1174        }
1175
1176        ctx = ucma_get_ctx_dev(file, cmd.id);
1177        if (IS_ERR(ctx))
1178                return PTR_ERR(ctx);
1179
1180        mutex_lock(&ctx->mutex);
1181        ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
1182                          cmd.reason);
1183        mutex_unlock(&ctx->mutex);
1184        ucma_put_ctx(ctx);
1185        return ret;
1186}
1187
1188static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
1189                               int in_len, int out_len)
1190{
1191        struct rdma_ucm_disconnect cmd;
1192        struct ucma_context *ctx;
1193        int ret;
1194
1195        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1196                return -EFAULT;
1197
1198        ctx = ucma_get_ctx_dev(file, cmd.id);
1199        if (IS_ERR(ctx))
1200                return PTR_ERR(ctx);
1201
1202        mutex_lock(&ctx->mutex);
1203        ret = rdma_disconnect(ctx->cm_id);
1204        mutex_unlock(&ctx->mutex);
1205        ucma_put_ctx(ctx);
1206        return ret;
1207}
1208
1209static ssize_t ucma_init_qp_attr(struct ucma_file *file,
1210                                 const char __user *inbuf,
1211                                 int in_len, int out_len)
1212{
1213        struct rdma_ucm_init_qp_attr cmd;
1214        struct ib_uverbs_qp_attr resp;
1215        struct ucma_context *ctx;
1216        struct ib_qp_attr qp_attr;
1217        int ret;
1218
1219        if (out_len < sizeof(resp))
1220                return -ENOSPC;
1221
1222        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1223                return -EFAULT;
1224
1225        if (cmd.qp_state > IB_QPS_ERR)
1226                return -EINVAL;
1227
1228        ctx = ucma_get_ctx_dev(file, cmd.id);
1229        if (IS_ERR(ctx))
1230                return PTR_ERR(ctx);
1231
1232        resp.qp_attr_mask = 0;
1233        memset(&qp_attr, 0, sizeof qp_attr);
1234        qp_attr.qp_state = cmd.qp_state;
1235        mutex_lock(&ctx->mutex);
1236        ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
1237        mutex_unlock(&ctx->mutex);
1238        if (ret)
1239                goto out;
1240
1241        ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
1242        if (copy_to_user(u64_to_user_ptr(cmd.response),
1243                         &resp, sizeof(resp)))
1244                ret = -EFAULT;
1245
1246out:
1247        ucma_put_ctx(ctx);
1248        return ret;
1249}
1250
1251static int ucma_set_option_id(struct ucma_context *ctx, int optname,
1252                              void *optval, size_t optlen)
1253{
1254        int ret = 0;
1255
1256        switch (optname) {
1257        case RDMA_OPTION_ID_TOS:
1258                if (optlen != sizeof(u8)) {
1259                        ret = -EINVAL;
1260                        break;
1261                }
1262                rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
1263                break;
1264        case RDMA_OPTION_ID_REUSEADDR:
1265                if (optlen != sizeof(int)) {
1266                        ret = -EINVAL;
1267                        break;
1268                }
1269                ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
1270                break;
1271        case RDMA_OPTION_ID_AFONLY:
1272                if (optlen != sizeof(int)) {
1273                        ret = -EINVAL;
1274                        break;
1275                }
1276                ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
1277                break;
1278        case RDMA_OPTION_ID_ACK_TIMEOUT:
1279                if (optlen != sizeof(u8)) {
1280                        ret = -EINVAL;
1281                        break;
1282                }
1283                ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
1284                break;
1285        default:
1286                ret = -ENOSYS;
1287        }
1288
1289        return ret;
1290}
1291
1292static int ucma_set_ib_path(struct ucma_context *ctx,
1293                            struct ib_path_rec_data *path_data, size_t optlen)
1294{
1295        struct sa_path_rec sa_path;
1296        struct rdma_cm_event event;
1297        int ret;
1298
1299        if (optlen % sizeof(*path_data))
1300                return -EINVAL;
1301
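             /* Find the first primary, bidirectional GMP entry in the array */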
1302        for (; optlen; optlen -= sizeof(*path_data), path_data++) {
1303                if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
1304                                         IB_PATH_BIDIRECTIONAL))
1305                        break;
1306        }
1307
1308        if (!optlen)
1309                return -EINVAL;
1310
1311        if (!ctx->cm_id->device)
1312                return -EINVAL;
1313
1314        memset(&sa_path, 0, sizeof(sa_path));
1315
1316        sa_path.rec_type = SA_PATH_REC_TYPE_IB;
1317        ib_sa_unpack_path(path_data->path_rec, &sa_path);
1318
1319        if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
1320                struct sa_path_rec opa;
1321
1322                sa_convert_path_ib_to_opa(&opa, &sa_path);
1323                mutex_lock(&ctx->mutex);
1324                ret = rdma_set_ib_path(ctx->cm_id, &opa);
1325                mutex_unlock(&ctx->mutex);
1326        } else {
1327                mutex_lock(&ctx->mutex);
1328                ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
1329                mutex_unlock(&ctx->mutex);
1330        }
1331        if (ret)
1332                return ret;
1333
1334        memset(&event, 0, sizeof event);
1335        event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1336        return ucma_event_handler(ctx->cm_id, &event);
1337}
1338
1339static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
1340                              void *optval, size_t optlen)
1341{
1342        int ret;
1343
1344        switch (optname) {
1345        case RDMA_OPTION_IB_PATH:
1346                ret = ucma_set_ib_path(ctx, optval, optlen);
1347                break;
1348        default:
1349                ret = -ENOSYS;
1350        }
1351
1352        return ret;
1353}
1354
1355static int ucma_set_option_level(struct ucma_context *ctx, int level,
1356                                 int optname, void *optval, size_t optlen)
1357{
1358        int ret;
1359
1360        switch (level) {
1361        case RDMA_OPTION_ID:
1362                mutex_lock(&ctx->mutex);
1363                ret = ucma_set_option_id(ctx, optname, optval, optlen);
1364                mutex_unlock(&ctx->mutex);
1365                break;
1366        case RDMA_OPTION_IB:
1367                ret = ucma_set_option_ib(ctx, optname, optval, optlen);
1368                break;
1369        default:
1370                ret = -ENOSYS;
1371        }
1372
1373        return ret;
1374}
1375
1376static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
1377                               int in_len, int out_len)
1378{
1379        struct rdma_ucm_set_option cmd;
1380        struct ucma_context *ctx;
1381        void *optval;
1382        int ret;
1383
1384        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1385                return -EFAULT;
1386
1387        if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
1388                return -EINVAL;
1389
1390        ctx = ucma_get_ctx(file, cmd.id);
1391        if (IS_ERR(ctx))
1392                return PTR_ERR(ctx);
1393
1394        optval = memdup_user(u64_to_user_ptr(cmd.optval),
1395                             cmd.optlen);
1396        if (IS_ERR(optval)) {
1397                ret = PTR_ERR(optval);
1398                goto out;
1399        }
1400
1401        ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
1402                                    cmd.optlen);
1403        kfree(optval);
1404
1405out:
1406        ucma_put_ctx(ctx);
1407        return ret;
1408}
1409
1410static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
1411                           int in_len, int out_len)
1412{
1413        struct rdma_ucm_notify cmd;
1414        struct ucma_context *ctx;
1415        int ret = -EINVAL;
1416
1417        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1418                return -EFAULT;
1419
1420        ctx = ucma_get_ctx(file, cmd.id);
1421        if (IS_ERR(ctx))
1422                return PTR_ERR(ctx);
1423
1424        mutex_lock(&ctx->mutex);
1425        if (ctx->cm_id->device)
1426                ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
1427        mutex_unlock(&ctx->mutex);
1428
1429        ucma_put_ctx(ctx);
1430        return ret;
1431}
1432
1433static ssize_t ucma_process_join(struct ucma_file *file,
1434                                 struct rdma_ucm_join_mcast *cmd,  int out_len)
1435{
1436        struct rdma_ucm_create_id_resp resp;
1437        struct ucma_context *ctx;
1438        struct ucma_multicast *mc;
1439        struct sockaddr *addr;
1440        int ret;
1441        u8 join_state;
1442
1443        if (out_len < sizeof(resp))
1444                return -ENOSPC;
1445
1446        addr = (struct sockaddr *) &cmd->addr;
1447        if (cmd->addr_size != rdma_addr_size(addr))
1448                return -EINVAL;
1449
1450        if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
1451                join_state = BIT(FULLMEMBER_JOIN);
1452        else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
1453                join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
1454        else
1455                return -EINVAL;
1456
1457        ctx = ucma_get_ctx_dev(file, cmd->id);
1458        if (IS_ERR(ctx))
1459                return PTR_ERR(ctx);
1460
1461        mc = kzalloc(sizeof(*mc), GFP_KERNEL);
1462        if (!mc) {
1463                ret = -ENOMEM;
1464                goto err_put_ctx;
1465        }
1466
1467        mc->ctx = ctx;
1468        mc->join_state = join_state;
1469        mc->uid = cmd->uid;
1470        memcpy(&mc->addr, addr, cmd->addr_size);
1471
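             /*
              * Reserve an id now, but publish the mc in the xarray only after
              * the join and the copy to userspace have both succeeded.
              */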
1472        if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
1473                     GFP_KERNEL)) {
1474                ret = -ENOMEM;
1475                goto err_free_mc;
1476        }
1477
1478        mutex_lock(&ctx->mutex);
1479        ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
1480                                  join_state, mc);
1481        mutex_unlock(&ctx->mutex);
1482        if (ret)
1483                goto err_xa_erase;
1484
1485        resp.id = mc->id;
1486        if (copy_to_user(u64_to_user_ptr(cmd->response),
1487                         &resp, sizeof(resp))) {
1488                ret = -EFAULT;
1489                goto err_leave_multicast;
1490        }
1491
1492        xa_store(&multicast_table, mc->id, mc, 0);
1493
1494        ucma_put_ctx(ctx);
1495        return 0;
1496
1497err_leave_multicast:
1498        mutex_lock(&ctx->mutex);
1499        rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
1500        mutex_unlock(&ctx->mutex);
1501        ucma_cleanup_mc_events(mc);
1502err_xa_erase:
1503        xa_erase(&multicast_table, mc->id);
1504err_free_mc:
1505        kfree(mc);
1506err_put_ctx:
1507        ucma_put_ctx(ctx);
1508        return ret;
1509}
1510
1511static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
1512                                      const char __user *inbuf,
1513                                      int in_len, int out_len)
1514{
1515        struct rdma_ucm_join_ip_mcast cmd;
1516        struct rdma_ucm_join_mcast join_cmd;
1517
1518        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1519                return -EFAULT;
1520
1521        join_cmd.response = cmd.response;
1522        join_cmd.uid = cmd.uid;
1523        join_cmd.id = cmd.id;
1524        join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
1525        if (!join_cmd.addr_size)
1526                return -EINVAL;
1527
1528        join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
1529        memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
1530
1531        return ucma_process_join(file, &join_cmd, out_len);
1532}
1533
1534static ssize_t ucma_join_multicast(struct ucma_file *file,
1535                                   const char __user *inbuf,
1536                                   int in_len, int out_len)
1537{
1538        struct rdma_ucm_join_mcast cmd;
1539
1540        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1541                return -EFAULT;
1542
1543        if (!rdma_addr_size_kss(&cmd.addr))
1544                return -EINVAL;
1545
1546        return ucma_process_join(file, &cmd, out_len);
1547}
1548
static ssize_t ucma_leave_multicast(struct ucma_file *file,
                                    const char __user *inbuf,
                                    int in_len, int out_len)
{
        struct rdma_ucm_destroy_id cmd;
        struct rdma_ucm_destroy_id_resp resp;
        struct ucma_multicast *mc;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        xa_lock(&multicast_table);
        mc = xa_load(&multicast_table, cmd.id);
        if (!mc)
                mc = ERR_PTR(-ENOENT);
        else if (READ_ONCE(mc->ctx->file) != file)
                mc = ERR_PTR(-EINVAL);
        else if (!refcount_inc_not_zero(&mc->ctx->ref))
                mc = ERR_PTR(-ENXIO);
        else
                __xa_erase(&multicast_table, mc->id);
        xa_unlock(&multicast_table);

        if (IS_ERR(mc)) {
                ret = PTR_ERR(mc);
                goto out;
        }

        mutex_lock(&mc->ctx->mutex);
        rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *)&mc->addr);
        mutex_unlock(&mc->ctx->mutex);

        ucma_cleanup_mc_events(mc);

        ucma_put_ctx(mc->ctx);
        resp.events_reported = mc->events_reported;
        kfree(mc);

        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
out:
        return ret;
}

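/*
 * Migrate a ucma_context, along with any events queued for it, from the
 * file it was created on to the file this command was written to.  Both
 * file descriptors must refer to rdma_cm character device instances open
 * in the calling process.
 */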
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
                               const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_migrate_id cmd;
        struct rdma_ucm_migrate_resp resp;
        struct ucma_event *uevent, *tmp;
        struct ucma_context *ctx;
        LIST_HEAD(event_list);
        struct fd f;
        struct ucma_file *cur_file;
        int ret = 0;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        /* Get current fd to protect against it being closed */
        f = fdget(cmd.fd);
        if (!f.file)
                return -ENOENT;
        if (f.file->f_op != &ucma_fops) {
                ret = -EINVAL;
                goto file_put;
        }
        cur_file = f.file->private_data;

        /* Validate current fd and prevent destruction of id. */
        ctx = ucma_get_ctx(cur_file, cmd.id);
        if (IS_ERR(ctx)) {
                ret = PTR_ERR(ctx);
                goto file_put;
        }

        rdma_lock_handler(ctx->cm_id);
        /*
         * ctx->file can only be changed under the handler & xa_lock. xa_load()
         * must be checked again to ensure the ctx hasn't begun destruction
         * since the ucma_get_ctx().
         */
        xa_lock(&ctx_table);
        if (_ucma_find_context(cmd.id, cur_file) != ctx) {
                xa_unlock(&ctx_table);
                ret = -ENOENT;
                goto err_unlock;
        }
        ctx->file = new_file;
        xa_unlock(&ctx_table);

        mutex_lock(&cur_file->mut);
        list_del(&ctx->list);
        /*
         * At this point lock_handler() prevents addition of new uevents for
         * this ctx.
         */
        list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
                if (uevent->ctx == ctx)
                        list_move_tail(&uevent->list, &event_list);
        resp.events_reported = ctx->events_reported;
        mutex_unlock(&cur_file->mut);

        mutex_lock(&new_file->mut);
        list_add_tail(&ctx->list, &new_file->ctx_list);
        list_splice_tail(&event_list, &new_file->event_list);
        mutex_unlock(&new_file->mut);

        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;

err_unlock:
        rdma_unlock_handler(ctx->cm_id);
        ucma_put_ctx(ctx);
file_put:
        fdput(f);
        return ret;
}

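/*
 * Dispatch table for write() commands, indexed by the cmd field of
 * struct rdma_ucm_cmd_hdr.  A NULL slot (RDMA_USER_CM_CMD_GET_OPTION)
 * is reported as -ENOSYS by ucma_write().
 */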
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
                                   const char __user *inbuf,
                                   int in_len, int out_len) = {
        [RDMA_USER_CM_CMD_CREATE_ID]     = ucma_create_id,
        [RDMA_USER_CM_CMD_DESTROY_ID]    = ucma_destroy_id,
        [RDMA_USER_CM_CMD_BIND_IP]       = ucma_bind_ip,
        [RDMA_USER_CM_CMD_RESOLVE_IP]    = ucma_resolve_ip,
        [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
        [RDMA_USER_CM_CMD_QUERY_ROUTE]   = ucma_query_route,
        [RDMA_USER_CM_CMD_CONNECT]       = ucma_connect,
        [RDMA_USER_CM_CMD_LISTEN]        = ucma_listen,
        [RDMA_USER_CM_CMD_ACCEPT]        = ucma_accept,
        [RDMA_USER_CM_CMD_REJECT]        = ucma_reject,
        [RDMA_USER_CM_CMD_DISCONNECT]    = ucma_disconnect,
        [RDMA_USER_CM_CMD_INIT_QP_ATTR]  = ucma_init_qp_attr,
        [RDMA_USER_CM_CMD_GET_EVENT]     = ucma_get_event,
        [RDMA_USER_CM_CMD_GET_OPTION]    = NULL,
        [RDMA_USER_CM_CMD_SET_OPTION]    = ucma_set_option,
        [RDMA_USER_CM_CMD_NOTIFY]        = ucma_notify,
        [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
        [RDMA_USER_CM_CMD_LEAVE_MCAST]   = ucma_leave_multicast,
        [RDMA_USER_CM_CMD_MIGRATE_ID]    = ucma_migrate_id,
        [RDMA_USER_CM_CMD_QUERY]         = ucma_query,
        [RDMA_USER_CM_CMD_BIND]          = ucma_bind,
        [RDMA_USER_CM_CMD_RESOLVE_ADDR]  = ucma_resolve_addr,
        [RDMA_USER_CM_CMD_JOIN_MCAST]    = ucma_join_multicast
};

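/*
 * Entry point for all commands: each write() carries a struct
 * rdma_ucm_cmd_hdr followed by hdr.in bytes of command payload, with any
 * response written back through a user pointer embedded in the command.
 * A hypothetical userspace invocation (not part of this file) would look
 * roughly like:
 *
 *	struct rdma_ucm_cmd_hdr *hdr = (void *)buf;
 *	hdr->cmd = RDMA_USER_CM_CMD_CREATE_ID;
 *	hdr->in  = sizeof(struct rdma_ucm_create_id);
 *	hdr->out = sizeof(struct rdma_ucm_create_id_resp);
 *	write(fd, buf, sizeof(*hdr) + hdr->in);
 */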
static ssize_t ucma_write(struct file *filp, const char __user *buf,
                          size_t len, loff_t *pos)
{
        struct ucma_file *file = filp->private_data;
        struct rdma_ucm_cmd_hdr hdr;
        ssize_t ret;

        if (!ib_safe_file_access(filp)) {
                pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
                            __func__, task_tgid_vnr(current), current->comm);
                return -EACCES;
        }

        if (len < sizeof(hdr))
                return -EINVAL;

        if (copy_from_user(&hdr, buf, sizeof(hdr)))
                return -EFAULT;

        if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
                return -EINVAL;
        hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

        if (hdr.in + sizeof(hdr) > len)
                return -EINVAL;

        if (!ucma_cmd_table[hdr.cmd])
                return -ENOSYS;

        ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
        if (!ret)
                ret = len;

        return ret;
}

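/*
 * A ucma fd becomes readable once at least one event is queued on its
 * event_list; userspace then issues RDMA_USER_CM_CMD_GET_EVENT to
 * dequeue it.
 */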
static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
        struct ucma_file *file = filp->private_data;
        __poll_t mask = 0;

        poll_wait(filp, &file->poll_wait, wait);

        if (!list_empty(&file->event_list))
                mask = EPOLLIN | EPOLLRDNORM;

        return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
        struct ucma_file *file;

        file = kmalloc(sizeof(*file), GFP_KERNEL);
        if (!file)
                return -ENOMEM;

        INIT_LIST_HEAD(&file->event_list);
        INIT_LIST_HEAD(&file->ctx_list);
        init_waitqueue_head(&file->poll_wait);
        mutex_init(&file->mut);

        filp->private_data = file;
        file->filp = filp;

        return stream_open(inode, filp);
}

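/*
 * Release function for the ucma fd: tear down every remaining context on
 * the file and free the file structure itself.
 */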
static int ucma_close(struct inode *inode, struct file *filp)
{
        struct ucma_file *file = filp->private_data;

        /*
         * All paths that touch ctx_list starting from write() are prevented
         * by this being a FD release function. The list_add_tail() in
         * ucma_connect_event_handler() can run concurrently, however it only
         * adds entries *after* an existing listening ID. By only reading the
         * first entry of the list, and relying on ucma_destroy_private_ctx()
         * to block ucma_connect_event_handler(), no additional locking is
         * needed.
         */
        while (!list_empty(&file->ctx_list)) {
                struct ucma_context *ctx = list_first_entry(
                        &file->ctx_list, struct ucma_context, list);

                WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
                                   GFP_KERNEL) != ctx);
                ucma_destroy_private_ctx(ctx);
        }
        kfree(file);
        return 0;
}

static const struct file_operations ucma_fops = {
        .owner   = THIS_MODULE,
        .open    = ucma_open,
        .release = ucma_close,
        .write   = ucma_write,
        .poll    = ucma_poll,
        .llseek  = no_llseek,
};

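/*
 * Dynamic misc character device, surfaced as /dev/infiniband/rdma_cm and
 * world read/writable (0666) so unprivileged processes can use the RDMA CM.
 */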
static struct miscdevice ucma_misc = {
        .minor          = MISC_DYNAMIC_MINOR,
        .name           = "rdma_cm",
        .nodename       = "infiniband/rdma_cm",
        .mode           = 0666,
        .fops           = &ucma_fops,
};

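/*
 * Fills in the ABI version and character device of this module for RDMA
 * netlink client queries.
 */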
static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
        res->abi = RDMA_USER_CM_ABI_VERSION;
        res->cdev = ucma_misc.this_device;
        return 0;
}

static struct ib_client rdma_cma_client = {
        .name = "rdma_cm",
        .get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");

static ssize_t abi_version_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR_RO(abi_version);

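/*
 * Module init: register the misc device first, then the sysfs attribute,
 * sysctl table, and IB client, unwinding in reverse order on any failure.
 */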
static int __init ucma_init(void)
{
        int ret;

        ret = misc_register(&ucma_misc);
        if (ret)
                return ret;

        ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
        if (ret) {
                pr_err("rdma_ucm: couldn't create abi_version attr\n");
                goto err1;
        }

        ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
        if (!ucma_ctl_table_hdr) {
                pr_err("rdma_ucm: couldn't register sysctl paths\n");
                ret = -ENOMEM;
                goto err2;
        }

        ret = ib_register_client(&rdma_cma_client);
        if (ret)
                goto err3;

        return 0;
err3:
        unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
        misc_deregister(&ucma_misc);
        return ret;
}

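/* Module unload: tear everything down in the reverse order of ucma_init(). */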
static void __exit ucma_cleanup(void)
{
        ib_unregister_client(&rdma_cma_client);
        unregister_net_sysctl_table(ucma_ctl_table_hdr);
        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
        misc_deregister(&ucma_misc);
}

module_init(ucma_init);
module_exit(ucma_cleanup);