linux/drivers/infiniband/core/ucma.c
   1/*
   2 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *      copyright notice, this list of conditions and the following
  16 *      disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *      copyright notice, this list of conditions and the following
  20 *      disclaimer in the documentation and/or other materials
  21 *      provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include <linux/completion.h>
  34#include <linux/file.h>
  35#include <linux/mutex.h>
  36#include <linux/poll.h>
  37#include <linux/sched.h>
  38#include <linux/idr.h>
  39#include <linux/in.h>
  40#include <linux/in6.h>
  41#include <linux/miscdevice.h>
  42#include <linux/slab.h>
  43#include <linux/sysctl.h>
  44#include <linux/module.h>
  45#include <linux/nsproxy.h>
  46
  47#include <linux/nospec.h>
  48
  49#include <rdma/rdma_user_cm.h>
  50#include <rdma/ib_marshall.h>
  51#include <rdma/rdma_cm.h>
  52#include <rdma/rdma_cm_ib.h>
  53#include <rdma/ib_addr.h>
  54#include <rdma/ib.h>
  55#include <rdma/rdma_netlink.h>
  56#include "core_priv.h"
  57
  58MODULE_AUTHOR("Sean Hefty");
  59MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
  60MODULE_LICENSE("Dual BSD/GPL");
  61
  62static unsigned int max_backlog = 1024;
  63
  64static struct ctl_table_header *ucma_ctl_table_hdr;
  65static struct ctl_table ucma_ctl_table[] = {
  66        {
  67                .procname       = "max_backlog",
  68                .data           = &max_backlog,
  69                .maxlen         = sizeof max_backlog,
  70                .mode           = 0644,
  71                .proc_handler   = proc_dointvec,
  72        },
  73        { }
  74};
  75
  76struct ucma_file {
  77        struct mutex            mut;
  78        struct file             *filp;
  79        struct list_head        ctx_list;
  80        struct list_head        event_list;
  81        wait_queue_head_t       poll_wait;
  82        struct workqueue_struct *close_wq;
  83};
  84
  85struct ucma_context {
  86        u32                     id;
  87        struct completion       comp;
  88        atomic_t                ref;
  89        int                     events_reported;
  90        int                     backlog;
  91
  92        struct ucma_file        *file;
  93        struct rdma_cm_id       *cm_id;
  94        u64                     uid;
  95
  96        struct list_head        list;
  97        struct list_head        mc_list;
   98        /* mark that the device is in the process of destroying the internal HW
   99         * resources; protected by the ctx_table lock
 100         */
 101        int                     closing;
 102        /* sync between removal event and id destroy, protected by file mut */
 103        int                     destroying;
 104        struct work_struct      close_work;
 105};
 106
 107struct ucma_multicast {
 108        struct ucma_context     *ctx;
 109        u32                     id;
 110        int                     events_reported;
 111
 112        u64                     uid;
 113        u8                      join_state;
 114        struct list_head        list;
 115        struct sockaddr_storage addr;
 116};
 117
 118struct ucma_event {
 119        struct ucma_context     *ctx;
 120        struct ucma_multicast   *mc;
 121        struct list_head        list;
 122        struct rdma_cm_id       *cm_id;
 123        struct rdma_ucm_event_resp resp;
 124        struct work_struct      close_work;
 125};
 126
 127static DEFINE_XARRAY_ALLOC(ctx_table);
 128static DEFINE_XARRAY_ALLOC(multicast_table);
 129
 130static const struct file_operations ucma_fops;
 131
 132static inline struct ucma_context *_ucma_find_context(int id,
 133                                                      struct ucma_file *file)
 134{
 135        struct ucma_context *ctx;
 136
 137        ctx = xa_load(&ctx_table, id);
 138        if (!ctx)
 139                ctx = ERR_PTR(-ENOENT);
 140        else if (ctx->file != file || !ctx->cm_id)
 141                ctx = ERR_PTR(-EINVAL);
 142        return ctx;
 143}
 144
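/*
 * Look up a context by id and check that it belongs to this file.  On
 * success a reference is taken that must be released with ucma_put_ctx();
 * contexts already being torn down (ctx->closing) are reported as -EIO.
 */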
 145static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
 146{
 147        struct ucma_context *ctx;
 148
 149        xa_lock(&ctx_table);
 150        ctx = _ucma_find_context(id, file);
 151        if (!IS_ERR(ctx)) {
 152                if (ctx->closing)
 153                        ctx = ERR_PTR(-EIO);
 154                else
 155                        atomic_inc(&ctx->ref);
 156        }
 157        xa_unlock(&ctx_table);
 158        return ctx;
 159}
 160
 161static void ucma_put_ctx(struct ucma_context *ctx)
 162{
 163        if (atomic_dec_and_test(&ctx->ref))
 164                complete(&ctx->comp);
 165}
 166
 167/*
  168 * Same as ucma_get_ctx() but also requires that ->cm_id->device is valid,
  169 * i.e. that the CM_ID is bound to a device.
 170 */
 171static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
 172{
 173        struct ucma_context *ctx = ucma_get_ctx(file, id);
 174
 175        if (IS_ERR(ctx))
 176                return ctx;
 177        if (!ctx->cm_id->device) {
 178                ucma_put_ctx(ctx);
 179                return ERR_PTR(-EINVAL);
 180        }
 181        return ctx;
 182}
 183
 184static void ucma_close_event_id(struct work_struct *work)
 185{
  186        struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
 187
 188        rdma_destroy_id(uevent_close->cm_id);
 189        kfree(uevent_close);
 190}
 191
 192static void ucma_close_id(struct work_struct *work)
 193{
  194        struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
 195
  196        /* Once all in-flight tasks are finished, we close all underlying
  197         * resources. The context stays alive until it is explicitly destroyed
  198         * by its creator.
 199         */
 200        ucma_put_ctx(ctx);
 201        wait_for_completion(&ctx->comp);
 202        /* No new events will be generated after destroying the id. */
 203        rdma_destroy_id(ctx->cm_id);
 204}
 205
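/*
 * Allocate a context with a single reference held, publish it in ctx_table
 * and link it on the owning file's ctx_list.  Both callers hold file->mut
 * around the call.
 */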
 206static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 207{
 208        struct ucma_context *ctx;
 209
 210        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 211        if (!ctx)
 212                return NULL;
 213
 214        INIT_WORK(&ctx->close_work, ucma_close_id);
 215        atomic_set(&ctx->ref, 1);
 216        init_completion(&ctx->comp);
 217        INIT_LIST_HEAD(&ctx->mc_list);
 218        ctx->file = file;
 219
 220        if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
 221                goto error;
 222
 223        list_add_tail(&ctx->list, &file->ctx_list);
 224        return ctx;
 225
 226error:
 227        kfree(ctx);
 228        return NULL;
 229}
 230
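/*
 * Allocate a multicast join record.  The id is only reserved here (a NULL
 * entry is stored in multicast_table); ucma_process_join() publishes the
 * real pointer once the join has been set up.
 */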
  231static struct ucma_multicast *ucma_alloc_multicast(struct ucma_context *ctx)
 232{
 233        struct ucma_multicast *mc;
 234
 235        mc = kzalloc(sizeof(*mc), GFP_KERNEL);
 236        if (!mc)
 237                return NULL;
 238
 239        mc->ctx = ctx;
 240        if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
 241                goto error;
 242
 243        list_add_tail(&mc->list, &ctx->mc_list);
 244        return mc;
 245
 246error:
 247        kfree(mc);
 248        return NULL;
 249}
 250
 251static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
 252                                 struct rdma_conn_param *src)
 253{
 254        if (src->private_data_len)
 255                memcpy(dst->private_data, src->private_data,
 256                       src->private_data_len);
 257        dst->private_data_len = src->private_data_len;
  258        dst->responder_resources = src->responder_resources;
 259        dst->initiator_depth = src->initiator_depth;
 260        dst->flow_control = src->flow_control;
 261        dst->retry_count = src->retry_count;
 262        dst->rnr_retry_count = src->rnr_retry_count;
 263        dst->srq = src->srq;
 264        dst->qp_num = src->qp_num;
 265}
 266
 267static void ucma_copy_ud_event(struct ib_device *device,
 268                               struct rdma_ucm_ud_param *dst,
 269                               struct rdma_ud_param *src)
 270{
 271        if (src->private_data_len)
 272                memcpy(dst->private_data, src->private_data,
 273                       src->private_data_len);
 274        dst->private_data_len = src->private_data_len;
 275        ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
 276        dst->qp_num = src->qp_num;
 277        dst->qkey = src->qkey;
 278}
 279
 280static void ucma_set_event_context(struct ucma_context *ctx,
 281                                   struct rdma_cm_event *event,
 282                                   struct ucma_event *uevent)
 283{
 284        uevent->ctx = ctx;
 285        switch (event->event) {
 286        case RDMA_CM_EVENT_MULTICAST_JOIN:
 287        case RDMA_CM_EVENT_MULTICAST_ERROR:
 288                uevent->mc = (struct ucma_multicast *)
 289                             event->param.ud.private_data;
 290                uevent->resp.uid = uevent->mc->uid;
 291                uevent->resp.id = uevent->mc->id;
 292                break;
 293        default:
 294                uevent->resp.uid = ctx->uid;
 295                uevent->resp.id = ctx->id;
 296                break;
 297        }
 298}
 299
 300/* Called with file->mut locked for the relevant context. */
 301static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
 302{
 303        struct ucma_context *ctx = cm_id->context;
 304        struct ucma_event *con_req_eve;
 305        int event_found = 0;
 306
 307        if (ctx->destroying)
 308                return;
 309
  310        /* The context owns the cm_id, and may queue it to be closed, only if
  311         * the context still points to it.  Otherwise the cm_id is an in-flight
  312         * one sitting on this context's event list, waiting to be detached and
  313         * re-attached to its new context by ucma_get_event(), and is handled
  314         * separately below.
 315         */
 316        if (ctx->cm_id == cm_id) {
 317                xa_lock(&ctx_table);
 318                ctx->closing = 1;
 319                xa_unlock(&ctx_table);
 320                queue_work(ctx->file->close_wq, &ctx->close_work);
 321                return;
 322        }
 323
 324        list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
 325                if (con_req_eve->cm_id == cm_id &&
 326                    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
 327                        list_del(&con_req_eve->list);
 328                        INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
 329                        queue_work(ctx->file->close_wq, &con_req_eve->close_work);
 330                        event_found = 1;
 331                        break;
 332                }
 333        }
 334        if (!event_found)
 335                pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
 336}
 337
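/*
 * rdma_cm event callback.  The event is translated into a ucma_event,
 * queued on the owning file's event_list, and pollers are woken.  Connect
 * requests consume one slot of the listen backlog; events that arrive
 * before user space has claimed the context are dropped, except that a
 * device removal is still propagated so the HW resources get released.
 */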
 338static int ucma_event_handler(struct rdma_cm_id *cm_id,
 339                              struct rdma_cm_event *event)
 340{
 341        struct ucma_event *uevent;
 342        struct ucma_context *ctx = cm_id->context;
 343        int ret = 0;
 344
 345        uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
 346        if (!uevent)
 347                return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
 348
 349        mutex_lock(&ctx->file->mut);
 350        uevent->cm_id = cm_id;
 351        ucma_set_event_context(ctx, event, uevent);
 352        uevent->resp.event = event->event;
 353        uevent->resp.status = event->status;
 354        if (cm_id->qp_type == IB_QPT_UD)
 355                ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
 356                                   &event->param.ud);
 357        else
 358                ucma_copy_conn_event(&uevent->resp.param.conn,
 359                                     &event->param.conn);
 360
 361        if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
 362                if (!ctx->backlog) {
 363                        ret = -ENOMEM;
 364                        kfree(uevent);
 365                        goto out;
 366                }
 367                ctx->backlog--;
 368        } else if (!ctx->uid || ctx->cm_id != cm_id) {
 369                /*
 370                 * We ignore events for new connections until userspace has set
 371                 * their context.  This can only happen if an error occurs on a
 372                 * new connection before the user accepts it.  This is okay,
 373                 * since the accept will just fail later. However, we do need
 374                 * to release the underlying HW resources in case of a device
 375                 * removal event.
 376                 */
 377                if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
 378                        ucma_removal_event_handler(cm_id);
 379
 380                kfree(uevent);
 381                goto out;
 382        }
 383
 384        list_add_tail(&uevent->list, &ctx->file->event_list);
 385        wake_up_interruptible(&ctx->file->poll_wait);
 386        if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
 387                ucma_removal_event_handler(cm_id);
 388out:
 389        mutex_unlock(&ctx->file->mut);
 390        return ret;
 391}
 392
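/*
 * RDMA_USER_CM_CMD_GET_EVENT handler.  Waits for an event to be queued
 * (unless the file is non-blocking), binds an incoming connect request to a
 * freshly allocated context whose id is reported back to user space, then
 * copies the event out and retires it.
 */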
 393static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
 394                              int in_len, int out_len)
 395{
 396        struct ucma_context *ctx;
 397        struct rdma_ucm_get_event cmd;
 398        struct ucma_event *uevent;
 399        int ret = 0;
 400
 401        /*
 402         * Old 32 bit user space does not send the 4 byte padding in the
  403         * reserved field.  We don't care; allow it to keep working.
 404         */
 405        if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
 406                return -ENOSPC;
 407
 408        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 409                return -EFAULT;
 410
 411        mutex_lock(&file->mut);
 412        while (list_empty(&file->event_list)) {
 413                mutex_unlock(&file->mut);
 414
 415                if (file->filp->f_flags & O_NONBLOCK)
 416                        return -EAGAIN;
 417
 418                if (wait_event_interruptible(file->poll_wait,
 419                                             !list_empty(&file->event_list)))
 420                        return -ERESTARTSYS;
 421
 422                mutex_lock(&file->mut);
 423        }
 424
 425        uevent = list_entry(file->event_list.next, struct ucma_event, list);
 426
 427        if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
 428                ctx = ucma_alloc_ctx(file);
 429                if (!ctx) {
 430                        ret = -ENOMEM;
 431                        goto done;
 432                }
 433                uevent->ctx->backlog++;
 434                ctx->cm_id = uevent->cm_id;
 435                ctx->cm_id->context = ctx;
 436                uevent->resp.id = ctx->id;
 437        }
 438
 439        if (copy_to_user(u64_to_user_ptr(cmd.response),
 440                         &uevent->resp,
 441                         min_t(size_t, out_len, sizeof(uevent->resp)))) {
 442                ret = -EFAULT;
 443                goto done;
 444        }
 445
 446        list_del(&uevent->list);
 447        uevent->ctx->events_reported++;
 448        if (uevent->mc)
 449                uevent->mc->events_reported++;
 450        kfree(uevent);
 451done:
 452        mutex_unlock(&file->mut);
 453        return ret;
 454}
 455
 456static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
 457{
 458        switch (cmd->ps) {
 459        case RDMA_PS_TCP:
 460                *qp_type = IB_QPT_RC;
 461                return 0;
 462        case RDMA_PS_UDP:
 463        case RDMA_PS_IPOIB:
 464                *qp_type = IB_QPT_UD;
 465                return 0;
 466        case RDMA_PS_IB:
 467                *qp_type = cmd->qp_type;
 468                return 0;
 469        default:
 470                return -EINVAL;
 471        }
 472}
 473
 474static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 475                              int in_len, int out_len)
 476{
 477        struct rdma_ucm_create_id cmd;
 478        struct rdma_ucm_create_id_resp resp;
 479        struct ucma_context *ctx;
 480        struct rdma_cm_id *cm_id;
 481        enum ib_qp_type qp_type;
 482        int ret;
 483
 484        if (out_len < sizeof(resp))
 485                return -ENOSPC;
 486
 487        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 488                return -EFAULT;
 489
 490        ret = ucma_get_qp_type(&cmd, &qp_type);
 491        if (ret)
 492                return ret;
 493
 494        mutex_lock(&file->mut);
 495        ctx = ucma_alloc_ctx(file);
 496        mutex_unlock(&file->mut);
 497        if (!ctx)
 498                return -ENOMEM;
 499
 500        ctx->uid = cmd.uid;
 501        cm_id = __rdma_create_id(current->nsproxy->net_ns,
 502                                 ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
 503        if (IS_ERR(cm_id)) {
 504                ret = PTR_ERR(cm_id);
 505                goto err1;
 506        }
 507
 508        resp.id = ctx->id;
 509        if (copy_to_user(u64_to_user_ptr(cmd.response),
 510                         &resp, sizeof(resp))) {
 511                ret = -EFAULT;
 512                goto err2;
 513        }
 514
 515        ctx->cm_id = cm_id;
 516        return 0;
 517
 518err2:
 519        rdma_destroy_id(cm_id);
 520err1:
 521        xa_erase(&ctx_table, ctx->id);
 522        mutex_lock(&file->mut);
 523        list_del(&ctx->list);
 524        mutex_unlock(&file->mut);
 525        kfree(ctx);
 526        return ret;
 527}
 528
 529static void ucma_cleanup_multicast(struct ucma_context *ctx)
 530{
 531        struct ucma_multicast *mc, *tmp;
 532
 533        mutex_lock(&ctx->file->mut);
 534        list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
 535                list_del(&mc->list);
 536                xa_erase(&multicast_table, mc->id);
 537                kfree(mc);
 538        }
 539        mutex_unlock(&ctx->file->mut);
 540}
 541
 542static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 543{
 544        struct ucma_event *uevent, *tmp;
 545
 546        list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
 547                if (uevent->mc != mc)
 548                        continue;
 549
 550                list_del(&uevent->list);
 551                kfree(uevent);
 552        }
 553}
 554
 555/*
  556 * ucma_free_ctx is called after the underlying RDMA CM ID is destroyed. At
  557 * this point, no new events will be reported from the hardware. However, we
  558 * still need to clean up the UCMA context for this ID. Specifically, there
 559 * might be events that have not yet been consumed by the user space software.
 560 * These might include pending connect requests which we have not completed
 561 * processing.  We cannot call rdma_destroy_id while holding the lock of the
 562 * context (file->mut), as it might cause a deadlock. We therefore extract all
 563 * relevant events from the context pending events list while holding the
 564 * mutex. After that we release them as needed.
 565 */
 566static int ucma_free_ctx(struct ucma_context *ctx)
 567{
 568        int events_reported;
 569        struct ucma_event *uevent, *tmp;
 570        LIST_HEAD(list);
 571
 572
 573        ucma_cleanup_multicast(ctx);
 574
 575        /* Cleanup events not yet reported to the user. */
 576        mutex_lock(&ctx->file->mut);
 577        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
 578                if (uevent->ctx == ctx)
 579                        list_move_tail(&uevent->list, &list);
 580        }
 581        list_del(&ctx->list);
 582        mutex_unlock(&ctx->file->mut);
 583
 584        list_for_each_entry_safe(uevent, tmp, &list, list) {
 585                list_del(&uevent->list);
 586                if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
 587                        rdma_destroy_id(uevent->cm_id);
 588                kfree(uevent);
 589        }
 590
 591        events_reported = ctx->events_reported;
 592        kfree(ctx);
 593        return events_reported;
 594}
 595
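/*
 * RDMA_USER_CM_CMD_DESTROY_ID handler.  The context is removed from
 * ctx_table first so no new references can be taken, pending close work is
 * flushed, the CM ID is destroyed once all in-flight users have dropped
 * their references, and the context is finally freed.  The number of events
 * already reported for the id is returned to user space.
 */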
 596static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 597                               int in_len, int out_len)
 598{
 599        struct rdma_ucm_destroy_id cmd;
 600        struct rdma_ucm_destroy_id_resp resp;
 601        struct ucma_context *ctx;
 602        int ret = 0;
 603
 604        if (out_len < sizeof(resp))
 605                return -ENOSPC;
 606
 607        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 608                return -EFAULT;
 609
 610        xa_lock(&ctx_table);
 611        ctx = _ucma_find_context(cmd.id, file);
 612        if (!IS_ERR(ctx))
 613                __xa_erase(&ctx_table, ctx->id);
 614        xa_unlock(&ctx_table);
 615
 616        if (IS_ERR(ctx))
 617                return PTR_ERR(ctx);
 618
 619        mutex_lock(&ctx->file->mut);
 620        ctx->destroying = 1;
 621        mutex_unlock(&ctx->file->mut);
 622
 623        flush_workqueue(ctx->file->close_wq);
  624        /* At this point it is guaranteed that there is no in-flight
  625         * closing task. */
 626        xa_lock(&ctx_table);
 627        if (!ctx->closing) {
 628                xa_unlock(&ctx_table);
 629                ucma_put_ctx(ctx);
 630                wait_for_completion(&ctx->comp);
 631                rdma_destroy_id(ctx->cm_id);
 632        } else {
 633                xa_unlock(&ctx_table);
 634        }
 635
 636        resp.events_reported = ucma_free_ctx(ctx);
 637        if (copy_to_user(u64_to_user_ptr(cmd.response),
 638                         &resp, sizeof(resp)))
 639                ret = -EFAULT;
 640
 641        return ret;
 642}
 643
 644static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
 645                              int in_len, int out_len)
 646{
 647        struct rdma_ucm_bind_ip cmd;
 648        struct ucma_context *ctx;
 649        int ret;
 650
 651        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 652                return -EFAULT;
 653
 654        if (!rdma_addr_size_in6(&cmd.addr))
 655                return -EINVAL;
 656
 657        ctx = ucma_get_ctx(file, cmd.id);
 658        if (IS_ERR(ctx))
 659                return PTR_ERR(ctx);
 660
 661        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
 662        ucma_put_ctx(ctx);
 663        return ret;
 664}
 665
 666static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
 667                         int in_len, int out_len)
 668{
 669        struct rdma_ucm_bind cmd;
 670        struct ucma_context *ctx;
 671        int ret;
 672
 673        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 674                return -EFAULT;
 675
 676        if (cmd.reserved || !cmd.addr_size ||
 677            cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
 678                return -EINVAL;
 679
 680        ctx = ucma_get_ctx(file, cmd.id);
 681        if (IS_ERR(ctx))
 682                return PTR_ERR(ctx);
 683
 684        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
 685        ucma_put_ctx(ctx);
 686        return ret;
 687}
 688
 689static ssize_t ucma_resolve_ip(struct ucma_file *file,
 690                               const char __user *inbuf,
 691                               int in_len, int out_len)
 692{
 693        struct rdma_ucm_resolve_ip cmd;
 694        struct ucma_context *ctx;
 695        int ret;
 696
 697        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 698                return -EFAULT;
 699
 700        if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
 701            !rdma_addr_size_in6(&cmd.dst_addr))
 702                return -EINVAL;
 703
 704        ctx = ucma_get_ctx(file, cmd.id);
 705        if (IS_ERR(ctx))
 706                return PTR_ERR(ctx);
 707
 708        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
 709                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
 710        ucma_put_ctx(ctx);
 711        return ret;
 712}
 713
 714static ssize_t ucma_resolve_addr(struct ucma_file *file,
 715                                 const char __user *inbuf,
 716                                 int in_len, int out_len)
 717{
 718        struct rdma_ucm_resolve_addr cmd;
 719        struct ucma_context *ctx;
 720        int ret;
 721
 722        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 723                return -EFAULT;
 724
 725        if (cmd.reserved ||
 726            (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
 727            !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
 728                return -EINVAL;
 729
 730        ctx = ucma_get_ctx(file, cmd.id);
 731        if (IS_ERR(ctx))
 732                return PTR_ERR(ctx);
 733
 734        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
 735                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
 736        ucma_put_ctx(ctx);
 737        return ret;
 738}
 739
 740static ssize_t ucma_resolve_route(struct ucma_file *file,
 741                                  const char __user *inbuf,
 742                                  int in_len, int out_len)
 743{
 744        struct rdma_ucm_resolve_route cmd;
 745        struct ucma_context *ctx;
 746        int ret;
 747
 748        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 749                return -EFAULT;
 750
 751        ctx = ucma_get_ctx_dev(file, cmd.id);
 752        if (IS_ERR(ctx))
 753                return PTR_ERR(ctx);
 754
 755        ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
 756        ucma_put_ctx(ctx);
 757        return ret;
 758}
 759
 760static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
 761                               struct rdma_route *route)
 762{
 763        struct rdma_dev_addr *dev_addr;
 764
 765        resp->num_paths = route->num_paths;
 766        switch (route->num_paths) {
 767        case 0:
 768                dev_addr = &route->addr.dev_addr;
 769                rdma_addr_get_dgid(dev_addr,
 770                                   (union ib_gid *) &resp->ib_route[0].dgid);
 771                rdma_addr_get_sgid(dev_addr,
 772                                   (union ib_gid *) &resp->ib_route[0].sgid);
 773                resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 774                break;
 775        case 2:
 776                ib_copy_path_rec_to_user(&resp->ib_route[1],
 777                                         &route->path_rec[1]);
 778                /* fall through */
 779        case 1:
 780                ib_copy_path_rec_to_user(&resp->ib_route[0],
 781                                         &route->path_rec[0]);
 782                break;
 783        default:
 784                break;
 785        }
 786}
 787
 788static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
 789                                 struct rdma_route *route)
 790{
 791
 792        resp->num_paths = route->num_paths;
 793        switch (route->num_paths) {
 794        case 0:
 795                rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
 796                            (union ib_gid *)&resp->ib_route[0].dgid);
 797                rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
 798                            (union ib_gid *)&resp->ib_route[0].sgid);
 799                resp->ib_route[0].pkey = cpu_to_be16(0xffff);
 800                break;
 801        case 2:
 802                ib_copy_path_rec_to_user(&resp->ib_route[1],
 803                                         &route->path_rec[1]);
 804                /* fall through */
 805        case 1:
 806                ib_copy_path_rec_to_user(&resp->ib_route[0],
 807                                         &route->path_rec[0]);
 808                break;
 809        default:
 810                break;
 811        }
 812}
 813
 814static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
 815                               struct rdma_route *route)
 816{
 817        struct rdma_dev_addr *dev_addr;
 818
 819        dev_addr = &route->addr.dev_addr;
 820        rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
 821        rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
 822}
 823
 824static ssize_t ucma_query_route(struct ucma_file *file,
 825                                const char __user *inbuf,
 826                                int in_len, int out_len)
 827{
 828        struct rdma_ucm_query cmd;
 829        struct rdma_ucm_query_route_resp resp;
 830        struct ucma_context *ctx;
 831        struct sockaddr *addr;
 832        int ret = 0;
 833
 834        if (out_len < sizeof(resp))
 835                return -ENOSPC;
 836
 837        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 838                return -EFAULT;
 839
 840        ctx = ucma_get_ctx(file, cmd.id);
 841        if (IS_ERR(ctx))
 842                return PTR_ERR(ctx);
 843
 844        memset(&resp, 0, sizeof resp);
 845        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
 846        memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
 847                                     sizeof(struct sockaddr_in) :
 848                                     sizeof(struct sockaddr_in6));
 849        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
 850        memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
 851                                     sizeof(struct sockaddr_in) :
 852                                     sizeof(struct sockaddr_in6));
 853        if (!ctx->cm_id->device)
 854                goto out;
 855
 856        resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
 857        resp.port_num = ctx->cm_id->port_num;
 858
 859        if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
 860                ucma_copy_ib_route(&resp, &ctx->cm_id->route);
 861        else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
 862                ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
 863        else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
 864                ucma_copy_iw_route(&resp, &ctx->cm_id->route);
 865
 866out:
 867        if (copy_to_user(u64_to_user_ptr(cmd.response),
 868                         &resp, sizeof(resp)))
 869                ret = -EFAULT;
 870
 871        ucma_put_ctx(ctx);
 872        return ret;
 873}
 874
 875static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
 876                                   struct rdma_ucm_query_addr_resp *resp)
 877{
 878        if (!cm_id->device)
 879                return;
 880
 881        resp->node_guid = (__force __u64) cm_id->device->node_guid;
 882        resp->port_num = cm_id->port_num;
 883        resp->pkey = (__force __u16) cpu_to_be16(
 884                     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
 885}
 886
 887static ssize_t ucma_query_addr(struct ucma_context *ctx,
 888                               void __user *response, int out_len)
 889{
 890        struct rdma_ucm_query_addr_resp resp;
 891        struct sockaddr *addr;
 892        int ret = 0;
 893
 894        if (out_len < sizeof(resp))
 895                return -ENOSPC;
 896
 897        memset(&resp, 0, sizeof resp);
 898
 899        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
 900        resp.src_size = rdma_addr_size(addr);
 901        memcpy(&resp.src_addr, addr, resp.src_size);
 902
 903        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
 904        resp.dst_size = rdma_addr_size(addr);
 905        memcpy(&resp.dst_addr, addr, resp.dst_size);
 906
 907        ucma_query_device_addr(ctx->cm_id, &resp);
 908
 909        if (copy_to_user(response, &resp, sizeof(resp)))
 910                ret = -EFAULT;
 911
 912        return ret;
 913}
 914
 915static ssize_t ucma_query_path(struct ucma_context *ctx,
 916                               void __user *response, int out_len)
 917{
 918        struct rdma_ucm_query_path_resp *resp;
 919        int i, ret = 0;
 920
 921        if (out_len < sizeof(*resp))
 922                return -ENOSPC;
 923
 924        resp = kzalloc(out_len, GFP_KERNEL);
 925        if (!resp)
 926                return -ENOMEM;
 927
 928        resp->num_paths = ctx->cm_id->route.num_paths;
 929        for (i = 0, out_len -= sizeof(*resp);
 930             i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
 931             i++, out_len -= sizeof(struct ib_path_rec_data)) {
 932                struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];
 933
 934                resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
 935                                           IB_PATH_BIDIRECTIONAL;
 936                if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
 937                        struct sa_path_rec ib;
 938
 939                        sa_convert_path_opa_to_ib(&ib, rec);
 940                        ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
 941
 942                } else {
 943                        ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
 944                }
 945        }
 946
 947        if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
 948                ret = -EFAULT;
 949
 950        kfree(resp);
 951        return ret;
 952}
 953
 954static ssize_t ucma_query_gid(struct ucma_context *ctx,
 955                              void __user *response, int out_len)
 956{
 957        struct rdma_ucm_query_addr_resp resp;
 958        struct sockaddr_ib *addr;
 959        int ret = 0;
 960
 961        if (out_len < sizeof(resp))
 962                return -ENOSPC;
 963
 964        memset(&resp, 0, sizeof resp);
 965
 966        ucma_query_device_addr(ctx->cm_id, &resp);
 967
 968        addr = (struct sockaddr_ib *) &resp.src_addr;
 969        resp.src_size = sizeof(*addr);
 970        if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
 971                memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
 972        } else {
 973                addr->sib_family = AF_IB;
 974                addr->sib_pkey = (__force __be16) resp.pkey;
 975                rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
 976                               NULL);
 977                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
 978                                                    &ctx->cm_id->route.addr.src_addr);
 979        }
 980
 981        addr = (struct sockaddr_ib *) &resp.dst_addr;
 982        resp.dst_size = sizeof(*addr);
 983        if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
 984                memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
 985        } else {
 986                addr->sib_family = AF_IB;
 987                addr->sib_pkey = (__force __be16) resp.pkey;
 988                rdma_read_gids(ctx->cm_id, NULL,
 989                               (union ib_gid *)&addr->sib_addr);
 990                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
 991                                                    &ctx->cm_id->route.addr.dst_addr);
 992        }
 993
 994        if (copy_to_user(response, &resp, sizeof(resp)))
 995                ret = -EFAULT;
 996
 997        return ret;
 998}
 999
1000static ssize_t ucma_query(struct ucma_file *file,
1001                          const char __user *inbuf,
1002                          int in_len, int out_len)
1003{
1004        struct rdma_ucm_query cmd;
1005        struct ucma_context *ctx;
1006        void __user *response;
1007        int ret;
1008
1009        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1010                return -EFAULT;
1011
1012        response = u64_to_user_ptr(cmd.response);
1013        ctx = ucma_get_ctx(file, cmd.id);
1014        if (IS_ERR(ctx))
1015                return PTR_ERR(ctx);
1016
1017        switch (cmd.option) {
1018        case RDMA_USER_CM_QUERY_ADDR:
1019                ret = ucma_query_addr(ctx, response, out_len);
1020                break;
1021        case RDMA_USER_CM_QUERY_PATH:
1022                ret = ucma_query_path(ctx, response, out_len);
1023                break;
1024        case RDMA_USER_CM_QUERY_GID:
1025                ret = ucma_query_gid(ctx, response, out_len);
1026                break;
1027        default:
1028                ret = -ENOSYS;
1029                break;
1030        }
1031
1032        ucma_put_ctx(ctx);
1033        return ret;
1034}
1035
1036static void ucma_copy_conn_param(struct rdma_cm_id *id,
1037                                 struct rdma_conn_param *dst,
1038                                 struct rdma_ucm_conn_param *src)
1039{
1040        dst->private_data = src->private_data;
1041        dst->private_data_len = src->private_data_len;
 1042        dst->responder_resources = src->responder_resources;
1043        dst->initiator_depth = src->initiator_depth;
1044        dst->flow_control = src->flow_control;
1045        dst->retry_count = src->retry_count;
1046        dst->rnr_retry_count = src->rnr_retry_count;
1047        dst->srq = src->srq;
1048        dst->qp_num = src->qp_num;
1049        dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
1050}
1051
1052static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
1053                            int in_len, int out_len)
1054{
1055        struct rdma_ucm_connect cmd;
1056        struct rdma_conn_param conn_param;
1057        struct ucma_context *ctx;
1058        int ret;
1059
1060        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1061                return -EFAULT;
1062
1063        if (!cmd.conn_param.valid)
1064                return -EINVAL;
1065
1066        ctx = ucma_get_ctx_dev(file, cmd.id);
1067        if (IS_ERR(ctx))
1068                return PTR_ERR(ctx);
1069
1070        ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
1071        ret = rdma_connect(ctx->cm_id, &conn_param);
1072        ucma_put_ctx(ctx);
1073        return ret;
1074}
1075
1076static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
1077                           int in_len, int out_len)
1078{
1079        struct rdma_ucm_listen cmd;
1080        struct ucma_context *ctx;
1081        int ret;
1082
1083        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1084                return -EFAULT;
1085
1086        ctx = ucma_get_ctx(file, cmd.id);
1087        if (IS_ERR(ctx))
1088                return PTR_ERR(ctx);
1089
1090        ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
1091                       cmd.backlog : max_backlog;
1092        ret = rdma_listen(ctx->cm_id, ctx->backlog);
1093        ucma_put_ctx(ctx);
1094        return ret;
1095}
1096
1097static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
1098                           int in_len, int out_len)
1099{
1100        struct rdma_ucm_accept cmd;
1101        struct rdma_conn_param conn_param;
1102        struct ucma_context *ctx;
1103        int ret;
1104
1105        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1106                return -EFAULT;
1107
1108        ctx = ucma_get_ctx_dev(file, cmd.id);
1109        if (IS_ERR(ctx))
1110                return PTR_ERR(ctx);
1111
1112        if (cmd.conn_param.valid) {
1113                ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
1114                mutex_lock(&file->mut);
1115                ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
1116                if (!ret)
1117                        ctx->uid = cmd.uid;
1118                mutex_unlock(&file->mut);
1119        } else
1120                ret = __rdma_accept(ctx->cm_id, NULL, NULL);
1121
1122        ucma_put_ctx(ctx);
1123        return ret;
1124}
1125
1126static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
1127                           int in_len, int out_len)
1128{
1129        struct rdma_ucm_reject cmd;
1130        struct ucma_context *ctx;
1131        int ret;
1132
1133        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1134                return -EFAULT;
1135
1136        ctx = ucma_get_ctx_dev(file, cmd.id);
1137        if (IS_ERR(ctx))
1138                return PTR_ERR(ctx);
1139
1140        ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
1141        ucma_put_ctx(ctx);
1142        return ret;
1143}
1144
1145static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
1146                               int in_len, int out_len)
1147{
1148        struct rdma_ucm_disconnect cmd;
1149        struct ucma_context *ctx;
1150        int ret;
1151
1152        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1153                return -EFAULT;
1154
1155        ctx = ucma_get_ctx_dev(file, cmd.id);
1156        if (IS_ERR(ctx))
1157                return PTR_ERR(ctx);
1158
1159        ret = rdma_disconnect(ctx->cm_id);
1160        ucma_put_ctx(ctx);
1161        return ret;
1162}
1163
1164static ssize_t ucma_init_qp_attr(struct ucma_file *file,
1165                                 const char __user *inbuf,
1166                                 int in_len, int out_len)
1167{
1168        struct rdma_ucm_init_qp_attr cmd;
1169        struct ib_uverbs_qp_attr resp;
1170        struct ucma_context *ctx;
1171        struct ib_qp_attr qp_attr;
1172        int ret;
1173
1174        if (out_len < sizeof(resp))
1175                return -ENOSPC;
1176
1177        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1178                return -EFAULT;
1179
1180        if (cmd.qp_state > IB_QPS_ERR)
1181                return -EINVAL;
1182
1183        ctx = ucma_get_ctx_dev(file, cmd.id);
1184        if (IS_ERR(ctx))
1185                return PTR_ERR(ctx);
1186
1187        resp.qp_attr_mask = 0;
1188        memset(&qp_attr, 0, sizeof qp_attr);
1189        qp_attr.qp_state = cmd.qp_state;
1190        ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
1191        if (ret)
1192                goto out;
1193
1194        ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
1195        if (copy_to_user(u64_to_user_ptr(cmd.response),
1196                         &resp, sizeof(resp)))
1197                ret = -EFAULT;
1198
1199out:
1200        ucma_put_ctx(ctx);
1201        return ret;
1202}
1203
1204static int ucma_set_option_id(struct ucma_context *ctx, int optname,
1205                              void *optval, size_t optlen)
1206{
1207        int ret = 0;
1208
1209        switch (optname) {
1210        case RDMA_OPTION_ID_TOS:
1211                if (optlen != sizeof(u8)) {
1212                        ret = -EINVAL;
1213                        break;
1214                }
1215                rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
1216                break;
1217        case RDMA_OPTION_ID_REUSEADDR:
1218                if (optlen != sizeof(int)) {
1219                        ret = -EINVAL;
1220                        break;
1221                }
1222                ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
1223                break;
1224        case RDMA_OPTION_ID_AFONLY:
1225                if (optlen != sizeof(int)) {
1226                        ret = -EINVAL;
1227                        break;
1228                }
1229                ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
1230                break;
1231        case RDMA_OPTION_ID_ACK_TIMEOUT:
1232                if (optlen != sizeof(u8)) {
1233                        ret = -EINVAL;
1234                        break;
1235                }
1236                ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
1237                break;
1238        default:
1239                ret = -ENOSYS;
1240        }
1241
1242        return ret;
1243}
1244
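/*
 * RDMA_OPTION_IB_PATH handler.  Picks the primary bidirectional GMP record
 * from the user-supplied array, installs it on the cm_id (converting to the
 * OPA format when the port supports it) and reports a synthetic
 * ROUTE_RESOLVED event through the normal event path.
 */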
1245static int ucma_set_ib_path(struct ucma_context *ctx,
1246                            struct ib_path_rec_data *path_data, size_t optlen)
1247{
1248        struct sa_path_rec sa_path;
1249        struct rdma_cm_event event;
1250        int ret;
1251
1252        if (optlen % sizeof(*path_data))
1253                return -EINVAL;
1254
1255        for (; optlen; optlen -= sizeof(*path_data), path_data++) {
1256                if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
1257                                         IB_PATH_BIDIRECTIONAL))
1258                        break;
1259        }
1260
1261        if (!optlen)
1262                return -EINVAL;
1263
1264        if (!ctx->cm_id->device)
1265                return -EINVAL;
1266
1267        memset(&sa_path, 0, sizeof(sa_path));
1268
1269        sa_path.rec_type = SA_PATH_REC_TYPE_IB;
1270        ib_sa_unpack_path(path_data->path_rec, &sa_path);
1271
1272        if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
1273                struct sa_path_rec opa;
1274
1275                sa_convert_path_ib_to_opa(&opa, &sa_path);
1276                ret = rdma_set_ib_path(ctx->cm_id, &opa);
1277        } else {
1278                ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
1279        }
1280        if (ret)
1281                return ret;
1282
1283        memset(&event, 0, sizeof event);
1284        event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1285        return ucma_event_handler(ctx->cm_id, &event);
1286}
1287
1288static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
1289                              void *optval, size_t optlen)
1290{
1291        int ret;
1292
1293        switch (optname) {
1294        case RDMA_OPTION_IB_PATH:
1295                ret = ucma_set_ib_path(ctx, optval, optlen);
1296                break;
1297        default:
1298                ret = -ENOSYS;
1299        }
1300
1301        return ret;
1302}
1303
1304static int ucma_set_option_level(struct ucma_context *ctx, int level,
1305                                 int optname, void *optval, size_t optlen)
1306{
1307        int ret;
1308
1309        switch (level) {
1310        case RDMA_OPTION_ID:
1311                ret = ucma_set_option_id(ctx, optname, optval, optlen);
1312                break;
1313        case RDMA_OPTION_IB:
1314                ret = ucma_set_option_ib(ctx, optname, optval, optlen);
1315                break;
1316        default:
1317                ret = -ENOSYS;
1318        }
1319
1320        return ret;
1321}
1322
1323static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
1324                               int in_len, int out_len)
1325{
1326        struct rdma_ucm_set_option cmd;
1327        struct ucma_context *ctx;
1328        void *optval;
1329        int ret;
1330
1331        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1332                return -EFAULT;
1333
1334        if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
1335                return -EINVAL;
1336
1337        ctx = ucma_get_ctx(file, cmd.id);
1338        if (IS_ERR(ctx))
1339                return PTR_ERR(ctx);
1340
1341        optval = memdup_user(u64_to_user_ptr(cmd.optval),
1342                             cmd.optlen);
1343        if (IS_ERR(optval)) {
1344                ret = PTR_ERR(optval);
1345                goto out;
1346        }
1347
1348        ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
1349                                    cmd.optlen);
1350        kfree(optval);
1351
1352out:
1353        ucma_put_ctx(ctx);
1354        return ret;
1355}
1356
1357static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
1358                           int in_len, int out_len)
1359{
1360        struct rdma_ucm_notify cmd;
1361        struct ucma_context *ctx;
1362        int ret = -EINVAL;
1363
1364        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1365                return -EFAULT;
1366
1367        ctx = ucma_get_ctx(file, cmd.id);
1368        if (IS_ERR(ctx))
1369                return PTR_ERR(ctx);
1370
1371        if (ctx->cm_id->device)
1372                ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
1373
1374        ucma_put_ctx(ctx);
1375        return ret;
1376}
1377
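/*
 * Common implementation of the IP and IB multicast join commands.  The join
 * record is created and rdma_join_multicast() called under file->mut; the
 * record is published in multicast_table only after its id has been copied
 * back to user space, and every error path unwinds the join and the
 * reserved id.
 */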
1378static ssize_t ucma_process_join(struct ucma_file *file,
1379                                 struct rdma_ucm_join_mcast *cmd,  int out_len)
1380{
1381        struct rdma_ucm_create_id_resp resp;
1382        struct ucma_context *ctx;
1383        struct ucma_multicast *mc;
1384        struct sockaddr *addr;
1385        int ret;
1386        u8 join_state;
1387
1388        if (out_len < sizeof(resp))
1389                return -ENOSPC;
1390
1391        addr = (struct sockaddr *) &cmd->addr;
1392        if (cmd->addr_size != rdma_addr_size(addr))
1393                return -EINVAL;
1394
1395        if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
1396                join_state = BIT(FULLMEMBER_JOIN);
1397        else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
1398                join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
1399        else
1400                return -EINVAL;
1401
1402        ctx = ucma_get_ctx_dev(file, cmd->id);
1403        if (IS_ERR(ctx))
1404                return PTR_ERR(ctx);
1405
1406        mutex_lock(&file->mut);
1407        mc = ucma_alloc_multicast(ctx);
1408        if (!mc) {
1409                ret = -ENOMEM;
1410                goto err1;
1411        }
1412        mc->join_state = join_state;
1413        mc->uid = cmd->uid;
1414        memcpy(&mc->addr, addr, cmd->addr_size);
1415        ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
1416                                  join_state, mc);
1417        if (ret)
1418                goto err2;
1419
1420        resp.id = mc->id;
1421        if (copy_to_user(u64_to_user_ptr(cmd->response),
1422                         &resp, sizeof(resp))) {
1423                ret = -EFAULT;
1424                goto err3;
1425        }
1426
1427        xa_store(&multicast_table, mc->id, mc, 0);
1428
1429        mutex_unlock(&file->mut);
1430        ucma_put_ctx(ctx);
1431        return 0;
1432
1433err3:
1434        rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
1435        ucma_cleanup_mc_events(mc);
1436err2:
1437        xa_erase(&multicast_table, mc->id);
1438        list_del(&mc->list);
1439        kfree(mc);
1440err1:
1441        mutex_unlock(&file->mut);
1442        ucma_put_ctx(ctx);
1443        return ret;
1444}
1445
1446static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
1447                                      const char __user *inbuf,
1448                                      int in_len, int out_len)
1449{
1450        struct rdma_ucm_join_ip_mcast cmd;
1451        struct rdma_ucm_join_mcast join_cmd;
1452
1453        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1454                return -EFAULT;
1455
1456        join_cmd.response = cmd.response;
1457        join_cmd.uid = cmd.uid;
1458        join_cmd.id = cmd.id;
1459        join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
1460        if (!join_cmd.addr_size)
1461                return -EINVAL;
1462
1463        join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
1464        memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
1465
1466        return ucma_process_join(file, &join_cmd, out_len);
1467}
1468
1469static ssize_t ucma_join_multicast(struct ucma_file *file,
1470                                   const char __user *inbuf,
1471                                   int in_len, int out_len)
1472{
1473        struct rdma_ucm_join_mcast cmd;
1474
1475        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1476                return -EFAULT;
1477
1478        if (!rdma_addr_size_kss(&cmd.addr))
1479                return -EINVAL;
1480
1481        return ucma_process_join(file, &cmd, out_len);
1482}
1483
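/*
 * RDMA_USER_CM_CMD_LEAVE_MCAST handler.  The join record is looked up and
 * removed from multicast_table under its lock, taking a context reference
 * only if the context is still live, before leaving the group and
 * discarding any of its queued events.
 */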
1484static ssize_t ucma_leave_multicast(struct ucma_file *file,
1485                                    const char __user *inbuf,
1486                                    int in_len, int out_len)
1487{
1488        struct rdma_ucm_destroy_id cmd;
1489        struct rdma_ucm_destroy_id_resp resp;
1490        struct ucma_multicast *mc;
1491        int ret = 0;
1492
1493        if (out_len < sizeof(resp))
1494                return -ENOSPC;
1495
1496        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1497                return -EFAULT;
1498
1499        xa_lock(&multicast_table);
1500        mc = xa_load(&multicast_table, cmd.id);
1501        if (!mc)
1502                mc = ERR_PTR(-ENOENT);
1503        else if (mc->ctx->file != file)
1504                mc = ERR_PTR(-EINVAL);
1505        else if (!atomic_inc_not_zero(&mc->ctx->ref))
1506                mc = ERR_PTR(-ENXIO);
1507        else
1508                __xa_erase(&multicast_table, mc->id);
1509        xa_unlock(&multicast_table);
1510
1511        if (IS_ERR(mc)) {
1512                ret = PTR_ERR(mc);
1513                goto out;
1514        }
1515
1516        rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
1517        mutex_lock(&mc->ctx->file->mut);
1518        ucma_cleanup_mc_events(mc);
1519        list_del(&mc->list);
1520        mutex_unlock(&mc->ctx->file->mut);
1521
1522        ucma_put_ctx(mc->ctx);
1523        resp.events_reported = mc->events_reported;
1524        kfree(mc);
1525
1526        if (copy_to_user(u64_to_user_ptr(cmd.response),
1527                         &resp, sizeof(resp)))
1528                ret = -EFAULT;
1529out:
1530        return ret;
1531}
1532
1533static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
1534{
 1535        /* Acquire the mutexes in pointer order to prevent deadlock. */
1536        if (file1 < file2) {
1537                mutex_lock(&file1->mut);
1538                mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
1539        } else {
1540                mutex_lock(&file2->mut);
1541                mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
1542        }
1543}
1544
1545static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
1546{
1547        if (file1 < file2) {
1548                mutex_unlock(&file2->mut);
1549                mutex_unlock(&file1->mut);
1550        } else {
1551                mutex_unlock(&file1->mut);
1552                mutex_unlock(&file2->mut);
1553        }
1554}
1555
1556static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
1557{
1558        struct ucma_event *uevent, *tmp;
1559
1560        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
1561                if (uevent->ctx == ctx)
1562                        list_move_tail(&uevent->list, &file->event_list);
1563}
1564
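/*
 * RDMA_USER_CM_CMD_MIGRATE_ID handler.  Moves a context, together with its
 * queued events, from the ucma file referenced by cmd.fd to the file the
 * command was issued on, taking both file mutexes in pointer order to avoid
 * deadlock.
 */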
1565static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1566                               const char __user *inbuf,
1567                               int in_len, int out_len)
1568{
1569        struct rdma_ucm_migrate_id cmd;
1570        struct rdma_ucm_migrate_resp resp;
1571        struct ucma_context *ctx;
1572        struct fd f;
1573        struct ucma_file *cur_file;
1574        int ret = 0;
1575
1576        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1577                return -EFAULT;
1578
1579        /* Take a reference on the current fd so it cannot be closed under us. */
1580        f = fdget(cmd.fd);
1581        if (!f.file)
1582                return -ENOENT;
1583        if (f.file->f_op != &ucma_fops) {
1584                ret = -EINVAL;
1585                goto file_put;
1586        }
1587
1588        /* Validate current fd and prevent destruction of id. */
1589        ctx = ucma_get_ctx(f.file->private_data, cmd.id);
1590        if (IS_ERR(ctx)) {
1591                ret = PTR_ERR(ctx);
1592                goto file_put;
1593        }
1594
1595        cur_file = ctx->file;
1596        if (cur_file == new_file) {
1597                resp.events_reported = ctx->events_reported;
1598                goto response;
1599        }
1600
1601        /*
1602         * Migrate events between the fds, preserving their order and
1603         * preventing new events from being queued ahead of existing ones.
1604         */
1605        ucma_lock_files(cur_file, new_file);
1606        xa_lock(&ctx_table);
1607
1608        list_move_tail(&ctx->list, &new_file->ctx_list);
1609        ucma_move_events(ctx, new_file);
1610        ctx->file = new_file;
1611        resp.events_reported = ctx->events_reported;
1612
1613        xa_unlock(&ctx_table);
1614        ucma_unlock_files(cur_file, new_file);
1615
1616response:
1617        if (copy_to_user(u64_to_user_ptr(cmd.response),
1618                         &resp, sizeof(resp)))
1619                ret = -EFAULT;
1620
1621        ucma_put_ctx(ctx);
1622file_put:
1623        fdput(f);
1624        return ret;
1625}
1626
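    /*
     * Dispatch table for ucma_write(), indexed by the cmd field of
     * struct rdma_ucm_cmd_hdr.  A NULL entry (RDMA_USER_CM_CMD_GET_OPTION)
     * means the command is not implemented and is rejected with -ENOSYS.
     */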
1627static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
1628                                   const char __user *inbuf,
1629                                   int in_len, int out_len) = {
1630        [RDMA_USER_CM_CMD_CREATE_ID]     = ucma_create_id,
1631        [RDMA_USER_CM_CMD_DESTROY_ID]    = ucma_destroy_id,
1632        [RDMA_USER_CM_CMD_BIND_IP]       = ucma_bind_ip,
1633        [RDMA_USER_CM_CMD_RESOLVE_IP]    = ucma_resolve_ip,
1634        [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
1635        [RDMA_USER_CM_CMD_QUERY_ROUTE]   = ucma_query_route,
1636        [RDMA_USER_CM_CMD_CONNECT]       = ucma_connect,
1637        [RDMA_USER_CM_CMD_LISTEN]        = ucma_listen,
1638        [RDMA_USER_CM_CMD_ACCEPT]        = ucma_accept,
1639        [RDMA_USER_CM_CMD_REJECT]        = ucma_reject,
1640        [RDMA_USER_CM_CMD_DISCONNECT]    = ucma_disconnect,
1641        [RDMA_USER_CM_CMD_INIT_QP_ATTR]  = ucma_init_qp_attr,
1642        [RDMA_USER_CM_CMD_GET_EVENT]     = ucma_get_event,
1643        [RDMA_USER_CM_CMD_GET_OPTION]    = NULL,
1644        [RDMA_USER_CM_CMD_SET_OPTION]    = ucma_set_option,
1645        [RDMA_USER_CM_CMD_NOTIFY]        = ucma_notify,
1646        [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
1647        [RDMA_USER_CM_CMD_LEAVE_MCAST]   = ucma_leave_multicast,
1648        [RDMA_USER_CM_CMD_MIGRATE_ID]    = ucma_migrate_id,
1649        [RDMA_USER_CM_CMD_QUERY]         = ucma_query,
1650        [RDMA_USER_CM_CMD_BIND]          = ucma_bind,
1651        [RDMA_USER_CM_CMD_RESOLVE_ADDR]  = ucma_resolve_addr,
1652        [RDMA_USER_CM_CMD_JOIN_MCAST]    = ucma_join_multicast
1653};
1654
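    /*
     * Every command is a single write() of a struct rdma_ucm_cmd_hdr followed
     * immediately by the command payload; hdr.in is the payload length and
     * hdr.out the size of the response buffer the payload points at.  A
     * minimal illustrative sketch of the userspace side (normally generated
     * by librdmacm) is shown below; it assumes fd is an open descriptor for
     * /dev/infiniband/rdma_cm, the uapi structs from <rdma/rdma_user_cm.h>,
     * and RDMA_PS_TCP from the rdma_cm headers:
     *
     *	struct {
     *		struct rdma_ucm_cmd_hdr hdr;
     *		struct rdma_ucm_create_id cmd;
     *	} msg = {};
     *	struct rdma_ucm_create_id_resp resp;
     *
     *	msg.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
     *	msg.hdr.in = sizeof(msg.cmd);
     *	msg.hdr.out = sizeof(resp);
     *	msg.cmd.uid = 1;			// caller cookie, echoed in events
     *	msg.cmd.response = (uintptr_t)&resp;	// kernel writes the new id here
     *	msg.cmd.ps = RDMA_PS_TCP;
     *
     *	if (write(fd, &msg, sizeof(msg)) != sizeof(msg))
     *		// error; on success resp.id identifies the new context
     */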
1655static ssize_t ucma_write(struct file *filp, const char __user *buf,
1656                          size_t len, loff_t *pos)
1657{
1658        struct ucma_file *file = filp->private_data;
1659        struct rdma_ucm_cmd_hdr hdr;
1660        ssize_t ret;
1661
1662        if (!ib_safe_file_access(filp)) {
1663                pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
1664                            task_tgid_vnr(current), current->comm);
1665                return -EACCES;
1666        }
1667
1668        if (len < sizeof(hdr))
1669                return -EINVAL;
1670
1671        if (copy_from_user(&hdr, buf, sizeof(hdr)))
1672                return -EFAULT;
1673
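            /* Bounds-check the command index and sanitize it against
             * speculative out-of-bounds use (Spectre v1) before it indexes
             * the dispatch table.
             */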
1674        if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
1675                return -EINVAL;
1676        hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));
1677
1678        if (hdr.in + sizeof(hdr) > len)
1679                return -EINVAL;
1680
1681        if (!ucma_cmd_table[hdr.cmd])
1682                return -ENOSYS;
1683
1684        ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
1685        if (!ret)
1686                ret = len;
1687
1688        return ret;
1689}
1690
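    /*
     * The fd is readable whenever at least one event is queued on the file;
     * userspace typically poll()s it and then retrieves the event with
     * RDMA_USER_CM_CMD_GET_EVENT (rdma_get_cm_event() in librdmacm).
     */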
1691static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
1692{
1693        struct ucma_file *file = filp->private_data;
1694        __poll_t mask = 0;
1695
1696        poll_wait(filp, &file->poll_wait, wait);
1697
1698        if (!list_empty(&file->event_list))
1699                mask = EPOLLIN | EPOLLRDNORM;
1700
1701        return mask;
1702}
1703
1704/*
1705 * ucma_open() does not need the BKL:
1706 *
1707 *  - no global state is referred to;
1708 *  - there is no ioctl method to race against;
1709 *  - no further module initialization is required for open to work
1710 *    after the device is registered.
1711 */
1712static int ucma_open(struct inode *inode, struct file *filp)
1713{
1714        struct ucma_file *file;
1715
1716        file = kmalloc(sizeof *file, GFP_KERNEL);
1717        if (!file)
1718                return -ENOMEM;
1719
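            /*
             * Ordered workqueue used to defer cm_id teardown out of event
             * handler context; ucma_close() flushes it before freeing the
             * remaining contexts.
             */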
1720        file->close_wq = alloc_ordered_workqueue("ucma_close_id",
1721                                                 WQ_MEM_RECLAIM);
1722        if (!file->close_wq) {
1723                kfree(file);
1724                return -ENOMEM;
1725        }
1726
1727        INIT_LIST_HEAD(&file->event_list);
1728        INIT_LIST_HEAD(&file->ctx_list);
1729        init_waitqueue_head(&file->poll_wait);
1730        mutex_init(&file->mut);
1731
1732        filp->private_data = file;
1733        file->filp = filp;
1734
1735        return stream_open(inode, filp);
1736}
1737
1738static int ucma_close(struct inode *inode, struct file *filp)
1739{
1740        struct ucma_file *file = filp->private_data;
1741        struct ucma_context *ctx, *tmp;
1742
1743        mutex_lock(&file->mut);
1744        list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
1745                ctx->destroying = 1;
1746                mutex_unlock(&file->mut);
1747
1748                xa_erase(&ctx_table, ctx->id);
1749                flush_workqueue(file->close_wq);
1750                /* Once ctx is marked as destroying and the workqueue has been
1751                 * flushed, no in-flight handler can queue another close work
1752                 * item for this id.
1753                 */
1754                xa_lock(&ctx_table);
1755                if (!ctx->closing) {
1756                        xa_unlock(&ctx_table);
1757                        ucma_put_ctx(ctx);
1758                        wait_for_completion(&ctx->comp);
1759                        /* rdma_destroy_id() ensures that no event handlers are
1760                         * in flight for this id before releasing it.
1761                         */
1762                        rdma_destroy_id(ctx->cm_id);
1763                } else {
1764                        xa_unlock(&ctx_table);
1765                }
1766
1767                ucma_free_ctx(ctx);
1768                mutex_lock(&file->mut);
1769        }
1770        mutex_unlock(&file->mut);
1771        destroy_workqueue(file->close_wq);
1772        kfree(file);
1773        return 0;
1774}
1775
1776static const struct file_operations ucma_fops = {
1777        .owner   = THIS_MODULE,
1778        .open    = ucma_open,
1779        .release = ucma_close,
1780        .write   = ucma_write,
1781        .poll    = ucma_poll,
1782        .llseek  = no_llseek,
1783};
1784
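    /*
     * Dynamic misc device backing /dev/infiniband/rdma_cm; mode 0666 lets
     * unprivileged processes open it and use the RDMA CM.
     */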
1785static struct miscdevice ucma_misc = {
1786        .minor          = MISC_DYNAMIC_MINOR,
1787        .name           = "rdma_cm",
1788        .nodename       = "infiniband/rdma_cm",
1789        .mode           = 0666,
1790        .fops           = &ucma_fops,
1791};
1792
1793static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
1794{
1795        res->abi = RDMA_USER_CM_ABI_VERSION;
1796        res->cdev = ucma_misc.this_device;
1797        return 0;
1798}
1799
1800static struct ib_client rdma_cma_client = {
1801        .name = "rdma_cm",
1802        .get_global_nl_info = ucma_get_global_nl_info,
1803};
1804MODULE_ALIAS_RDMA_CLIENT("rdma_cm");
1805
1806static ssize_t show_abi_version(struct device *dev,
1807                                struct device_attribute *attr,
1808                                char *buf)
1809{
1810        return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
1811}
1812static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
1813
1814static int __init ucma_init(void)
1815{
1816        int ret;
1817
1818        ret = misc_register(&ucma_misc);
1819        if (ret)
1820                return ret;
1821
1822        ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
1823        if (ret) {
1824                pr_err("rdma_ucm: couldn't create abi_version attr\n");
1825                goto err1;
1826        }
1827
1828        ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
1829        if (!ucma_ctl_table_hdr) {
1830                pr_err("rdma_ucm: couldn't register sysctl paths\n");
1831                ret = -ENOMEM;
1832                goto err2;
1833        }
1834
1835        ret = ib_register_client(&rdma_cma_client);
1836        if (ret)
1837                goto err3;
1838
1839        return 0;
1840err3:
1841        unregister_net_sysctl_table(ucma_ctl_table_hdr);
1842err2:
1843        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
1844err1:
1845        misc_deregister(&ucma_misc);
1846        return ret;
1847}
1848
1849static void __exit ucma_cleanup(void)
1850{
1851        ib_unregister_client(&rdma_cma_client);
1852        unregister_net_sysctl_table(ucma_ctl_table_hdr);
1853        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
1854        misc_deregister(&ucma_misc);
1855}
1856
1857module_init(ucma_init);
1858module_exit(ucma_cleanup);
1859