linux/drivers/infiniband/core/ucma.c
/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>

#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
        {
                .procname       = "max_backlog",
                .data           = &max_backlog,
                .maxlen         = sizeof max_backlog,
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        { }
};

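/*
 * State for one open instance of the rdma_cm misc device: the contexts
 * created on this fd, the events pending delivery to userspace, and a
 * workqueue used to tear down IDs on device removal.
 */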
struct ucma_file {
        struct mutex            mut;
        struct file             *filp;
        struct list_head        ctx_list;
        struct list_head        event_list;
        wait_queue_head_t       poll_wait;
        struct workqueue_struct *close_wq;
};

struct ucma_context {
        int                     id;
        struct completion       comp;
        atomic_t                ref;
        int                     events_reported;
        int                     backlog;

        struct ucma_file        *file;
        struct rdma_cm_id       *cm_id;
        u64                     uid;

        struct list_head        list;
        struct list_head        mc_list;
        /* mark that device is in process of destroying the internal HW
         * resources, protected by the global mut
         */
        int                     closing;
        /* sync between removal event and id destroy, protected by file mut */
        int                     destroying;
        struct work_struct      close_work;
};

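/* One multicast join performed on a context; addr is the group address. */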
struct ucma_multicast {
        struct ucma_context     *ctx;
        int                     id;
        int                     events_reported;

        u64                     uid;
        u8                      join_state;
        struct list_head        list;
        struct sockaddr_storage addr;
};

struct ucma_event {
        struct ucma_context     *ctx;
        struct ucma_multicast   *mc;
        struct list_head        list;
        struct rdma_cm_id       *cm_id;
        struct rdma_ucm_event_resp resp;
        struct work_struct      close_work;
};

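/*
 * The global "mut" serializes access to both IDRs below and to
 * ctx->closing (see the field comments above).
 */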
static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
static DEFINE_IDR(multicast_idr);

static const struct file_operations ucma_fops;

static inline struct ucma_context *_ucma_find_context(int id,
                                                      struct ucma_file *file)
{
        struct ucma_context *ctx;

        ctx = idr_find(&ctx_idr, id);
        if (!ctx)
                ctx = ERR_PTR(-ENOENT);
        else if (ctx->file != file || !ctx->cm_id)
                ctx = ERR_PTR(-EINVAL);
        return ctx;
}

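/*
 * Look up a context and take a reference on it.  The reference is dropped
 * by ucma_put_ctx(), which completes ctx->comp when the last one goes away.
 */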
static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
        struct ucma_context *ctx;

        mutex_lock(&mut);
        ctx = _ucma_find_context(id, file);
        if (!IS_ERR(ctx)) {
                if (ctx->closing)
                        ctx = ERR_PTR(-EIO);
                else
                        atomic_inc(&ctx->ref);
        }
        mutex_unlock(&mut);
        return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
        if (atomic_dec_and_test(&ctx->ref))
                complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx() but also requires that ->cm_id->device is valid,
 * i.e. that the CM_ID is bound to a device.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
        struct ucma_context *ctx = ucma_get_ctx(file, id);

        if (IS_ERR(ctx))
                return ctx;
        if (!ctx->cm_id->device) {
                ucma_put_ctx(ctx);
                return ERR_PTR(-EINVAL);
        }
        return ctx;
}

static void ucma_close_event_id(struct work_struct *work)
{
        struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);

        rdma_destroy_id(uevent_close->cm_id);
        kfree(uevent_close);
}

static void ucma_close_id(struct work_struct *work)
{
        struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

        /* Once all inflight tasks are finished, we close all underlying
         * resources. The context is still alive until its explicit
         * destruction by its creator.
         */
        ucma_put_ctx(ctx);
        wait_for_completion(&ctx->comp);
        /* No new events will be generated after destroying the id. */
        rdma_destroy_id(ctx->cm_id);
}

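/* Caller must hold file->mut; the new context is added to file->ctx_list. */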
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
        struct ucma_context *ctx;

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return NULL;

        INIT_WORK(&ctx->close_work, ucma_close_id);
        atomic_set(&ctx->ref, 1);
        init_completion(&ctx->comp);
        INIT_LIST_HEAD(&ctx->mc_list);
        ctx->file = file;

        mutex_lock(&mut);
        ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
        mutex_unlock(&mut);
        if (ctx->id < 0)
                goto error;

        list_add_tail(&ctx->list, &file->ctx_list);
        return ctx;

error:
        kfree(ctx);
        return NULL;
}

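/*
 * Reserve an IDR slot for the join with a NULL pointer; the entry only
 * becomes visible via idr_replace() in ucma_process_join() once the join
 * has fully succeeded.  Caller must hold file->mut.
 */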
static struct ucma_multicast *ucma_alloc_multicast(struct ucma_context *ctx)
{
        struct ucma_multicast *mc;

        mc = kzalloc(sizeof(*mc), GFP_KERNEL);
        if (!mc)
                return NULL;

        mutex_lock(&mut);
        mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
        mutex_unlock(&mut);
        if (mc->id < 0)
                goto error;

        mc->ctx = ctx;
        list_add_tail(&mc->list, &ctx->mc_list);
        return mc;

error:
        kfree(mc);
        return NULL;
}

static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
                                 struct rdma_conn_param *src)
{
        if (src->private_data_len)
                memcpy(dst->private_data, src->private_data,
                       src->private_data_len);
        dst->private_data_len = src->private_data_len;
        dst->responder_resources = src->responder_resources;
        dst->initiator_depth = src->initiator_depth;
        dst->flow_control = src->flow_control;
        dst->retry_count = src->retry_count;
        dst->rnr_retry_count = src->rnr_retry_count;
        dst->srq = src->srq;
        dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct ib_device *device,
                               struct rdma_ucm_ud_param *dst,
                               struct rdma_ud_param *src)
{
        if (src->private_data_len)
                memcpy(dst->private_data, src->private_data,
                       src->private_data_len);
        dst->private_data_len = src->private_data_len;
        ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
        dst->qp_num = src->qp_num;
        dst->qkey = src->qkey;
}

static void ucma_set_event_context(struct ucma_context *ctx,
                                   struct rdma_cm_event *event,
                                   struct ucma_event *uevent)
{
        uevent->ctx = ctx;
        switch (event->event) {
        case RDMA_CM_EVENT_MULTICAST_JOIN:
        case RDMA_CM_EVENT_MULTICAST_ERROR:
                uevent->mc = (struct ucma_multicast *)
                             event->param.ud.private_data;
                uevent->resp.uid = uevent->mc->uid;
                uevent->resp.id = uevent->mc->id;
                break;
        default:
                uevent->resp.uid = ctx->uid;
                uevent->resp.id = ctx->id;
                break;
        }
}

/* Called with file->mut locked for the relevant context. */
static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
{
        struct ucma_context *ctx = cm_id->context;
        struct ucma_event *con_req_eve;
        int event_found = 0;

        if (ctx->destroying)
                return;

        /* If the context still points at this cm_id, the context owns it
         * and the cm_id can be queued for closing.  Otherwise the cm_id is
         * an inflight one sitting on the context's event list, waiting to
         * be detached and reattached to a new context by ucma_get_event();
         * that case is handled separately below.
         */
        if (ctx->cm_id == cm_id) {
                mutex_lock(&mut);
                ctx->closing = 1;
                mutex_unlock(&mut);
                queue_work(ctx->file->close_wq, &ctx->close_work);
                return;
        }

        list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
                if (con_req_eve->cm_id == cm_id &&
                    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
                        list_del(&con_req_eve->list);
                        INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
                        queue_work(ctx->file->close_wq, &con_req_eve->close_work);
                        event_found = 1;
                        break;
                }
        }
        if (!event_found)
                pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
}

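/*
 * rdma_cm event callback.  Queues the event on the owning file's event
 * list and wakes up pollers.  Returning non-zero asks the rdma_cm core
 * to destroy the ID the event arrived on, which is how a CONNECT_REQUEST
 * that cannot be queued is refused (see the kzalloc failure path).
 */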
static int ucma_event_handler(struct rdma_cm_id *cm_id,
                              struct rdma_cm_event *event)
{
        struct ucma_event *uevent;
        struct ucma_context *ctx = cm_id->context;
        int ret = 0;

        uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
        if (!uevent)
                return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;

        mutex_lock(&ctx->file->mut);
        uevent->cm_id = cm_id;
        ucma_set_event_context(ctx, event, uevent);
        uevent->resp.event = event->event;
        uevent->resp.status = event->status;
        if (cm_id->qp_type == IB_QPT_UD)
                ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
                                   &event->param.ud);
        else
                ucma_copy_conn_event(&uevent->resp.param.conn,
                                     &event->param.conn);

        if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
                if (!ctx->backlog) {
                        ret = -ENOMEM;
                        kfree(uevent);
                        goto out;
                }
                ctx->backlog--;
        } else if (!ctx->uid || ctx->cm_id != cm_id) {
                /*
                 * We ignore events for new connections until userspace has set
                 * their context.  This can only happen if an error occurs on a
                 * new connection before the user accepts it.  This is okay,
                 * since the accept will just fail later. However, we do need
                 * to release the underlying HW resources in case of a device
                 * removal event.
                 */
                if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
                        ucma_removal_event_handler(cm_id);

                kfree(uevent);
                goto out;
        }

        list_add_tail(&uevent->list, &ctx->file->event_list);
        wake_up_interruptible(&ctx->file->poll_wait);
        if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
                ucma_removal_event_handler(cm_id);
out:
        mutex_unlock(&ctx->file->mut);
        return ret;
}

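/*
 * Deliver the next pending event to userspace, blocking unless O_NONBLOCK
 * is set.  A CONNECT_REQUEST event carries a brand-new CM ID, for which a
 * fresh context is allocated here before the event is reported.
 */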
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
                              int in_len, int out_len)
{
        struct ucma_context *ctx;
        struct rdma_ucm_get_event cmd;
        struct ucma_event *uevent;
        int ret = 0;

        /*
         * Old 32 bit user space does not send the 4 byte padding in the
         * reserved field. We don't care, allow it to keep working.
         */
        if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        mutex_lock(&file->mut);
        while (list_empty(&file->event_list)) {
                mutex_unlock(&file->mut);

                if (file->filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;

                if (wait_event_interruptible(file->poll_wait,
                                             !list_empty(&file->event_list)))
                        return -ERESTARTSYS;

                mutex_lock(&file->mut);
        }

        uevent = list_entry(file->event_list.next, struct ucma_event, list);

        if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
                ctx = ucma_alloc_ctx(file);
                if (!ctx) {
                        ret = -ENOMEM;
                        goto done;
                }
                uevent->ctx->backlog++;
                ctx->cm_id = uevent->cm_id;
                ctx->cm_id->context = ctx;
                uevent->resp.id = ctx->id;
        }

        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &uevent->resp,
                         min_t(size_t, out_len, sizeof(uevent->resp)))) {
                ret = -EFAULT;
                goto done;
        }

        list_del(&uevent->list);
        uevent->ctx->events_reported++;
        if (uevent->mc)
                uevent->mc->events_reported++;
        kfree(uevent);
done:
        mutex_unlock(&file->mut);
        return ret;
}

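/* Map the requested RDMA port space to the QP type it implies. */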
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
        switch (cmd->ps) {
        case RDMA_PS_TCP:
                *qp_type = IB_QPT_RC;
                return 0;
        case RDMA_PS_UDP:
        case RDMA_PS_IPOIB:
                *qp_type = IB_QPT_UD;
                return 0;
        case RDMA_PS_IB:
                *qp_type = cmd->qp_type;
                return 0;
        default:
                return -EINVAL;
        }
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
                              int in_len, int out_len)
{
        struct rdma_ucm_create_id cmd;
        struct rdma_ucm_create_id_resp resp;
        struct ucma_context *ctx;
        struct rdma_cm_id *cm_id;
        enum ib_qp_type qp_type;
        int ret;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ret = ucma_get_qp_type(&cmd, &qp_type);
        if (ret)
                return ret;

        mutex_lock(&file->mut);
        ctx = ucma_alloc_ctx(file);
        mutex_unlock(&file->mut);
        if (!ctx)
                return -ENOMEM;

        ctx->uid = cmd.uid;
        cm_id = __rdma_create_id(current->nsproxy->net_ns,
                                 ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
        if (IS_ERR(cm_id)) {
                ret = PTR_ERR(cm_id);
                goto err1;
        }

        resp.id = ctx->id;
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp))) {
                ret = -EFAULT;
                goto err2;
        }

        ctx->cm_id = cm_id;
        return 0;

err2:
        rdma_destroy_id(cm_id);
err1:
        mutex_lock(&mut);
        idr_remove(&ctx_idr, ctx->id);
        mutex_unlock(&mut);
        mutex_lock(&file->mut);
        list_del(&ctx->list);
        mutex_unlock(&file->mut);
        kfree(ctx);
        return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
        struct ucma_multicast *mc, *tmp;

        mutex_lock(&mut);
        list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
                list_del(&mc->list);
                idr_remove(&multicast_idr, mc->id);
                kfree(mc);
        }
        mutex_unlock(&mut);
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
        struct ucma_event *uevent, *tmp;

        list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
                if (uevent->mc != mc)
                        continue;

                list_del(&uevent->list);
                kfree(uevent);
        }
}

/*
 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
 * this point, no new events will be reported from the hardware. However, we
 * still need to clean up the UCMA context for this ID. Specifically, there
 * might be events that have not yet been consumed by the user space software.
 * These might include pending connect requests which we have not completed
 * processing.  We cannot call rdma_destroy_id while holding the lock of the
 * context (file->mut), as it might cause a deadlock. We therefore extract all
 * relevant events from the context pending events list while holding the
 * mutex. After that we release them as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
        int events_reported;
        struct ucma_event *uevent, *tmp;
        LIST_HEAD(list);

        ucma_cleanup_multicast(ctx);

        /* Cleanup events not yet reported to the user. */
        mutex_lock(&ctx->file->mut);
        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
                if (uevent->ctx == ctx)
                        list_move_tail(&uevent->list, &list);
        }
        list_del(&ctx->list);
        mutex_unlock(&ctx->file->mut);

        list_for_each_entry_safe(uevent, tmp, &list, list) {
                list_del(&uevent->list);
                if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
                        rdma_destroy_id(uevent->cm_id);
                kfree(uevent);
        }

        events_reported = ctx->events_reported;
        kfree(ctx);
        return events_reported;
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_destroy_id cmd;
        struct rdma_ucm_destroy_id_resp resp;
        struct ucma_context *ctx;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        mutex_lock(&mut);
        ctx = _ucma_find_context(cmd.id, file);
        if (!IS_ERR(ctx))
                idr_remove(&ctx_idr, ctx->id);
        mutex_unlock(&mut);

        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        mutex_lock(&ctx->file->mut);
        ctx->destroying = 1;
        mutex_unlock(&ctx->file->mut);

        flush_workqueue(ctx->file->close_wq);
        /* At this point it's guaranteed that there is no inflight closing
         * task.
         */
        mutex_lock(&mut);
        if (!ctx->closing) {
                mutex_unlock(&mut);
                ucma_put_ctx(ctx);
                wait_for_completion(&ctx->comp);
                rdma_destroy_id(ctx->cm_id);
        } else {
                mutex_unlock(&mut);
        }

        resp.events_reported = ucma_free_ctx(ctx);
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;

        return ret;
}

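/*
 * Legacy ABI: the address is passed as a struct sockaddr_in6 sized blob.
 * ucma_bind() below is the newer variant that takes a sockaddr_storage
 * plus an explicit, validated size.
 */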
static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
                            int in_len, int out_len)
{
        struct rdma_ucm_bind_ip cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (!rdma_addr_size_in6(&cmd.addr))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
                         int in_len, int out_len)
{
        struct rdma_ucm_bind cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (cmd.reserved || !cmd.addr_size ||
            cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
                               const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_resolve_ip cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
            !rdma_addr_size_in6(&cmd.dst_addr))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
                                 const char __user *inbuf,
                                 int in_len, int out_len)
{
        struct rdma_ucm_resolve_addr cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (cmd.reserved ||
            (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
            !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
                                (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
                                  const char __user *inbuf,
                                  int in_len, int out_len)
{
        struct rdma_ucm_resolve_route cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
        ucma_put_ctx(ctx);
        return ret;
}

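/*
 * num_paths == 0 means only the address has been resolved, so report the
 * GIDs and pkey taken from the device address; 1 reports the primary path,
 * and 2 additionally reports the alternate path.
 */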
static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
                               struct rdma_route *route)
{
        struct rdma_dev_addr *dev_addr;

        resp->num_paths = route->num_paths;
        switch (route->num_paths) {
        case 0:
                dev_addr = &route->addr.dev_addr;
                rdma_addr_get_dgid(dev_addr,
                                   (union ib_gid *) &resp->ib_route[0].dgid);
                rdma_addr_get_sgid(dev_addr,
                                   (union ib_gid *) &resp->ib_route[0].sgid);
                resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
                break;
        case 2:
                ib_copy_path_rec_to_user(&resp->ib_route[1],
                                         &route->path_rec[1]);
                /* fall through */
        case 1:
                ib_copy_path_rec_to_user(&resp->ib_route[0],
                                         &route->path_rec[0]);
                break;
        default:
                break;
        }
}

static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
                                 struct rdma_route *route)
{
        resp->num_paths = route->num_paths;
        switch (route->num_paths) {
        case 0:
                rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
                            (union ib_gid *)&resp->ib_route[0].dgid);
                rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
                            (union ib_gid *)&resp->ib_route[0].sgid);
                resp->ib_route[0].pkey = cpu_to_be16(0xffff);
                break;
        case 2:
                ib_copy_path_rec_to_user(&resp->ib_route[1],
                                         &route->path_rec[1]);
                /* fall through */
        case 1:
                ib_copy_path_rec_to_user(&resp->ib_route[0],
                                         &route->path_rec[0]);
                break;
        default:
                break;
        }
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
                               struct rdma_route *route)
{
        struct rdma_dev_addr *dev_addr;

        dev_addr = &route->addr.dev_addr;
        rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
        rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}

static ssize_t ucma_query_route(struct ucma_file *file,
                                const char __user *inbuf,
                                int in_len, int out_len)
{
        struct rdma_ucm_query cmd;
        struct rdma_ucm_query_route_resp resp;
        struct ucma_context *ctx;
        struct sockaddr *addr;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        memset(&resp, 0, sizeof resp);
        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
        memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
                                     sizeof(struct sockaddr_in) :
                                     sizeof(struct sockaddr_in6));
        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
        memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
                                     sizeof(struct sockaddr_in) :
                                     sizeof(struct sockaddr_in6));
        if (!ctx->cm_id->device)
                goto out;

        resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
        resp.port_num = ctx->cm_id->port_num;

        if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
                ucma_copy_ib_route(&resp, &ctx->cm_id->route);
        else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
                ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
        else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
                ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;

        ucma_put_ctx(ctx);
        return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
                                   struct rdma_ucm_query_addr_resp *resp)
{
        if (!cm_id->device)
                return;

        resp->node_guid = (__force __u64) cm_id->device->node_guid;
        resp->port_num = cm_id->port_num;
        resp->pkey = (__force __u16) cpu_to_be16(
                     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
                               void __user *response, int out_len)
{
        struct rdma_ucm_query_addr_resp resp;
        struct sockaddr *addr;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        memset(&resp, 0, sizeof resp);

        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
        resp.src_size = rdma_addr_size(addr);
        memcpy(&resp.src_addr, addr, resp.src_size);

        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
        resp.dst_size = rdma_addr_size(addr);
        memcpy(&resp.dst_addr, addr, resp.dst_size);

        ucma_query_device_addr(ctx->cm_id, &resp);

        if (copy_to_user(response, &resp, sizeof(resp)))
                ret = -EFAULT;

        return ret;
}

static ssize_t ucma_query_path(struct ucma_context *ctx,
                               void __user *response, int out_len)
{
        struct rdma_ucm_query_path_resp *resp;
        int i, ret = 0;

        if (out_len < sizeof(*resp))
                return -ENOSPC;

        resp = kzalloc(out_len, GFP_KERNEL);
        if (!resp)
                return -ENOMEM;

        resp->num_paths = ctx->cm_id->route.num_paths;
        for (i = 0, out_len -= sizeof(*resp);
             i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
             i++, out_len -= sizeof(struct ib_path_rec_data)) {
                struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

                resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
                                           IB_PATH_BIDIRECTIONAL;
                if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
                        struct sa_path_rec ib;

                        sa_convert_path_opa_to_ib(&ib, rec);
                        ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
                } else {
                        ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
                }
        }

        if (copy_to_user(response, resp,
                         sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
                ret = -EFAULT;

        kfree(resp);
        return ret;
}

static ssize_t ucma_query_gid(struct ucma_context *ctx,
                              void __user *response, int out_len)
{
        struct rdma_ucm_query_addr_resp resp;
        struct sockaddr_ib *addr;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        memset(&resp, 0, sizeof resp);

        ucma_query_device_addr(ctx->cm_id, &resp);

        addr = (struct sockaddr_ib *) &resp.src_addr;
        resp.src_size = sizeof(*addr);
        if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
                memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
        } else {
                addr->sib_family = AF_IB;
                addr->sib_pkey = (__force __be16) resp.pkey;
                rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
                               NULL);
                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
                                                    &ctx->cm_id->route.addr.src_addr);
        }

        addr = (struct sockaddr_ib *) &resp.dst_addr;
        resp.dst_size = sizeof(*addr);
        if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
                memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
        } else {
                addr->sib_family = AF_IB;
                addr->sib_pkey = (__force __be16) resp.pkey;
                rdma_read_gids(ctx->cm_id, NULL,
                               (union ib_gid *)&addr->sib_addr);
                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
                                                    &ctx->cm_id->route.addr.dst_addr);
        }

        if (copy_to_user(response, &resp, sizeof(resp)))
                ret = -EFAULT;

        return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
                          const char __user *inbuf,
                          int in_len, int out_len)
{
        struct rdma_ucm_query cmd;
        struct ucma_context *ctx;
        void __user *response;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        response = u64_to_user_ptr(cmd.response);
        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        switch (cmd.option) {
        case RDMA_USER_CM_QUERY_ADDR:
                ret = ucma_query_addr(ctx, response, out_len);
                break;
        case RDMA_USER_CM_QUERY_PATH:
                ret = ucma_query_path(ctx, response, out_len);
                break;
        case RDMA_USER_CM_QUERY_GID:
                ret = ucma_query_gid(ctx, response, out_len);
                break;
        default:
                ret = -ENOSYS;
                break;
        }

        ucma_put_ctx(ctx);
        return ret;
}

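/*
 * Note that the qkey is only meaningful (and only copied) when the
 * source address is an AF_IB address.
 */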
static void ucma_copy_conn_param(struct rdma_cm_id *id,
                                 struct rdma_conn_param *dst,
                                 struct rdma_ucm_conn_param *src)
{
        dst->private_data = src->private_data;
        dst->private_data_len = src->private_data_len;
        dst->responder_resources = src->responder_resources;
        dst->initiator_depth = src->initiator_depth;
        dst->flow_control = src->flow_control;
        dst->retry_count = src->retry_count;
        dst->rnr_retry_count = src->rnr_retry_count;
        dst->srq = src->srq;
        dst->qp_num = src->qp_num;
        dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
                            int in_len, int out_len)
{
        struct rdma_ucm_connect cmd;
        struct rdma_conn_param conn_param;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (!cmd.conn_param.valid)
                return -EINVAL;

        ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
        ret = rdma_connect(ctx->cm_id, &conn_param);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_listen cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
                       cmd.backlog : max_backlog;
        ret = rdma_listen(ctx->cm_id, ctx->backlog);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_accept cmd;
        struct rdma_conn_param conn_param;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        if (cmd.conn_param.valid) {
                ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
                mutex_lock(&file->mut);
                ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
                if (!ret)
                        ctx->uid = cmd.uid;
                mutex_unlock(&file->mut);
        } else
                ret = __rdma_accept(ctx->cm_id, NULL, NULL);

        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_reject cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_disconnect cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_disconnect(ctx->cm_id);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
                                 const char __user *inbuf,
                                 int in_len, int out_len)
{
        struct rdma_ucm_init_qp_attr cmd;
        struct ib_uverbs_qp_attr resp;
        struct ucma_context *ctx;
        struct ib_qp_attr qp_attr;
        int ret;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (cmd.qp_state > IB_QPS_ERR)
                return -EINVAL;

        ctx = ucma_get_ctx_dev(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        resp.qp_attr_mask = 0;
        memset(&qp_attr, 0, sizeof qp_attr);
        qp_attr.qp_state = cmd.qp_state;
        ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
        if (ret)
                goto out;

        ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;

out:
        ucma_put_ctx(ctx);
        return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
                              void *optval, size_t optlen)
{
        int ret = 0;

        switch (optname) {
        case RDMA_OPTION_ID_TOS:
                if (optlen != sizeof(u8)) {
                        ret = -EINVAL;
                        break;
                }
                rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
                break;
        case RDMA_OPTION_ID_REUSEADDR:
                if (optlen != sizeof(int)) {
                        ret = -EINVAL;
                        break;
                }
                ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
                break;
        case RDMA_OPTION_ID_AFONLY:
                if (optlen != sizeof(int)) {
                        ret = -EINVAL;
                        break;
                }
                ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
                break;
        case RDMA_OPTION_ID_ACK_TIMEOUT:
                if (optlen != sizeof(u8)) {
                        ret = -EINVAL;
                        break;
                }
                ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *) optval));
                break;
        default:
                ret = -ENOSYS;
        }

        return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
                            struct ib_path_rec_data *path_data, size_t optlen)
{
        struct sa_path_rec sa_path;
        struct rdma_cm_event event;
        int ret;

        if (optlen % sizeof(*path_data))
                return -EINVAL;

        for (; optlen; optlen -= sizeof(*path_data), path_data++) {
                if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
                                         IB_PATH_BIDIRECTIONAL))
                        break;
        }

        if (!optlen)
                return -EINVAL;

        if (!ctx->cm_id->device)
                return -EINVAL;

        memset(&sa_path, 0, sizeof(sa_path));

        sa_path.rec_type = SA_PATH_REC_TYPE_IB;
        ib_sa_unpack_path(path_data->path_rec, &sa_path);

        if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
                struct sa_path_rec opa;

                sa_convert_path_ib_to_opa(&opa, &sa_path);
                ret = rdma_set_ib_path(ctx->cm_id, &opa);
        } else {
                ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
        }
        if (ret)
                return ret;

        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
        return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
                              void *optval, size_t optlen)
{
        int ret;

        switch (optname) {
        case RDMA_OPTION_IB_PATH:
                ret = ucma_set_ib_path(ctx, optval, optlen);
                break;
        default:
                ret = -ENOSYS;
        }

        return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
                                 int optname, void *optval, size_t optlen)
{
        int ret;

        switch (level) {
        case RDMA_OPTION_ID:
                ret = ucma_set_option_id(ctx, optname, optval, optlen);
                break;
        case RDMA_OPTION_IB:
                ret = ucma_set_option_ib(ctx, optname, optval, optlen);
                break;
        default:
                ret = -ENOSYS;
        }

        return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_set_option cmd;
        struct ucma_context *ctx;
        void *optval;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        optval = memdup_user(u64_to_user_ptr(cmd.optval),
                             cmd.optlen);
        if (IS_ERR(optval)) {
                ret = PTR_ERR(optval);
                goto out;
        }

        ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
                                    cmd.optlen);
        kfree(optval);

out:
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_notify cmd;
        struct ucma_context *ctx;
        int ret = -EINVAL;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        if (ctx->cm_id->device)
                ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);

        ucma_put_ctx(ctx);
        return ret;
}

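/*
 * Common join path for both the IP and the generic multicast commands.
 * Only full-member and send-only full-member joins are accepted, and the
 * multicast entry is published in the IDR only after everything else,
 * including the copy to userspace, has succeeded.
 */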
static ssize_t ucma_process_join(struct ucma_file *file,
                                 struct rdma_ucm_join_mcast *cmd, int out_len)
{
        struct rdma_ucm_create_id_resp resp;
        struct ucma_context *ctx;
        struct ucma_multicast *mc;
        struct sockaddr *addr;
        int ret;
        u8 join_state;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        addr = (struct sockaddr *) &cmd->addr;
        if (cmd->addr_size != rdma_addr_size(addr))
                return -EINVAL;

        if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
                join_state = BIT(FULLMEMBER_JOIN);
        else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
                join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
        else
                return -EINVAL;

        ctx = ucma_get_ctx_dev(file, cmd->id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        mutex_lock(&file->mut);
        mc = ucma_alloc_multicast(ctx);
        if (!mc) {
                ret = -ENOMEM;
                goto err1;
        }
        mc->join_state = join_state;
        mc->uid = cmd->uid;
        memcpy(&mc->addr, addr, cmd->addr_size);
        ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
                                  join_state, mc);
        if (ret)
                goto err2;

        resp.id = mc->id;
        if (copy_to_user(u64_to_user_ptr(cmd->response),
                         &resp, sizeof(resp))) {
                ret = -EFAULT;
                goto err3;
        }

        mutex_lock(&mut);
        idr_replace(&multicast_idr, mc, mc->id);
        mutex_unlock(&mut);

        mutex_unlock(&file->mut);
        ucma_put_ctx(ctx);
        return 0;

err3:
        rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
        ucma_cleanup_mc_events(mc);
err2:
        mutex_lock(&mut);
        idr_remove(&multicast_idr, mc->id);
        mutex_unlock(&mut);
        list_del(&mc->list);
        kfree(mc);
err1:
        mutex_unlock(&file->mut);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
                                      const char __user *inbuf,
                                      int in_len, int out_len)
{
        struct rdma_ucm_join_ip_mcast cmd;
        struct rdma_ucm_join_mcast join_cmd;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        join_cmd.response = cmd.response;
        join_cmd.uid = cmd.uid;
        join_cmd.id = cmd.id;
        join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
        if (!join_cmd.addr_size)
                return -EINVAL;

        join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
        memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

        return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
                                   const char __user *inbuf,
                                   int in_len, int out_len)
{
        struct rdma_ucm_join_mcast cmd;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (!rdma_addr_size_kss(&cmd.addr))
                return -EINVAL;

        return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
                                    const char __user *inbuf,
                                    int in_len, int out_len)
{
        struct rdma_ucm_destroy_id cmd;
        struct rdma_ucm_destroy_id_resp resp;
        struct ucma_multicast *mc;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        mutex_lock(&mut);
        mc = idr_find(&multicast_idr, cmd.id);
        if (!mc)
                mc = ERR_PTR(-ENOENT);
        else if (mc->ctx->file != file)
                mc = ERR_PTR(-EINVAL);
        else if (!atomic_inc_not_zero(&mc->ctx->ref))
                mc = ERR_PTR(-ENXIO);
        else
                idr_remove(&multicast_idr, mc->id);
        mutex_unlock(&mut);

        if (IS_ERR(mc)) {
                ret = PTR_ERR(mc);
                goto out;
        }

        rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
        mutex_lock(&mc->ctx->file->mut);
        ucma_cleanup_mc_events(mc);
        list_del(&mc->list);
        mutex_unlock(&mc->ctx->file->mut);

        ucma_put_ctx(mc->ctx);
        resp.events_reported = mc->events_reported;
        kfree(mc);

        if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
out:
        return ret;
}

static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
{
        /* Acquire the mutexes in pointer order to prevent deadlock. */
        if (file1 < file2) {
                mutex_lock(&file1->mut);
                mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
        } else {
                mutex_lock(&file2->mut);
                mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
        }
}

static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
{
        if (file1 < file2) {
                mutex_unlock(&file2->mut);
                mutex_unlock(&file1->mut);
        } else {
                mutex_unlock(&file1->mut);
                mutex_unlock(&file2->mut);
        }
}

1568static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
1569{
1570        struct ucma_event *uevent, *tmp;
1571
1572        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
1573                if (uevent->ctx == ctx)
1574                        list_move_tail(&uevent->list, &file->event_list);
1575}
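
The _safe iterator is required here because list_move_tail() unlinks the current entry: plain list_for_each_entry() would follow the entry's next pointer after it had already been respliced onto the destination list. A minimal sketch of the same move-while-iterating pattern (hypothetical item type):

#include <linux/list.h>

struct item {
        int key;
        struct list_head node;
};

/* Move every item matching @key from @src to @dst, preserving order.
 * The _safe variant caches the next entry in @tmp before the body runs,
 * so unlinking the current entry cannot break the walk. */
static void move_matching(struct list_head *src, struct list_head *dst, int key)
{
        struct item *it, *tmp;

        list_for_each_entry_safe(it, tmp, src, node)
                if (it->key == key)
                        list_move_tail(&it->node, dst);
}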
1576
1577static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1578                               const char __user *inbuf,
1579                               int in_len, int out_len)
1580{
1581        struct rdma_ucm_migrate_id cmd;
1582        struct rdma_ucm_migrate_resp resp;
1583        struct ucma_context *ctx;
1584        struct fd f;
1585        struct ucma_file *cur_file;
1586        int ret = 0;
1587
1588        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1589                return -EFAULT;
1590
1591        /* Take a reference on the ctx's current fd so it cannot be closed under us. */
1592        f = fdget(cmd.fd);
1593        if (!f.file)
1594                return -ENOENT;
1595        if (f.file->f_op != &ucma_fops) {
1596                ret = -EINVAL;
1597                goto file_put;
1598        }
1599
1600        /* Validate current fd and prevent destruction of id. */
1601        ctx = ucma_get_ctx(f.file->private_data, cmd.id);
1602        if (IS_ERR(ctx)) {
1603                ret = PTR_ERR(ctx);
1604                goto file_put;
1605        }
1606
1607        cur_file = ctx->file;
1608        if (cur_file == new_file) {
1609                resp.events_reported = ctx->events_reported;
1610                goto response;
1611        }
1612
1613        /*
1614         * Migrate events between fds, maintaining order and preventing new
1615         * events from being queued ahead of the existing ones.
1616         */
1617        ucma_lock_files(cur_file, new_file);
1618        mutex_lock(&mut);
1619
1620        list_move_tail(&ctx->list, &new_file->ctx_list);
1621        ucma_move_events(ctx, new_file);
1622        ctx->file = new_file;
1623        resp.events_reported = ctx->events_reported;
1624
1625        mutex_unlock(&mut);
1626        ucma_unlock_files(cur_file, new_file);
1627
1628response:
1629        if (copy_to_user(u64_to_user_ptr(cmd.response),
1630                         &resp, sizeof(resp)))
1631                ret = -EFAULT;
1632
1633        ucma_put_ctx(ctx);
1634file_put:
1635        fdput(f);
1636        return ret;
1637}
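
From userspace, every ucma request is a single write() of a struct rdma_ucm_cmd_hdr followed by the command payload (see ucma_write() below); the kernel fills the response buffer before write() returns. A hedged sketch of driving this migrate command, with error handling kept minimal and struct layouts assumed to match <rdma/rdma_user_cm.h>:

#include <rdma/rdma_user_cm.h>
#include <stdint.h>
#include <unistd.h>

/* Move cm_id `id` from the rdma_cm fd `from_fd` onto `to_fd`. */
static int migrate_id(int to_fd, int from_fd, uint32_t id,
                      uint32_t *events_reported)
{
        struct rdma_ucm_migrate_resp resp;
        struct {
                struct rdma_ucm_cmd_hdr hdr;
                struct rdma_ucm_migrate_id cmd;
        } msg = {
                .hdr = {
                        .cmd = RDMA_USER_CM_CMD_MIGRATE_ID,
                        .in  = sizeof(msg.cmd),
                        .out = sizeof(resp),
                },
                .cmd = {
                        .response = (uintptr_t)&resp,
                        .id       = id,
                        .fd       = (uint32_t)from_fd,  /* source fd */
                },
        };

        /* Note the command is written to the *destination* fd: the
         * handler above runs with new_file == to_fd's private data. */
        if (write(to_fd, &msg, sizeof(msg)) != sizeof(msg))
                return -1;
        *events_reported = resp.events_reported;
        return 0;
}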
1638
1639static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
1640                                   const char __user *inbuf,
1641                                   int in_len, int out_len) = {
1642        [RDMA_USER_CM_CMD_CREATE_ID]     = ucma_create_id,
1643        [RDMA_USER_CM_CMD_DESTROY_ID]    = ucma_destroy_id,
1644        [RDMA_USER_CM_CMD_BIND_IP]       = ucma_bind_ip,
1645        [RDMA_USER_CM_CMD_RESOLVE_IP]    = ucma_resolve_ip,
1646        [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
1647        [RDMA_USER_CM_CMD_QUERY_ROUTE]   = ucma_query_route,
1648        [RDMA_USER_CM_CMD_CONNECT]       = ucma_connect,
1649        [RDMA_USER_CM_CMD_LISTEN]        = ucma_listen,
1650        [RDMA_USER_CM_CMD_ACCEPT]        = ucma_accept,
1651        [RDMA_USER_CM_CMD_REJECT]        = ucma_reject,
1652        [RDMA_USER_CM_CMD_DISCONNECT]    = ucma_disconnect,
1653        [RDMA_USER_CM_CMD_INIT_QP_ATTR]  = ucma_init_qp_attr,
1654        [RDMA_USER_CM_CMD_GET_EVENT]     = ucma_get_event,
1655        [RDMA_USER_CM_CMD_GET_OPTION]    = NULL,
1656        [RDMA_USER_CM_CMD_SET_OPTION]    = ucma_set_option,
1657        [RDMA_USER_CM_CMD_NOTIFY]        = ucma_notify,
1658        [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
1659        [RDMA_USER_CM_CMD_LEAVE_MCAST]   = ucma_leave_multicast,
1660        [RDMA_USER_CM_CMD_MIGRATE_ID]    = ucma_migrate_id,
1661        [RDMA_USER_CM_CMD_QUERY]         = ucma_query,
1662        [RDMA_USER_CM_CMD_BIND]          = ucma_bind,
1663        [RDMA_USER_CM_CMD_RESOLVE_ADDR]  = ucma_resolve_addr,
1664        [RDMA_USER_CM_CMD_JOIN_MCAST]    = ucma_join_multicast
1665};
1666
1667static ssize_t ucma_write(struct file *filp, const char __user *buf,
1668                          size_t len, loff_t *pos)
1669{
1670        struct ucma_file *file = filp->private_data;
1671        struct rdma_ucm_cmd_hdr hdr;
1672        ssize_t ret;
1673
1674        if (!ib_safe_file_access(filp)) {
1675                pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
1676                            task_tgid_vnr(current), current->comm);
1677                return -EACCES;
1678        }
1679
1680        if (len < sizeof(hdr))
1681                return -EINVAL;
1682
1683        if (copy_from_user(&hdr, buf, sizeof(hdr)))
1684                return -EFAULT;
1685
1686        if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
1687                return -EINVAL;
1688        hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));
1689
1690        if (hdr.in + sizeof(hdr) > len)
1691                return -EINVAL;
1692
1693        if (!ucma_cmd_table[hdr.cmd])
1694                return -ENOSYS;
1695
1696        ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
1697        if (!ret)
1698                ret = len;
1699
1700        return ret;
1701}
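
The array_index_nospec() call is Spectre-v1 hardening: the bounds check just above it can be speculated past, so the index is additionally clamped (to 0 on mispredicted out-of-bounds paths) through a data dependency the CPU cannot speculate around. The general shape of the pattern, for any user-controlled table index:

#include <linux/errno.h>
#include <linux/nospec.h>

/* Generic pattern: bounds-check, then clamp under speculation, then use. */
static long dispatch(unsigned int idx, long (*table[])(void), size_t n)
{
        if (idx >= n)
                return -EINVAL;
        /* Evaluates to idx on the architectural path, 0 on mispredicted
         * paths where idx >= n, so the table load cannot leak
         * out-of-bounds memory. */
        idx = array_index_nospec(idx, n);
        return table[idx] ? table[idx]() : -ENOSYS;
}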
1702
1703static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
1704{
1705        struct ucma_file *file = filp->private_data;
1706        __poll_t mask = 0;
1707
1708        poll_wait(filp, &file->poll_wait, wait);
1709
1710        if (!list_empty(&file->event_list))
1711                mask = EPOLLIN | EPOLLRDNORM;
1712
1713        return mask;
1714}
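
Since ucma_get_event() blocks by default, an event loop will normally poll() the fd first; the mask above maps EPOLLIN onto "the event list is non-empty". A hedged userspace sketch using the same single-write framing as the migrate example (layouts assumed from <rdma/rdma_user_cm.h>):

#include <poll.h>
#include <rdma/rdma_user_cm.h>
#include <stdint.h>
#include <unistd.h>

/* Block until the rdma_cm fd is readable, then fetch one event with
 * RDMA_USER_CM_CMD_GET_EVENT into *resp. */
static int wait_and_get_event(int fd, struct rdma_ucm_event_resp *resp)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        struct {
                struct rdma_ucm_cmd_hdr hdr;
                struct rdma_ucm_get_event cmd;
        } msg = {
                .hdr = {
                        .cmd = RDMA_USER_CM_CMD_GET_EVENT,
                        .in  = sizeof(msg.cmd),
                        .out = sizeof(*resp),
                },
                .cmd = { .response = (uintptr_t)resp },
        };

        if (poll(&pfd, 1, -1) < 0)
                return -1;
        return write(fd, &msg, sizeof(msg)) == sizeof(msg) ? 0 : -1;
}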
1715
1716/*
1717 * ucma_open() does not need the BKL:
1718 *
1719 *  - no global state is referred to;
1720 *  - there is no ioctl method to race against;
1721 *  - no further module initialization is required for open to work
1722 *    after the device is registered.
1723 */
1724static int ucma_open(struct inode *inode, struct file *filp)
1725{
1726        struct ucma_file *file;
1727
1728        file = kmalloc(sizeof *file, GFP_KERNEL);
1729        if (!file)
1730                return -ENOMEM;
1731
1732        file->close_wq = alloc_ordered_workqueue("ucma_close_id",
1733                                                 WQ_MEM_RECLAIM);
1734        if (!file->close_wq) {
1735                kfree(file);
1736                return -ENOMEM;
1737        }
1738
1739        INIT_LIST_HEAD(&file->event_list);
1740        INIT_LIST_HEAD(&file->ctx_list);
1741        init_waitqueue_head(&file->poll_wait);
1742        mutex_init(&file->mut);
1743
1744        filp->private_data = file;
1745        file->filp = filp;
1746
1747        return stream_open(inode, filp);
1748}
1749
1750static int ucma_close(struct inode *inode, struct file *filp)
1751{
1752        struct ucma_file *file = filp->private_data;
1753        struct ucma_context *ctx, *tmp;
1754
1755        mutex_lock(&file->mut);
1756        list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
1757                ctx->destroying = 1;
1758                mutex_unlock(&file->mut);
1759
1760                mutex_lock(&mut);
1761                idr_remove(&ctx_idr, ctx->id);
1762                mutex_unlock(&mut);
1763
1764                flush_workqueue(file->close_wq);
1765                /* Now that the ctx is marked as destroying and the close
1766                 * workqueue has been flushed, no in-flight handler can
1767                 * still put the ctx on behalf of another closing task.
1768                 */
1769                mutex_lock(&mut);
1770                if (!ctx->closing) {
1771                        mutex_unlock(&mut);
1772                        ucma_put_ctx(ctx);
1773                        wait_for_completion(&ctx->comp);
1774                        /* rdma_destroy_id() ensures that no event handlers
1775                         * are in flight for this id before releasing it.
1776                         */
1777                        rdma_destroy_id(ctx->cm_id);
1778                } else {
1779                        mutex_unlock(&mut);
1780                }
1781
1782                ucma_free_ctx(ctx);
1783                mutex_lock(&file->mut);
1784        }
1785        mutex_unlock(&file->mut);
1786        destroy_workqueue(file->close_wq);
1787        kfree(file);
1788        return 0;
1789}
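
The put-then-wait pair above is a common teardown handshake: drop your own reference, then block until every other holder has dropped theirs, with the completion signalled by the final put. A sketch under the assumption (consistent with the ref/comp fields of ucma_context) that ucma_put_ctx() completes ctx->comp on the last reference:

#include <linux/atomic.h>
#include <linux/completion.h>

struct obj {
        atomic_t ref;
        struct completion comp;
};

static void obj_put(struct obj *o)
{
        if (atomic_dec_and_test(&o->ref))
                complete(&o->comp);
}

static void obj_teardown(struct obj *o)
{
        obj_put(o);                     /* drop our own reference */
        wait_for_completion(&o->comp);  /* wait for all other holders */
        /* now exclusive: safe to destroy the underlying resources */
}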
1790
1791static const struct file_operations ucma_fops = {
1792        .owner   = THIS_MODULE,
1793        .open    = ucma_open,
1794        .release = ucma_close,
1795        .write   = ucma_write,
1796        .poll    = ucma_poll,
1797        .llseek  = no_llseek,
1798};
1799
1800static struct miscdevice ucma_misc = {
1801        .minor          = MISC_DYNAMIC_MINOR,
1802        .name           = "rdma_cm",
1803        .nodename       = "infiniband/rdma_cm",
1804        .mode           = 0666,
1805        .fops           = &ucma_fops,
1806};
1807
1808static ssize_t show_abi_version(struct device *dev,
1809                                struct device_attribute *attr,
1810                                char *buf)
1811{
1812        return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
1813}
1814static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
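
The attribute above should surface through sysfs once the misc device is registered. A hedged userspace sketch for reading it; the exact path is an assumption based on the device name "rdma_cm" and the usual /sys/class/misc/ grouping of misc devices:

#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

/* Return the rdma_ucm ABI version, or -1 on error. */
static int read_abi_version(void)
{
        char buf[16] = "";
        int fd = open("/sys/class/misc/rdma_cm/abi_version", O_RDONLY);

        if (fd < 0)
                return -1;
        if (read(fd, buf, sizeof(buf) - 1) < 0) {
                close(fd);
                return -1;
        }
        close(fd);
        return atoi(buf);
}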
1815
1816static int __init ucma_init(void)
1817{
1818        int ret;
1819
1820        ret = misc_register(&ucma_misc);
1821        if (ret)
1822                return ret;
1823
1824        ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
1825        if (ret) {
1826                pr_err("rdma_ucm: couldn't create abi_version attr\n");
1827                goto err1;
1828        }
1829
1830        ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
1831        if (!ucma_ctl_table_hdr) {
1832                pr_err("rdma_ucm: couldn't register sysctl paths\n");
1833                ret = -ENOMEM;
1834                goto err2;
1835        }
1836        return 0;
1837err2:
1838        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
1839err1:
1840        misc_deregister(&ucma_misc);
1841        return ret;
1842}
1843
1844static void __exit ucma_cleanup(void)
1845{
1846        unregister_net_sysctl_table(ucma_ctl_table_hdr);
1847        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
1848        misc_deregister(&ucma_misc);
1849        idr_destroy(&ctx_idr);
1850        idr_destroy(&multicast_idr);
1851}
1852
1853module_init(ucma_init);
1854module_exit(ucma_cleanup);
1855