linux/drivers/infiniband/core/ucma.c
/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

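/*
 * max_backlog caps the listen backlog a process may request; it is exported
 * as the writable sysctl net/rdma_ucm/max_backlog via the
 * register_net_sysctl() call in ucma_init() below.
 */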
static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
        {
                .procname       = "max_backlog",
                .data           = &max_backlog,
                .maxlen         = sizeof max_backlog,
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        { }
};

struct ucma_file {
        struct mutex            mut;
        struct file             *filp;
        struct list_head        ctx_list;
        struct list_head        event_list;
        wait_queue_head_t       poll_wait;
        struct workqueue_struct *close_wq;
};

struct ucma_context {
        int                     id;
        struct completion       comp;
        atomic_t                ref;
        int                     events_reported;
        int                     backlog;

        struct ucma_file        *file;
        struct rdma_cm_id       *cm_id;
        u64                     uid;

        struct list_head        list;
        struct list_head        mc_list;
        /* mark that the device is in the process of destroying the internal
         * HW resources, protected by the global mut
         */
        int                     closing;
        /* sync between removal event and id destroy, protected by file mut */
        int                     destroying;
        struct work_struct      close_work;
};

struct ucma_multicast {
        struct ucma_context     *ctx;
        int                     id;
        int                     events_reported;

        u64                     uid;
        struct list_head        list;
        struct sockaddr_storage addr;
};

struct ucma_event {
        struct ucma_context     *ctx;
        struct ucma_multicast   *mc;
        struct list_head        list;
        struct rdma_cm_id       *cm_id;
        struct rdma_ucm_event_resp resp;
        struct work_struct      close_work;
};

static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
static DEFINE_IDR(multicast_idr);

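/* Caller must hold mut; returns the context only if it belongs to @file. */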
static inline struct ucma_context *_ucma_find_context(int id,
                                                      struct ucma_file *file)
{
        struct ucma_context *ctx;

        ctx = idr_find(&ctx_idr, id);
        if (!ctx)
                ctx = ERR_PTR(-ENOENT);
        else if (ctx->file != file)
                ctx = ERR_PTR(-EINVAL);
        return ctx;
}

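/*
 * Look up a context by id and take a reference on it.  Contexts that are
 * already being torn down (ctx->closing set) are refused, so callers never
 * operate on a cm_id that is on its way to rdma_destroy_id().
 */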
static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
        struct ucma_context *ctx;

        mutex_lock(&mut);
        ctx = _ucma_find_context(id, file);
        if (!IS_ERR(ctx)) {
                if (ctx->closing)
                        ctx = ERR_PTR(-EIO);
                else
                        atomic_inc(&ctx->ref);
        }
        mutex_unlock(&mut);
        return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
        if (atomic_dec_and_test(&ctx->ref))
                complete(&ctx->comp);
}

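/*
 * Worker for connect request events whose cm_id was never handed to a
 * context: destroy the embryonic cm_id and free the queued event.
 */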
static void ucma_close_event_id(struct work_struct *work)
{
        struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);

        rdma_destroy_id(uevent_close->cm_id);
        kfree(uevent_close);
}

static void ucma_close_id(struct work_struct *work)
{
        struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

        /* Once all inflight tasks are finished, we close all underlying
         * resources. The context is still alive until it is explicitly
         * destroyed by its creator.
         */
        ucma_put_ctx(ctx);
        wait_for_completion(&ctx->comp);
        /* No new events will be generated after destroying the id. */
        rdma_destroy_id(ctx->cm_id);
}

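/* Called with file->mut held; the new context is added to file->ctx_list. */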
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
        struct ucma_context *ctx;

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return NULL;

        INIT_WORK(&ctx->close_work, ucma_close_id);
        atomic_set(&ctx->ref, 1);
        init_completion(&ctx->comp);
        INIT_LIST_HEAD(&ctx->mc_list);
        ctx->file = file;

        mutex_lock(&mut);
        ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
        mutex_unlock(&mut);
        if (ctx->id < 0)
                goto error;

        list_add_tail(&ctx->list, &file->ctx_list);
        return ctx;

error:
        kfree(ctx);
        return NULL;
}

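/* Called with file->mut held; links the join onto the context's mc_list. */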
static struct ucma_multicast *ucma_alloc_multicast(struct ucma_context *ctx)
{
        struct ucma_multicast *mc;

        mc = kzalloc(sizeof(*mc), GFP_KERNEL);
        if (!mc)
                return NULL;

        mutex_lock(&mut);
        mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
        mutex_unlock(&mut);
        if (mc->id < 0)
                goto error;

        mc->ctx = ctx;
        list_add_tail(&mc->list, &ctx->mc_list);
        return mc;

error:
        kfree(mc);
        return NULL;
}

static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
                                 struct rdma_conn_param *src)
{
        if (src->private_data_len)
                memcpy(dst->private_data, src->private_data,
                       src->private_data_len);
        dst->private_data_len = src->private_data_len;
        dst->responder_resources = src->responder_resources;
        dst->initiator_depth = src->initiator_depth;
        dst->flow_control = src->flow_control;
        dst->retry_count = src->retry_count;
        dst->rnr_retry_count = src->rnr_retry_count;
        dst->srq = src->srq;
        dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
                               struct rdma_ud_param *src)
{
        if (src->private_data_len)
                memcpy(dst->private_data, src->private_data,
                       src->private_data_len);
        dst->private_data_len = src->private_data_len;
        ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
        dst->qp_num = src->qp_num;
        dst->qkey = src->qkey;
}

static void ucma_set_event_context(struct ucma_context *ctx,
                                   struct rdma_cm_event *event,
                                   struct ucma_event *uevent)
{
        uevent->ctx = ctx;
        switch (event->event) {
        case RDMA_CM_EVENT_MULTICAST_JOIN:
        case RDMA_CM_EVENT_MULTICAST_ERROR:
                uevent->mc = (struct ucma_multicast *)
                             event->param.ud.private_data;
                uevent->resp.uid = uevent->mc->uid;
                uevent->resp.id = uevent->mc->id;
                break;
        default:
                uevent->resp.uid = ctx->uid;
                uevent->resp.id = ctx->id;
                break;
        }
}

/* Called with file->mut locked for the relevant context. */
static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
{
        struct ucma_context *ctx = cm_id->context;
        struct ucma_event *con_req_eve;
        int event_found = 0;

        if (ctx->destroying)
                return;

        /* Only if the context points at this cm_id does it own it, in which
         * case the cm_id can be queued to be closed.  Otherwise the cm_id is
         * an inflight one sitting on the context's event list, pending to be
         * detached and reattached to its new context by ucma_get_event;
         * that case is handled separately below.
         */
        if (ctx->cm_id == cm_id) {
                mutex_lock(&mut);
                ctx->closing = 1;
                mutex_unlock(&mut);
                queue_work(ctx->file->close_wq, &ctx->close_work);
                return;
        }

        list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
                if (con_req_eve->cm_id == cm_id &&
                    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
                        list_del(&con_req_eve->list);
                        INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
                        queue_work(ctx->file->close_wq, &con_req_eve->close_work);
                        event_found = 1;
                        break;
                }
        }
        if (!event_found)
                printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n");
}

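/*
 * rdma_cm event callback: translate the event into a struct ucma_event,
 * queue it on the owning file's event list and wake up pollers.  Connect
 * requests consume a slot of the listen backlog; device removal is forwarded
 * to ucma_removal_event_handler() so the underlying HW resources are freed.
 */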
static int ucma_event_handler(struct rdma_cm_id *cm_id,
                              struct rdma_cm_event *event)
{
        struct ucma_event *uevent;
        struct ucma_context *ctx = cm_id->context;
        int ret = 0;

        uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
        if (!uevent)
                return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;

        mutex_lock(&ctx->file->mut);
        uevent->cm_id = cm_id;
        ucma_set_event_context(ctx, event, uevent);
        uevent->resp.event = event->event;
        uevent->resp.status = event->status;
        if (cm_id->qp_type == IB_QPT_UD)
                ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
        else
                ucma_copy_conn_event(&uevent->resp.param.conn,
                                     &event->param.conn);

        if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
                if (!ctx->backlog) {
                        ret = -ENOMEM;
                        kfree(uevent);
                        goto out;
                }
                ctx->backlog--;
        } else if (!ctx->uid || ctx->cm_id != cm_id) {
                /*
                 * We ignore events for new connections until userspace has set
                 * their context.  This can only happen if an error occurs on a
                 * new connection before the user accepts it.  This is okay,
                 * since the accept will just fail later. However, we do need
                 * to release the underlying HW resources in case of a device
                 * removal event.
                 */
                if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
                        ucma_removal_event_handler(cm_id);

                kfree(uevent);
                goto out;
        }

        list_add_tail(&uevent->list, &ctx->file->event_list);
        wake_up_interruptible(&ctx->file->poll_wait);
        if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
                ucma_removal_event_handler(cm_id);
out:
        mutex_unlock(&ctx->file->mut);
        return ret;
}

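/*
 * Hand the oldest queued event to userspace, blocking unless O_NONBLOCK is
 * set.  A connect request event allocates a fresh context here and moves the
 * embryonic cm_id into it, returning the new context's id in resp.id.
 */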
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
                              int in_len, int out_len)
{
        struct ucma_context *ctx;
        struct rdma_ucm_get_event cmd;
        struct ucma_event *uevent;
        int ret = 0;

        if (out_len < sizeof uevent->resp)
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        mutex_lock(&file->mut);
        while (list_empty(&file->event_list)) {
                mutex_unlock(&file->mut);

                if (file->filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;

                if (wait_event_interruptible(file->poll_wait,
                                             !list_empty(&file->event_list)))
                        return -ERESTARTSYS;

                mutex_lock(&file->mut);
        }

        uevent = list_entry(file->event_list.next, struct ucma_event, list);

        if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
                ctx = ucma_alloc_ctx(file);
                if (!ctx) {
                        ret = -ENOMEM;
                        goto done;
                }
                uevent->ctx->backlog++;
                ctx->cm_id = uevent->cm_id;
                ctx->cm_id->context = ctx;
                uevent->resp.id = ctx->id;
        }

        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &uevent->resp, sizeof uevent->resp)) {
                ret = -EFAULT;
                goto done;
        }

        list_del(&uevent->list);
        uevent->ctx->events_reported++;
        if (uevent->mc)
                uevent->mc->events_reported++;
        kfree(uevent);
done:
        mutex_unlock(&file->mut);
        return ret;
}

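/* Map the rdma_cm port space to the QP type userspace must use with it. */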
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
        switch (cmd->ps) {
        case RDMA_PS_TCP:
                *qp_type = IB_QPT_RC;
                return 0;
        case RDMA_PS_UDP:
        case RDMA_PS_IPOIB:
                *qp_type = IB_QPT_UD;
                return 0;
        case RDMA_PS_IB:
                *qp_type = cmd->qp_type;
                return 0;
        default:
                return -EINVAL;
        }
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
                              int in_len, int out_len)
{
        struct rdma_ucm_create_id cmd;
        struct rdma_ucm_create_id_resp resp;
        struct ucma_context *ctx;
        enum ib_qp_type qp_type;
        int ret;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ret = ucma_get_qp_type(&cmd, &qp_type);
        if (ret)
                return ret;

        mutex_lock(&file->mut);
        ctx = ucma_alloc_ctx(file);
        mutex_unlock(&file->mut);
        if (!ctx)
                return -ENOMEM;

        ctx->uid = cmd.uid;
        ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
                                    ucma_event_handler, ctx, cmd.ps, qp_type);
        if (IS_ERR(ctx->cm_id)) {
                ret = PTR_ERR(ctx->cm_id);
                goto err1;
        }

        resp.id = ctx->id;
        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &resp, sizeof(resp))) {
                ret = -EFAULT;
                goto err2;
        }
        return 0;

err2:
        rdma_destroy_id(ctx->cm_id);
err1:
        mutex_lock(&mut);
        idr_remove(&ctx_idr, ctx->id);
        mutex_unlock(&mut);
        kfree(ctx);
        return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
        struct ucma_multicast *mc, *tmp;

        mutex_lock(&mut);
        list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
                list_del(&mc->list);
                idr_remove(&multicast_idr, mc->id);
                kfree(mc);
        }
        mutex_unlock(&mut);
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
        struct ucma_event *uevent, *tmp;

        list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
                if (uevent->mc != mc)
                        continue;

                list_del(&uevent->list);
                kfree(uevent);
        }
}

/*
 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
 * this point, no new events will be reported from the hardware. However, we
 * still need to clean up the UCMA context for this ID. Specifically, there
 * might be events that have not yet been consumed by the user space software.
 * These might include pending connect requests which we have not completed
 * processing.  We cannot call rdma_destroy_id while holding the lock of the
 * context (file->mut), as it might cause a deadlock. We therefore extract all
 * relevant events from the context pending events list while holding the
 * mutex. After that we release them as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
        int events_reported;
        struct ucma_event *uevent, *tmp;
        LIST_HEAD(list);

        ucma_cleanup_multicast(ctx);

        /* Clean up events not yet reported to the user. */
        mutex_lock(&ctx->file->mut);
        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
                if (uevent->ctx == ctx)
                        list_move_tail(&uevent->list, &list);
        }
        list_del(&ctx->list);
        mutex_unlock(&ctx->file->mut);

        list_for_each_entry_safe(uevent, tmp, &list, list) {
                list_del(&uevent->list);
                if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
                        rdma_destroy_id(uevent->cm_id);
                kfree(uevent);
        }

        events_reported = ctx->events_reported;
        kfree(ctx);
        return events_reported;
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_destroy_id cmd;
        struct rdma_ucm_destroy_id_resp resp;
        struct ucma_context *ctx;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        mutex_lock(&mut);
        ctx = _ucma_find_context(cmd.id, file);
        if (!IS_ERR(ctx))
                idr_remove(&ctx_idr, ctx->id);
        mutex_unlock(&mut);

        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        mutex_lock(&ctx->file->mut);
        ctx->destroying = 1;
        mutex_unlock(&ctx->file->mut);

        flush_workqueue(ctx->file->close_wq);
        /* At this point it's guaranteed that there is no inflight
         * closing task.
         */
        mutex_lock(&mut);
        if (!ctx->closing) {
                mutex_unlock(&mut);
                ucma_put_ctx(ctx);
                wait_for_completion(&ctx->comp);
                rdma_destroy_id(ctx->cm_id);
        } else {
                mutex_unlock(&mut);
        }

        resp.events_reported = ucma_free_ctx(ctx);
        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &resp, sizeof(resp)))
                ret = -EFAULT;

        return ret;
}

static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
                              int in_len, int out_len)
{
        struct rdma_ucm_bind_ip cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
                         int in_len, int out_len)
{
        struct rdma_ucm_bind cmd;
        struct sockaddr *addr;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        addr = (struct sockaddr *) &cmd.addr;
        if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_bind_addr(ctx->cm_id, addr);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
                               const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_resolve_ip cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
                                (struct sockaddr *) &cmd.dst_addr,
                                cmd.timeout_ms);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
                                 const char __user *inbuf,
                                 int in_len, int out_len)
{
        struct rdma_ucm_resolve_addr cmd;
        struct sockaddr *src, *dst;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        src = (struct sockaddr *) &cmd.src_addr;
        dst = (struct sockaddr *) &cmd.dst_addr;
        if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
            !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
                                  const char __user *inbuf,
                                  int in_len, int out_len)
{
        struct rdma_ucm_resolve_route cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
        ucma_put_ctx(ctx);
        return ret;
}

static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
                               struct rdma_route *route)
{
        struct rdma_dev_addr *dev_addr;

        resp->num_paths = route->num_paths;
        switch (route->num_paths) {
        case 0:
                dev_addr = &route->addr.dev_addr;
                rdma_addr_get_dgid(dev_addr,
                                   (union ib_gid *) &resp->ib_route[0].dgid);
                rdma_addr_get_sgid(dev_addr,
                                   (union ib_gid *) &resp->ib_route[0].sgid);
                resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
                break;
        case 2:
                ib_copy_path_rec_to_user(&resp->ib_route[1],
                                         &route->path_rec[1]);
                /* fall through */
        case 1:
                ib_copy_path_rec_to_user(&resp->ib_route[0],
                                         &route->path_rec[0]);
                break;
        default:
                break;
        }
}

static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
                                 struct rdma_route *route)
{
        resp->num_paths = route->num_paths;
        switch (route->num_paths) {
        case 0:
                rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
                            (union ib_gid *)&resp->ib_route[0].dgid);
                rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
                            (union ib_gid *)&resp->ib_route[0].sgid);
                resp->ib_route[0].pkey = cpu_to_be16(0xffff);
                break;
        case 2:
                ib_copy_path_rec_to_user(&resp->ib_route[1],
                                         &route->path_rec[1]);
                /* fall through */
        case 1:
                ib_copy_path_rec_to_user(&resp->ib_route[0],
                                         &route->path_rec[0]);
                break;
        default:
                break;
        }
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
                               struct rdma_route *route)
{
        struct rdma_dev_addr *dev_addr;

        dev_addr = &route->addr.dev_addr;
        rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
        rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}

static ssize_t ucma_query_route(struct ucma_file *file,
                                const char __user *inbuf,
                                int in_len, int out_len)
{
        struct rdma_ucm_query cmd;
        struct rdma_ucm_query_route_resp resp;
        struct ucma_context *ctx;
        struct sockaddr *addr;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        memset(&resp, 0, sizeof resp);
        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
        memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
                                     sizeof(struct sockaddr_in) :
                                     sizeof(struct sockaddr_in6));
        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
        memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
                                     sizeof(struct sockaddr_in) :
                                     sizeof(struct sockaddr_in6));
        if (!ctx->cm_id->device)
                goto out;

        resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
        resp.port_num = ctx->cm_id->port_num;

        if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
                ucma_copy_ib_route(&resp, &ctx->cm_id->route);
        else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
                ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
        else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
                ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &resp, sizeof(resp)))
                ret = -EFAULT;

        ucma_put_ctx(ctx);
        return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
                                   struct rdma_ucm_query_addr_resp *resp)
{
        if (!cm_id->device)
                return;

        resp->node_guid = (__force __u64) cm_id->device->node_guid;
        resp->port_num = cm_id->port_num;
        resp->pkey = (__force __u16) cpu_to_be16(
                     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
                               void __user *response, int out_len)
{
        struct rdma_ucm_query_addr_resp resp;
        struct sockaddr *addr;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        memset(&resp, 0, sizeof resp);

        addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
        resp.src_size = rdma_addr_size(addr);
        memcpy(&resp.src_addr, addr, resp.src_size);

        addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
        resp.dst_size = rdma_addr_size(addr);
        memcpy(&resp.dst_addr, addr, resp.dst_size);

        ucma_query_device_addr(ctx->cm_id, &resp);

        if (copy_to_user(response, &resp, sizeof(resp)))
                ret = -EFAULT;

        return ret;
}

static ssize_t ucma_query_path(struct ucma_context *ctx,
                               void __user *response, int out_len)
{
        struct rdma_ucm_query_path_resp *resp;
        int i, ret = 0;

        if (out_len < sizeof(*resp))
                return -ENOSPC;

        resp = kzalloc(out_len, GFP_KERNEL);
        if (!resp)
                return -ENOMEM;

        resp->num_paths = ctx->cm_id->route.num_paths;
        for (i = 0, out_len -= sizeof(*resp);
             i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
             i++, out_len -= sizeof(struct ib_path_rec_data)) {

                resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
                                           IB_PATH_BIDIRECTIONAL;
                ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
                                &resp->path_data[i].path_rec);
        }

        if (copy_to_user(response, resp,
                         sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
                ret = -EFAULT;

        kfree(resp);
        return ret;
}

static ssize_t ucma_query_gid(struct ucma_context *ctx,
                              void __user *response, int out_len)
{
        struct rdma_ucm_query_addr_resp resp;
        struct sockaddr_ib *addr;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        memset(&resp, 0, sizeof resp);

        ucma_query_device_addr(ctx->cm_id, &resp);

        addr = (struct sockaddr_ib *) &resp.src_addr;
        resp.src_size = sizeof(*addr);
        if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
                memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
        } else {
                addr->sib_family = AF_IB;
                addr->sib_pkey = (__force __be16) resp.pkey;
                rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
                                   (union ib_gid *) &addr->sib_addr);
                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
                                                    &ctx->cm_id->route.addr.src_addr);
        }

        addr = (struct sockaddr_ib *) &resp.dst_addr;
        resp.dst_size = sizeof(*addr);
        if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
                memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
        } else {
                addr->sib_family = AF_IB;
                addr->sib_pkey = (__force __be16) resp.pkey;
                rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
                                   (union ib_gid *) &addr->sib_addr);
                addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
                                                    &ctx->cm_id->route.addr.dst_addr);
        }

        if (copy_to_user(response, &resp, sizeof(resp)))
                ret = -EFAULT;

        return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
                          const char __user *inbuf,
                          int in_len, int out_len)
{
        struct rdma_ucm_query cmd;
        struct ucma_context *ctx;
        void __user *response;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        response = (void __user *)(unsigned long) cmd.response;
        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        switch (cmd.option) {
        case RDMA_USER_CM_QUERY_ADDR:
                ret = ucma_query_addr(ctx, response, out_len);
                break;
        case RDMA_USER_CM_QUERY_PATH:
                ret = ucma_query_path(ctx, response, out_len);
                break;
        case RDMA_USER_CM_QUERY_GID:
                ret = ucma_query_gid(ctx, response, out_len);
                break;
        default:
                ret = -ENOSYS;
                break;
        }

        ucma_put_ctx(ctx);
        return ret;
}

static void ucma_copy_conn_param(struct rdma_cm_id *id,
                                 struct rdma_conn_param *dst,
                                 struct rdma_ucm_conn_param *src)
{
        dst->private_data = src->private_data;
        dst->private_data_len = src->private_data_len;
        dst->responder_resources = src->responder_resources;
        dst->initiator_depth = src->initiator_depth;
        dst->flow_control = src->flow_control;
        dst->retry_count = src->retry_count;
        dst->rnr_retry_count = src->rnr_retry_count;
        dst->srq = src->srq;
        dst->qp_num = src->qp_num;
        dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
                            int in_len, int out_len)
{
        struct rdma_ucm_connect cmd;
        struct rdma_conn_param conn_param;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        if (!cmd.conn_param.valid)
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
        ret = rdma_connect(ctx->cm_id, &conn_param);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_listen cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
                       cmd.backlog : max_backlog;
        ret = rdma_listen(ctx->cm_id, ctx->backlog);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_accept cmd;
        struct rdma_conn_param conn_param;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        if (cmd.conn_param.valid) {
                ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
                mutex_lock(&file->mut);
                ret = rdma_accept(ctx->cm_id, &conn_param);
                if (!ret)
                        ctx->uid = cmd.uid;
                mutex_unlock(&file->mut);
        } else
                ret = rdma_accept(ctx->cm_id, NULL);

        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_reject cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_disconnect cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_disconnect(ctx->cm_id);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
                                 const char __user *inbuf,
                                 int in_len, int out_len)
{
        struct rdma_ucm_init_qp_attr cmd;
        struct ib_uverbs_qp_attr resp;
        struct ucma_context *ctx;
        struct ib_qp_attr qp_attr;
        int ret;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        resp.qp_attr_mask = 0;
        memset(&qp_attr, 0, sizeof qp_attr);
        qp_attr.qp_state = cmd.qp_state;
        ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
        if (ret)
                goto out;

        ib_copy_qp_attr_to_user(&resp, &qp_attr);
        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &resp, sizeof(resp)))
                ret = -EFAULT;

out:
        ucma_put_ctx(ctx);
        return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
                              void *optval, size_t optlen)
{
        int ret = 0;

        switch (optname) {
        case RDMA_OPTION_ID_TOS:
                if (optlen != sizeof(u8)) {
                        ret = -EINVAL;
                        break;
                }
                rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
                break;
        case RDMA_OPTION_ID_REUSEADDR:
                if (optlen != sizeof(int)) {
                        ret = -EINVAL;
                        break;
                }
                ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
                break;
        case RDMA_OPTION_ID_AFONLY:
                if (optlen != sizeof(int)) {
                        ret = -EINVAL;
                        break;
                }
                ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
                break;
        default:
                ret = -ENOSYS;
        }

        return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
                            struct ib_path_rec_data *path_data, size_t optlen)
{
        struct ib_sa_path_rec sa_path;
        struct rdma_cm_event event;
        int ret;

        if (optlen % sizeof(*path_data))
                return -EINVAL;

        for (; optlen; optlen -= sizeof(*path_data), path_data++) {
                if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
                                         IB_PATH_BIDIRECTIONAL))
                        break;
        }

        if (!optlen)
                return -EINVAL;

        memset(&sa_path, 0, sizeof(sa_path));

        ib_sa_unpack_path(path_data->path_rec, &sa_path);
        ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
        if (ret)
                return ret;

        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
        return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
                              void *optval, size_t optlen)
{
        int ret;

        switch (optname) {
        case RDMA_OPTION_IB_PATH:
                ret = ucma_set_ib_path(ctx, optval, optlen);
                break;
        default:
                ret = -ENOSYS;
        }

        return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
                                 int optname, void *optval, size_t optlen)
{
        int ret;

        switch (level) {
        case RDMA_OPTION_ID:
                ret = ucma_set_option_id(ctx, optname, optval, optlen);
                break;
        case RDMA_OPTION_IB:
                ret = ucma_set_option_ib(ctx, optname, optval, optlen);
                break;
        default:
                ret = -ENOSYS;
        }

        return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_set_option cmd;
        struct ucma_context *ctx;
        void *optval;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        optval = memdup_user((void __user *) (unsigned long) cmd.optval,
                             cmd.optlen);
        if (IS_ERR(optval)) {
                ret = PTR_ERR(optval);
                goto out;
        }

        ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
                                    cmd.optlen);
        kfree(optval);

out:
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
{
        struct rdma_ucm_notify cmd;
        struct ucma_context *ctx;
        int ret;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
        ucma_put_ctx(ctx);
        return ret;
}

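/*
 * Common handler for both multicast join ABIs: validate the address,
 * allocate a struct ucma_multicast for the join and hand it to the rdma_cm.
 * The join result arrives later as an RDMA_CM_EVENT_MULTICAST_JOIN (or
 * _ERROR) event carrying the ucma_multicast in its private_data.
 */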
static ssize_t ucma_process_join(struct ucma_file *file,
                                 struct rdma_ucm_join_mcast *cmd, int out_len)
{
        struct rdma_ucm_create_id_resp resp;
        struct ucma_context *ctx;
        struct ucma_multicast *mc;
        struct sockaddr *addr;
        int ret;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        addr = (struct sockaddr *) &cmd->addr;
        if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
                return -EINVAL;

        ctx = ucma_get_ctx(file, cmd->id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        mutex_lock(&file->mut);
        mc = ucma_alloc_multicast(ctx);
        if (!mc) {
                ret = -ENOMEM;
                goto err1;
        }

        mc->uid = cmd->uid;
        memcpy(&mc->addr, addr, cmd->addr_size);
        ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
        if (ret)
                goto err2;

        resp.id = mc->id;
        if (copy_to_user((void __user *)(unsigned long) cmd->response,
                         &resp, sizeof(resp))) {
                ret = -EFAULT;
                goto err3;
        }

        mutex_unlock(&file->mut);
        ucma_put_ctx(ctx);
        return 0;

err3:
        rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
        ucma_cleanup_mc_events(mc);
err2:
        mutex_lock(&mut);
        idr_remove(&multicast_idr, mc->id);
        mutex_unlock(&mut);
        list_del(&mc->list);
        kfree(mc);
err1:
        mutex_unlock(&file->mut);
        ucma_put_ctx(ctx);
        return ret;
}

static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
                                      const char __user *inbuf,
                                      int in_len, int out_len)
{
        struct rdma_ucm_join_ip_mcast cmd;
        struct rdma_ucm_join_mcast join_cmd;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        join_cmd.response = cmd.response;
        join_cmd.uid = cmd.uid;
        join_cmd.id = cmd.id;
        join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
        join_cmd.reserved = 0;
        memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

        return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
                                   const char __user *inbuf,
                                   int in_len, int out_len)
{
        struct rdma_ucm_join_mcast cmd;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
                                    const char __user *inbuf,
                                    int in_len, int out_len)
{
        struct rdma_ucm_destroy_id cmd;
        struct rdma_ucm_destroy_id_resp resp;
        struct ucma_multicast *mc;
        int ret = 0;

        if (out_len < sizeof(resp))
                return -ENOSPC;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        mutex_lock(&mut);
        mc = idr_find(&multicast_idr, cmd.id);
        if (!mc)
                mc = ERR_PTR(-ENOENT);
        else if (mc->ctx->file != file)
                mc = ERR_PTR(-EINVAL);
        else if (!atomic_inc_not_zero(&mc->ctx->ref))
                mc = ERR_PTR(-ENXIO);
        else
                idr_remove(&multicast_idr, mc->id);
        mutex_unlock(&mut);

        if (IS_ERR(mc)) {
                ret = PTR_ERR(mc);
                goto out;
        }

        rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
        mutex_lock(&mc->ctx->file->mut);
        ucma_cleanup_mc_events(mc);
        list_del(&mc->list);
        mutex_unlock(&mc->ctx->file->mut);

        ucma_put_ctx(mc->ctx);
        resp.events_reported = mc->events_reported;
        kfree(mc);

        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &resp, sizeof(resp)))
                ret = -EFAULT;
out:
        return ret;
}

static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
{
        /* Acquire the mutexes in a fixed order (by pointer value) to
         * prevent deadlock.
         */
        if (file1 < file2) {
                mutex_lock(&file1->mut);
                mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
        } else {
                mutex_lock(&file2->mut);
                mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
        }
}

static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
{
        if (file1 < file2) {
                mutex_unlock(&file2->mut);
                mutex_unlock(&file1->mut);
        } else {
                mutex_unlock(&file1->mut);
                mutex_unlock(&file2->mut);
        }
}

static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
{
        struct ucma_event *uevent, *tmp;

        list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
                if (uevent->ctx == ctx)
                        list_move_tail(&uevent->list, &file->event_list);
}

static ssize_t ucma_migrate_id(struct ucma_file *new_file,
                               const char __user *inbuf,
                               int in_len, int out_len)
{
        struct rdma_ucm_migrate_id cmd;
        struct rdma_ucm_migrate_resp resp;
        struct ucma_context *ctx;
        struct fd f;
        struct ucma_file *cur_file;
        int ret = 0;

        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;

        /* Get current fd to protect against it being closed */
        f = fdget(cmd.fd);
        if (!f.file)
                return -ENOENT;

        /* Validate current fd and prevent destruction of id. */
        ctx = ucma_get_ctx(f.file->private_data, cmd.id);
        if (IS_ERR(ctx)) {
                ret = PTR_ERR(ctx);
                goto file_put;
        }

        cur_file = ctx->file;
        if (cur_file == new_file) {
                resp.events_reported = ctx->events_reported;
                goto response;
        }

        /*
         * Migrate events between fd's, maintaining order, and avoiding new
         * events being added before existing events.
         */
        ucma_lock_files(cur_file, new_file);
        mutex_lock(&mut);

        list_move_tail(&ctx->list, &new_file->ctx_list);
        ucma_move_events(ctx, new_file);
        ctx->file = new_file;
        resp.events_reported = ctx->events_reported;

        mutex_unlock(&mut);
        ucma_unlock_files(cur_file, new_file);

response:
        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &resp, sizeof(resp)))
                ret = -EFAULT;

        ucma_put_ctx(ctx);
file_put:
        fdput(f);
        return ret;
}

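/*
 * Table of command handlers, indexed by hdr.cmd from the written buffer.
 * Userspace packs a struct rdma_ucm_cmd_hdr immediately followed by the
 * command payload into a single write().  Roughly, for creating an id
 * (a sketch of the userspace side; error handling and zeroing of reserved
 * fields omitted):
 *
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} msg;
 *
 *	msg.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
 *	msg.hdr.in  = sizeof(msg.cmd);
 *	msg.hdr.out = sizeof(struct rdma_ucm_create_id_resp);
 *	msg.cmd.uid = my_cookie;               (echoed back in events)
 *	msg.cmd.response = (uintptr_t) &resp;  (filled in by the kernel)
 *	msg.cmd.ps = RDMA_PS_TCP;
 *	msg.cmd.qp_type = IB_QPT_RC;
 *	write(fd, &msg, sizeof(msg));
 *
 * my_cookie and resp are illustrative names, not part of the ABI.
 */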
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
                                   const char __user *inbuf,
                                   int in_len, int out_len) = {
        [RDMA_USER_CM_CMD_CREATE_ID]     = ucma_create_id,
        [RDMA_USER_CM_CMD_DESTROY_ID]    = ucma_destroy_id,
        [RDMA_USER_CM_CMD_BIND_IP]       = ucma_bind_ip,
        [RDMA_USER_CM_CMD_RESOLVE_IP]    = ucma_resolve_ip,
        [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
        [RDMA_USER_CM_CMD_QUERY_ROUTE]   = ucma_query_route,
        [RDMA_USER_CM_CMD_CONNECT]       = ucma_connect,
        [RDMA_USER_CM_CMD_LISTEN]        = ucma_listen,
        [RDMA_USER_CM_CMD_ACCEPT]        = ucma_accept,
        [RDMA_USER_CM_CMD_REJECT]        = ucma_reject,
        [RDMA_USER_CM_CMD_DISCONNECT]    = ucma_disconnect,
        [RDMA_USER_CM_CMD_INIT_QP_ATTR]  = ucma_init_qp_attr,
        [RDMA_USER_CM_CMD_GET_EVENT]     = ucma_get_event,
        [RDMA_USER_CM_CMD_GET_OPTION]    = NULL,
        [RDMA_USER_CM_CMD_SET_OPTION]    = ucma_set_option,
        [RDMA_USER_CM_CMD_NOTIFY]        = ucma_notify,
        [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
        [RDMA_USER_CM_CMD_LEAVE_MCAST]   = ucma_leave_multicast,
        [RDMA_USER_CM_CMD_MIGRATE_ID]    = ucma_migrate_id,
        [RDMA_USER_CM_CMD_QUERY]         = ucma_query,
        [RDMA_USER_CM_CMD_BIND]          = ucma_bind,
        [RDMA_USER_CM_CMD_RESOLVE_ADDR]  = ucma_resolve_addr,
        [RDMA_USER_CM_CMD_JOIN_MCAST]    = ucma_join_multicast
};

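/*
 * A command consists of a struct rdma_ucm_cmd_hdr followed by hdr.in bytes
 * of payload; hdr.out is the size of the response buffer the command may
 * fill in.  On success the whole write length is consumed.
 */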
static ssize_t ucma_write(struct file *filp, const char __user *buf,
                          size_t len, loff_t *pos)
{
        struct ucma_file *file = filp->private_data;
        struct rdma_ucm_cmd_hdr hdr;
        ssize_t ret;

        if (len < sizeof(hdr))
                return -EINVAL;

        if (copy_from_user(&hdr, buf, sizeof(hdr)))
                return -EFAULT;

        if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
                return -EINVAL;

        if (hdr.in + sizeof(hdr) > len)
                return -EINVAL;

        if (!ucma_cmd_table[hdr.cmd])
                return -ENOSYS;

        ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
        if (!ret)
                ret = len;

        return ret;
}

static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
        struct ucma_file *file = filp->private_data;
        unsigned int mask = 0;

        poll_wait(filp, &file->poll_wait, wait);

        if (!list_empty(&file->event_list))
                mask = POLLIN | POLLRDNORM;

        return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
        struct ucma_file *file;

        file = kmalloc(sizeof *file, GFP_KERNEL);
        if (!file)
                return -ENOMEM;

        file->close_wq = create_singlethread_workqueue("ucma_close_id");
        if (!file->close_wq) {
                kfree(file);
                return -ENOMEM;
        }

        INIT_LIST_HEAD(&file->event_list);
        INIT_LIST_HEAD(&file->ctx_list);
        init_waitqueue_head(&file->poll_wait);
        mutex_init(&file->mut);

        filp->private_data = file;
        file->filp = filp;

        return nonseekable_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
        struct ucma_file *file = filp->private_data;
        struct ucma_context *ctx, *tmp;

        mutex_lock(&file->mut);
        list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
                ctx->destroying = 1;
                mutex_unlock(&file->mut);

                mutex_lock(&mut);
                idr_remove(&ctx_idr, ctx->id);
                mutex_unlock(&mut);

                flush_workqueue(file->close_wq);
                /* Once the ctx has been marked as destroying and the
                 * workqueue has been flushed, we are safe from any inflight
                 * handlers that might queue another closing task.
                 */
                mutex_lock(&mut);
                if (!ctx->closing) {
                        mutex_unlock(&mut);
                        /* rdma_destroy_id ensures that no event handlers are
                         * inflight for that id before releasing it.
                         */
                        rdma_destroy_id(ctx->cm_id);
                } else {
                        mutex_unlock(&mut);
                }

                ucma_free_ctx(ctx);
                mutex_lock(&file->mut);
        }
        mutex_unlock(&file->mut);
        destroy_workqueue(file->close_wq);
        kfree(file);
        return 0;
}

static const struct file_operations ucma_fops = {
        .owner   = THIS_MODULE,
        .open    = ucma_open,
        .release = ucma_close,
        .write   = ucma_write,
        .poll    = ucma_poll,
        .llseek  = no_llseek,
};

static struct miscdevice ucma_misc = {
        .minor          = MISC_DYNAMIC_MINOR,
        .name           = "rdma_cm",
        .nodename       = "infiniband/rdma_cm",
        .mode           = 0666,
        .fops           = &ucma_fops,
};

static ssize_t show_abi_version(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
{
        return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

static int __init ucma_init(void)
{
        int ret;

        ret = misc_register(&ucma_misc);
        if (ret)
                return ret;

        ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
        if (ret) {
                printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
                goto err1;
        }

        ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
        if (!ucma_ctl_table_hdr) {
                printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");
                ret = -ENOMEM;
                goto err2;
        }
        return 0;
err2:
        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
        misc_deregister(&ucma_misc);
        return ret;
}

static void __exit ucma_cleanup(void)
{
        unregister_net_sysctl_table(ucma_ctl_table_hdr);
        device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
        misc_deregister(&ucma_misc);
        idr_destroy(&ctx_idr);
        idr_destroy(&multicast_idr);
}

module_init(ucma_init);
module_exit(ucma_cleanup);