linux/drivers/infiniband/core/uverbs_main.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
   3 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
   4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
   6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
   7 *
   8 * This software is available to you under a choice of one of two
   9 * licenses.  You may choose to be licensed under the terms of the GNU
  10 * General Public License (GPL) Version 2, available from the file
  11 * COPYING in the main directory of this source tree, or the
  12 * OpenIB.org BSD license below:
  13 *
  14 *     Redistribution and use in source and binary forms, with or
  15 *     without modification, are permitted provided that the following
  16 *     conditions are met:
  17 *
  18 *      - Redistributions of source code must retain the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer.
  21 *
  22 *      - Redistributions in binary form must reproduce the above
  23 *        copyright notice, this list of conditions and the following
  24 *        disclaimer in the documentation and/or other materials
  25 *        provided with the distribution.
  26 *
  27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  34 * SOFTWARE.
  35 */
  36
  37#include <linux/module.h>
  38#include <linux/init.h>
  39#include <linux/device.h>
  40#include <linux/err.h>
  41#include <linux/fs.h>
  42#include <linux/poll.h>
  43#include <linux/sched.h>
  44#include <linux/file.h>
  45#include <linux/cdev.h>
  46#include <linux/anon_inodes.h>
  47#include <linux/slab.h>
  48
  49#include <asm/uaccess.h>
  50
  51#include "uverbs.h"
  52
  53MODULE_AUTHOR("Roland Dreier");
  54MODULE_DESCRIPTION("InfiniBand userspace verbs access");
  55MODULE_LICENSE("Dual BSD/GPL");
  56
/*
 * Static char-device numbering: uverbsN devices start at major 231,
 * minor 192, with room for IB_UVERBS_MAX_DEVICES before we fall back to a
 * dynamically allocated major (see find_overflow_devnum()).
 */
enum {
        IB_UVERBS_MAJOR       = 231,
        IB_UVERBS_BASE_MINOR  = 192,
        IB_UVERBS_MAX_DEVICES = 32
};

/* First dev_t of the statically reserved uverbs minor range. */
#define IB_UVERBS_BASE_DEV      MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
  64
/* sysfs class that uverbs device nodes are created under. */
static struct class *uverbs_class;

/* Single lock protecting all of the handle IDRs below. */
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
/* Per-object-type IDRs mapping userspace handles to kernel objects. */
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);

/* map_lock protects minor-number allocation in dev_map and overflow_map. */
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
  80
/*
 * Dispatch table for write()-based uverbs commands, indexed by the opcode
 * in struct ib_uverbs_cmd_hdr.  Opcodes without an entry are NULL holes,
 * which ib_uverbs_write() rejects with -EINVAL.
 */
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
                                     const char __user *buf, int in_len,
                                     int out_len) = {
        [IB_USER_VERBS_CMD_GET_CONTEXT]         = ib_uverbs_get_context,
        [IB_USER_VERBS_CMD_QUERY_DEVICE]        = ib_uverbs_query_device,
        [IB_USER_VERBS_CMD_QUERY_PORT]          = ib_uverbs_query_port,
        [IB_USER_VERBS_CMD_ALLOC_PD]            = ib_uverbs_alloc_pd,
        [IB_USER_VERBS_CMD_DEALLOC_PD]          = ib_uverbs_dealloc_pd,
        [IB_USER_VERBS_CMD_REG_MR]              = ib_uverbs_reg_mr,
        [IB_USER_VERBS_CMD_DEREG_MR]            = ib_uverbs_dereg_mr,
        [IB_USER_VERBS_CMD_ALLOC_MW]            = ib_uverbs_alloc_mw,
        [IB_USER_VERBS_CMD_DEALLOC_MW]          = ib_uverbs_dealloc_mw,
        [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
        [IB_USER_VERBS_CMD_CREATE_CQ]           = ib_uverbs_create_cq,
        [IB_USER_VERBS_CMD_RESIZE_CQ]           = ib_uverbs_resize_cq,
        [IB_USER_VERBS_CMD_POLL_CQ]             = ib_uverbs_poll_cq,
        [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]       = ib_uverbs_req_notify_cq,
        [IB_USER_VERBS_CMD_DESTROY_CQ]          = ib_uverbs_destroy_cq,
        [IB_USER_VERBS_CMD_CREATE_QP]           = ib_uverbs_create_qp,
        [IB_USER_VERBS_CMD_QUERY_QP]            = ib_uverbs_query_qp,
        [IB_USER_VERBS_CMD_MODIFY_QP]           = ib_uverbs_modify_qp,
        [IB_USER_VERBS_CMD_DESTROY_QP]          = ib_uverbs_destroy_qp,
        [IB_USER_VERBS_CMD_POST_SEND]           = ib_uverbs_post_send,
        [IB_USER_VERBS_CMD_POST_RECV]           = ib_uverbs_post_recv,
        [IB_USER_VERBS_CMD_POST_SRQ_RECV]       = ib_uverbs_post_srq_recv,
        [IB_USER_VERBS_CMD_CREATE_AH]           = ib_uverbs_create_ah,
        [IB_USER_VERBS_CMD_DESTROY_AH]          = ib_uverbs_destroy_ah,
        [IB_USER_VERBS_CMD_ATTACH_MCAST]        = ib_uverbs_attach_mcast,
        [IB_USER_VERBS_CMD_DETACH_MCAST]        = ib_uverbs_detach_mcast,
        [IB_USER_VERBS_CMD_CREATE_SRQ]          = ib_uverbs_create_srq,
        [IB_USER_VERBS_CMD_MODIFY_SRQ]          = ib_uverbs_modify_srq,
        [IB_USER_VERBS_CMD_QUERY_SRQ]           = ib_uverbs_query_srq,
        [IB_USER_VERBS_CMD_DESTROY_SRQ]         = ib_uverbs_destroy_srq,
        [IB_USER_VERBS_CMD_OPEN_XRCD]           = ib_uverbs_open_xrcd,
        [IB_USER_VERBS_CMD_CLOSE_XRCD]          = ib_uverbs_close_xrcd,
        [IB_USER_VERBS_CMD_CREATE_XSRQ]         = ib_uverbs_create_xsrq,
        [IB_USER_VERBS_CMD_OPEN_QP]             = ib_uverbs_open_qp,
#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
        [IB_USER_VERBS_CMD_CREATE_FLOW]         = ib_uverbs_create_flow,
        [IB_USER_VERBS_CMD_DESTROY_FLOW]        = ib_uverbs_destroy_flow
#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
};
 123
 124static void ib_uverbs_add_one(struct ib_device *device);
 125static void ib_uverbs_remove_one(struct ib_device *device);
 126
 127static void ib_uverbs_release_dev(struct kref *ref)
 128{
 129        struct ib_uverbs_device *dev =
 130                container_of(ref, struct ib_uverbs_device, ref);
 131
 132        complete(&dev->comp);
 133}
 134
 135static void ib_uverbs_release_event_file(struct kref *ref)
 136{
 137        struct ib_uverbs_event_file *file =
 138                container_of(ref, struct ib_uverbs_event_file, ref);
 139
 140        kfree(file);
 141}
 142
 143void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
 144                          struct ib_uverbs_event_file *ev_file,
 145                          struct ib_ucq_object *uobj)
 146{
 147        struct ib_uverbs_event *evt, *tmp;
 148
 149        if (ev_file) {
 150                spin_lock_irq(&ev_file->lock);
 151                list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
 152                        list_del(&evt->list);
 153                        kfree(evt);
 154                }
 155                spin_unlock_irq(&ev_file->lock);
 156
 157                kref_put(&ev_file->ref, ib_uverbs_release_event_file);
 158        }
 159
 160        spin_lock_irq(&file->async_file->lock);
 161        list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
 162                list_del(&evt->list);
 163                kfree(evt);
 164        }
 165        spin_unlock_irq(&file->async_file->lock);
 166}
 167
 168void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
 169                              struct ib_uevent_object *uobj)
 170{
 171        struct ib_uverbs_event *evt, *tmp;
 172
 173        spin_lock_irq(&file->async_file->lock);
 174        list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
 175                list_del(&evt->list);
 176                kfree(evt);
 177        }
 178        spin_unlock_irq(&file->async_file->lock);
 179}
 180
 181static void ib_uverbs_detach_umcast(struct ib_qp *qp,
 182                                    struct ib_uqp_object *uobj)
 183{
 184        struct ib_uverbs_mcast_entry *mcast, *tmp;
 185
 186        list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
 187                ib_detach_mcast(qp, &mcast->gid, mcast->lid);
 188                list_del(&mcast->list);
 189                kfree(mcast);
 190        }
 191}
 192
/*
 * Destroy every user object still attached to @context, then ask the driver
 * to free the context itself.  Called on file close (or when context setup
 * must be unwound).
 *
 * The teardown order is load-bearing: AHs, MWs and flow rules go before
 * QPs; QPs before CQs; CQs and SRQs before MRs, XRCDs and finally PDs, so
 * that no object is destroyed while another still references it.
 *
 * Returns the driver's dealloc_ucontext() result, or 0 if @context is NULL.
 */
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                                      struct ib_ucontext *context)
{
        struct ib_uobject *uobj, *tmp;

        if (!context)
                return 0;

        /* Tell concurrent verbs paths that this context is going away. */
        context->closing = 1;

        list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
                struct ib_ah *ah = uobj->object;

                idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
                ib_destroy_ah(ah);
                kfree(uobj);
        }

        /* Remove MWs before QPs, in order to support type 2A MWs. */
        list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
                struct ib_mw *mw = uobj->object;

                idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
                ib_dealloc_mw(mw);
                kfree(uobj);
        }

        list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
                struct ib_flow *flow_id = uobj->object;

                idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
                ib_destroy_flow(flow_id);
                kfree(uobj);
        }

        list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
                struct ib_qp *qp = uobj->object;
                struct ib_uqp_object *uqp =
                        container_of(uobj, struct ib_uqp_object, uevent.uobject);

                idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
                /* A shared QP (XRC) is only closed; the owner destroys it. */
                if (qp != qp->real_qp) {
                        ib_close_qp(qp);
                } else {
                        ib_uverbs_detach_umcast(qp, uqp);
                        ib_destroy_qp(qp);
                }
                ib_uverbs_release_uevent(file, &uqp->uevent);
                kfree(uqp);
        }

        list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
                struct ib_cq *cq = uobj->object;
                struct ib_uverbs_event_file *ev_file = cq->cq_context;
                struct ib_ucq_object *ucq =
                        container_of(uobj, struct ib_ucq_object, uobject);

                idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
                ib_destroy_cq(cq);
                ib_uverbs_release_ucq(file, ev_file, ucq);
                kfree(ucq);
        }

        list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
                struct ib_srq *srq = uobj->object;
                struct ib_uevent_object *uevent =
                        container_of(uobj, struct ib_uevent_object, uobject);

                idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
                ib_destroy_srq(srq);
                ib_uverbs_release_uevent(file, uevent);
                kfree(uevent);
        }

        list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
                struct ib_mr *mr = uobj->object;

                idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
                ib_dereg_mr(mr);
                kfree(uobj);
        }

        /* XRCDs live in a per-device tree; serialize against open/close. */
        mutex_lock(&file->device->xrcd_tree_mutex);
        list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
                struct ib_xrcd *xrcd = uobj->object;
                struct ib_uxrcd_object *uxrcd =
                        container_of(uobj, struct ib_uxrcd_object, uobject);

                idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
                ib_uverbs_dealloc_xrcd(file->device, xrcd);
                kfree(uxrcd);
        }
        mutex_unlock(&file->device->xrcd_tree_mutex);

        list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
                struct ib_pd *pd = uobj->object;

                idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
                ib_dealloc_pd(pd);
                kfree(uobj);
        }

        return context->device->dealloc_ucontext(context);
}
 297
 298static void ib_uverbs_release_file(struct kref *ref)
 299{
 300        struct ib_uverbs_file *file =
 301                container_of(ref, struct ib_uverbs_file, ref);
 302
 303        module_put(file->device->ib_dev->owner);
 304        kref_put(&file->device->ref, ib_uverbs_release_dev);
 305
 306        kfree(file);
 307}
 308
/*
 * read() handler for event files.  Blocks (unless O_NONBLOCK) until at
 * least one event is queued, then dequeues exactly one event and copies
 * its descriptor to user space.
 *
 * Returns the descriptor size on success, -EAGAIN for an empty
 * non-blocking read, -ERESTARTSYS if interrupted while waiting, -EINVAL
 * if @count is too small for one descriptor, or -EFAULT on copy failure.
 */
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
                                    size_t count, loff_t *pos)
{
        struct ib_uverbs_event_file *file = filp->private_data;
        struct ib_uverbs_event *event;
        int eventsz;
        int ret = 0;

        spin_lock_irq(&file->lock);

        /*
         * The lock must be dropped both for the O_NONBLOCK bail-out and
         * around the sleep, and re-taken before the list is re-checked.
         */
        while (list_empty(&file->event_list)) {
                spin_unlock_irq(&file->lock);

                if (filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;

                if (wait_event_interruptible(file->poll_wait,
                                             !list_empty(&file->event_list)))
                        return -ERESTARTSYS;

                spin_lock_irq(&file->lock);
        }

        event = list_entry(file->event_list.next, struct ib_uverbs_event, list);

        /* Async and completion events use different descriptor sizes. */
        if (file->is_async)
                eventsz = sizeof (struct ib_uverbs_async_event_desc);
        else
                eventsz = sizeof (struct ib_uverbs_comp_event_desc);

        if (eventsz > count) {
                /* Buffer too small: leave the event queued for a retry. */
                ret   = -EINVAL;
                event = NULL;
        } else {
                /* Dequeue; bump the per-object "events reported" counter. */
                list_del(file->event_list.next);
                if (event->counter) {
                        ++(*event->counter);
                        list_del(&event->obj_list);
                }
        }

        spin_unlock_irq(&file->lock);

        if (event) {
                if (copy_to_user(buf, event, eventsz))
                        ret = -EFAULT;
                else
                        ret = eventsz;
        }

        /* kfree(NULL) is a no-op, covering the -EINVAL path above. */
        kfree(event);

        return ret;
}
 363
 364static unsigned int ib_uverbs_event_poll(struct file *filp,
 365                                         struct poll_table_struct *wait)
 366{
 367        unsigned int pollflags = 0;
 368        struct ib_uverbs_event_file *file = filp->private_data;
 369
 370        poll_wait(filp, &file->poll_wait, wait);
 371
 372        spin_lock_irq(&file->lock);
 373        if (!list_empty(&file->event_list))
 374                pollflags = POLLIN | POLLRDNORM;
 375        spin_unlock_irq(&file->lock);
 376
 377        return pollflags;
 378}
 379
 380static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
 381{
 382        struct ib_uverbs_event_file *file = filp->private_data;
 383
 384        return fasync_helper(fd, filp, on, &file->async_queue);
 385}
 386
/*
 * release() handler for event files: mark the file closed so the event
 * handlers stop queueing, free everything still queued, and drop the
 * references this file holds.
 */
static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
        struct ib_uverbs_event_file *file = filp->private_data;
        struct ib_uverbs_event *entry, *tmp;

        spin_lock_irq(&file->lock);
        /* Handlers check is_closed under file->lock and drop new events. */
        file->is_closed = 1;
        list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
                if (entry->counter)
                        list_del(&entry->obj_list);
                kfree(entry);
        }
        spin_unlock_irq(&file->lock);

        /*
         * The async event file owns the device event handler registration
         * and a reference on the uverbs file; undo both before dropping
         * our own reference.
         */
        if (file->is_async) {
                ib_unregister_event_handler(&file->uverbs_file->event_handler);
                kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
        }
        kref_put(&file->ref, ib_uverbs_release_event_file);

        return 0;
}
 409
/*
 * File operations for the anonymous event FDs created by
 * ib_uverbs_alloc_event_file().  Event files are read/poll-only; there is
 * deliberately no write or mmap method.
 */
static const struct file_operations uverbs_event_fops = {
        .owner   = THIS_MODULE,
        .read    = ib_uverbs_event_read,
        .poll    = ib_uverbs_event_poll,
        .release = ib_uverbs_event_close,
        .fasync  = ib_uverbs_event_fasync,
        .llseek  = no_llseek,
};
 418
 419void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 420{
 421        struct ib_uverbs_event_file    *file = cq_context;
 422        struct ib_ucq_object           *uobj;
 423        struct ib_uverbs_event         *entry;
 424        unsigned long                   flags;
 425
 426        if (!file)
 427                return;
 428
 429        spin_lock_irqsave(&file->lock, flags);
 430        if (file->is_closed) {
 431                spin_unlock_irqrestore(&file->lock, flags);
 432                return;
 433        }
 434
 435        entry = kmalloc(sizeof *entry, GFP_ATOMIC);
 436        if (!entry) {
 437                spin_unlock_irqrestore(&file->lock, flags);
 438                return;
 439        }
 440
 441        uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
 442
 443        entry->desc.comp.cq_handle = cq->uobject->user_handle;
 444        entry->counter             = &uobj->comp_events_reported;
 445
 446        list_add_tail(&entry->list, &file->event_list);
 447        list_add_tail(&entry->obj_list, &uobj->comp_list);
 448        spin_unlock_irqrestore(&file->lock, flags);
 449
 450        wake_up_interruptible(&file->poll_wait);
 451        kill_fasync(&file->async_queue, SIGIO, POLL_IN);
 452}
 453
 454static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 455                                    __u64 element, __u64 event,
 456                                    struct list_head *obj_list,
 457                                    u32 *counter)
 458{
 459        struct ib_uverbs_event *entry;
 460        unsigned long flags;
 461
 462        spin_lock_irqsave(&file->async_file->lock, flags);
 463        if (file->async_file->is_closed) {
 464                spin_unlock_irqrestore(&file->async_file->lock, flags);
 465                return;
 466        }
 467
 468        entry = kmalloc(sizeof *entry, GFP_ATOMIC);
 469        if (!entry) {
 470                spin_unlock_irqrestore(&file->async_file->lock, flags);
 471                return;
 472        }
 473
 474        entry->desc.async.element    = element;
 475        entry->desc.async.event_type = event;
 476        entry->counter               = counter;
 477
 478        list_add_tail(&entry->list, &file->async_file->event_list);
 479        if (obj_list)
 480                list_add_tail(&entry->obj_list, obj_list);
 481        spin_unlock_irqrestore(&file->async_file->lock, flags);
 482
 483        wake_up_interruptible(&file->async_file->poll_wait);
 484        kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
 485}
 486
 487void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
 488{
 489        struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
 490                                                  struct ib_ucq_object, uobject);
 491
 492        ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
 493                                event->event, &uobj->async_list,
 494                                &uobj->async_events_reported);
 495}
 496
 497void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
 498{
 499        struct ib_uevent_object *uobj;
 500
 501        uobj = container_of(event->element.qp->uobject,
 502                            struct ib_uevent_object, uobject);
 503
 504        ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
 505                                event->event, &uobj->event_list,
 506                                &uobj->events_reported);
 507}
 508
 509void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
 510{
 511        struct ib_uevent_object *uobj;
 512
 513        uobj = container_of(event->element.srq->uobject,
 514                            struct ib_uevent_object, uobject);
 515
 516        ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
 517                                event->event, &uobj->event_list,
 518                                &uobj->events_reported);
 519}
 520
 521void ib_uverbs_event_handler(struct ib_event_handler *handler,
 522                             struct ib_event *event)
 523{
 524        struct ib_uverbs_file *file =
 525                container_of(handler, struct ib_uverbs_file, event_handler);
 526
 527        ib_uverbs_async_handler(file, event->element.port_num, event->event,
 528                                NULL, NULL);
 529}
 530
 531struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 532                                        int is_async)
 533{
 534        struct ib_uverbs_event_file *ev_file;
 535        struct file *filp;
 536
 537        ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
 538        if (!ev_file)
 539                return ERR_PTR(-ENOMEM);
 540
 541        kref_init(&ev_file->ref);
 542        spin_lock_init(&ev_file->lock);
 543        INIT_LIST_HEAD(&ev_file->event_list);
 544        init_waitqueue_head(&ev_file->poll_wait);
 545        ev_file->uverbs_file = uverbs_file;
 546        ev_file->async_queue = NULL;
 547        ev_file->is_async    = is_async;
 548        ev_file->is_closed   = 0;
 549
 550        filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
 551                                  ev_file, O_RDONLY);
 552        if (IS_ERR(filp))
 553                kfree(ev_file);
 554
 555        return filp;
 556}
 557
 558/*
 559 * Look up a completion event file by FD.  If lookup is successful,
 560 * takes a ref to the event file struct that it returns; if
 561 * unsuccessful, returns NULL.
 562 */
 563struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
 564{
 565        struct ib_uverbs_event_file *ev_file = NULL;
 566        struct fd f = fdget(fd);
 567
 568        if (!f.file)
 569                return NULL;
 570
 571        if (f.file->f_op != &uverbs_event_fops)
 572                goto out;
 573
 574        ev_file = f.file->private_data;
 575        if (ev_file->is_async) {
 576                ev_file = NULL;
 577                goto out;
 578        }
 579
 580        kref_get(&ev_file->ref);
 581
 582out:
 583        fdput(f);
 584        return ev_file;
 585}
 586
/*
 * write() entry point: every uverbs command arrives as a struct
 * ib_uverbs_cmd_hdr followed by its payload.  The opcode is validated
 * against the dispatch table and the device's advertised uverbs_cmd_mask,
 * the declared word counts must match the write size exactly, and the
 * handler is invoked with the payload bounds derived from the header.
 */
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                             size_t count, loff_t *pos)
{
        struct ib_uverbs_file *file = filp->private_data;
        struct ib_uverbs_cmd_hdr hdr;

        if (count < sizeof hdr)
                return -EINVAL;

        if (copy_from_user(&hdr, buf, sizeof hdr))
                return -EFAULT;

        /* Unknown opcode, or a NULL hole in the dispatch table. */
        if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
            !uverbs_cmd_table[hdr.command])
                return -EINVAL;

        /* Everything except GET_CONTEXT requires a context first. */
        if (!file->ucontext &&
            hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
                return -EINVAL;

        /* The device must advertise support for this command. */
        if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
                return -ENOSYS;

#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
        /*
         * Commands at or above the threshold use the extended header,
         * which carries separate provider (driver-specific) word counts.
         */
        if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
                struct ib_uverbs_cmd_hdr_ex hdr_ex;

                if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
                        return -EFAULT;

                if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
                        return -EINVAL;

                return uverbs_cmd_table[hdr.command](file,
                                                     buf + sizeof(hdr_ex),
                                                     (hdr_ex.in_words +
                                                      hdr_ex.provider_in_words) * 4,
                                                     (hdr_ex.out_words +
                                                      hdr_ex.provider_out_words) * 4);
        } else {
#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
                if (hdr.in_words * 4 != count)
                        return -EINVAL;

                return uverbs_cmd_table[hdr.command](file,
                                                     buf + sizeof(hdr),
                                                     hdr.in_words * 4,
                                                     hdr.out_words * 4);
#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
        }
#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
}
 639
 640static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 641{
 642        struct ib_uverbs_file *file = filp->private_data;
 643
 644        if (!file->ucontext)
 645                return -ENODEV;
 646        else
 647                return file->device->ib_dev->mmap(file->ucontext, vma);
 648}
 649
 650/*
 651 * ib_uverbs_open() does not need the BKL:
 652 *
 653 *  - the ib_uverbs_device structures are properly reference counted and
 654 *    everything else is purely local to the file being created, so
 655 *    races against other open calls are not a problem;
 656 *  - there is no ioctl method to race against;
 657 *  - the open method will either immediately run -ENXIO, or all
 658 *    required initialization will be done.
 659 */
 660static int ib_uverbs_open(struct inode *inode, struct file *filp)
 661{
 662        struct ib_uverbs_device *dev;
 663        struct ib_uverbs_file *file;
 664        int ret;
 665
 666        dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
 667        if (dev)
 668                kref_get(&dev->ref);
 669        else
 670                return -ENXIO;
 671
 672        if (!try_module_get(dev->ib_dev->owner)) {
 673                ret = -ENODEV;
 674                goto err;
 675        }
 676
 677        file = kmalloc(sizeof *file, GFP_KERNEL);
 678        if (!file) {
 679                ret = -ENOMEM;
 680                goto err_module;
 681        }
 682
 683        file->device     = dev;
 684        file->ucontext   = NULL;
 685        file->async_file = NULL;
 686        kref_init(&file->ref);
 687        mutex_init(&file->mutex);
 688
 689        filp->private_data = file;
 690
 691        return nonseekable_open(inode, filp);
 692
 693err_module:
 694        module_put(dev->ib_dev->owner);
 695
 696err:
 697        kref_put(&dev->ref, ib_uverbs_release_dev);
 698        return ret;
 699}
 700
 701static int ib_uverbs_close(struct inode *inode, struct file *filp)
 702{
 703        struct ib_uverbs_file *file = filp->private_data;
 704
 705        ib_uverbs_cleanup_ucontext(file, file->ucontext);
 706
 707        if (file->async_file)
 708                kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
 709
 710        kref_put(&file->ref, ib_uverbs_release_file);
 711
 712        return 0;
 713}
 714
/* fops used when the underlying ib_device has no mmap method. */
static const struct file_operations uverbs_fops = {
        .owner   = THIS_MODULE,
        .write   = ib_uverbs_write,
        .open    = ib_uverbs_open,
        .release = ib_uverbs_close,
        .llseek  = no_llseek,
};

/* fops used when the underlying ib_device provides an mmap method. */
static const struct file_operations uverbs_mmap_fops = {
        .owner   = THIS_MODULE,
        .write   = ib_uverbs_write,
        .mmap    = ib_uverbs_mmap,
        .open    = ib_uverbs_open,
        .release = ib_uverbs_close,
        .llseek  = no_llseek,
};
 731
/*
 * IB core client registration: add/remove are called for every ib_device
 * as it is registered with or removed from the core.
 */
static struct ib_client uverbs_client = {
        .name   = "uverbs",
        .add    = ib_uverbs_add_one,
        .remove = ib_uverbs_remove_one
};
 737
 738static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
 739                          char *buf)
 740{
 741        struct ib_uverbs_device *dev = dev_get_drvdata(device);
 742
 743        if (!dev)
 744                return -ENODEV;
 745
 746        return sprintf(buf, "%s\n", dev->ib_dev->name);
 747}
 748static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 749
 750static ssize_t show_dev_abi_version(struct device *device,
 751                                    struct device_attribute *attr, char *buf)
 752{
 753        struct ib_uverbs_device *dev = dev_get_drvdata(device);
 754
 755        if (!dev)
 756                return -ENODEV;
 757
 758        return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
 759}
 760static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
 761
/* Class-wide abi_version attribute (same value for every uverbs device). */
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
                         __stringify(IB_USER_VERBS_ABI_VERSION));

/* Dynamically allocated major used once the static minor range is full. */
static dev_t overflow_maj;
static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
 767
/*
 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
 * requesting a new major number and doubling the number of max devices we
 * support. It's stupid, but simple.
 */
static int find_overflow_devnum(void)
{
        int ret;

        /*
         * Lazily allocate the dynamic major on first overflow.
         * NOTE(review): alloc_chrdev_region() may sleep, so callers must
         * not hold map_lock here — ib_uverbs_add_one() drops it first.
         */
        if (!overflow_maj) {
                ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
                                          "infiniband_verbs");
                if (ret) {
                        printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
                        return ret;
                }
        }

        /* Bit index within the overflow range, or -1 if that is full too. */
        ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
        if (ret >= IB_UVERBS_MAX_DEVICES)
                return -1;

        return ret;
}
 792
/*
 * IB core "add" callback: create the character device and sysfs node for a
 * newly registered ib_device.  On failure everything is unwound and no
 * client data is set, so ib_uverbs_remove_one() will see NULL and bail.
 */
static void ib_uverbs_add_one(struct ib_device *device)
{
        int devnum;
        dev_t base;
        struct ib_uverbs_device *uverbs_dev;

        /* Userspace verbs require at least alloc_ucontext support. */
        if (!device->alloc_ucontext)
                return;

        uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
        if (!uverbs_dev)
                return;

        kref_init(&uverbs_dev->ref);
        init_completion(&uverbs_dev->comp);
        uverbs_dev->xrcd_tree = RB_ROOT;
        mutex_init(&uverbs_dev->xrcd_tree_mutex);

        /* Grab a free minor; spill into the dynamic overflow range if full. */
        spin_lock(&map_lock);
        devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
        if (devnum >= IB_UVERBS_MAX_DEVICES) {
                /* map_lock must be dropped: find_overflow_devnum() can sleep. */
                spin_unlock(&map_lock);
                devnum = find_overflow_devnum();
                if (devnum < 0)
                        goto err;

                spin_lock(&map_lock);
                uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
                base = devnum + overflow_maj;
                set_bit(devnum, overflow_map);
        } else {
                uverbs_dev->devnum = devnum;
                base = devnum + IB_UVERBS_BASE_DEV;
                set_bit(devnum, dev_map);
        }
        spin_unlock(&map_lock);

        uverbs_dev->ib_dev           = device;
        uverbs_dev->num_comp_vectors = device->num_comp_vectors;

        /* Pick mmap-capable fops only when the driver implements mmap. */
        cdev_init(&uverbs_dev->cdev, NULL);
        uverbs_dev->cdev.owner = THIS_MODULE;
        uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
        kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
        if (cdev_add(&uverbs_dev->cdev, base, 1))
                goto err_cdev;

        uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
                                        uverbs_dev->cdev.dev, uverbs_dev,
                                        "uverbs%d", uverbs_dev->devnum);
        if (IS_ERR(uverbs_dev->dev))
                goto err_cdev;

        if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
                goto err_class;
        if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
                goto err_class;

        ib_set_client_data(device, &uverbs_client, uverbs_dev);

        return;

err_class:
        device_destroy(uverbs_class, uverbs_dev->cdev.dev);

err_cdev:
        cdev_del(&uverbs_dev->cdev);
        /* devnum is still the bit index within whichever range it came from. */
        if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
                clear_bit(devnum, dev_map);
        else
                clear_bit(devnum, overflow_map);

err:
        /* Drop our ref, then wait out any other holders before freeing. */
        kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
        wait_for_completion(&uverbs_dev->comp);
        kfree(uverbs_dev);
        return;
}
 871
/*
 * ib_uverbs_remove_one() - IB client "remove" callback: tear down the
 * uverbsN device created by ib_uverbs_add_one().
 *
 * Teardown order matters: the class device and cdev are removed first
 * so no new opens can arrive, then the minor is returned to its bitmap,
 * and only then is the initial kref dropped.  wait_for_completion()
 * blocks until ib_uverbs_release_dev has run (presumably once every
 * outstanding reference is gone) before the structure is freed.
 */
static void ib_uverbs_remove_one(struct ib_device *device)
{
	struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);

	/* Nothing to do if add_one skipped or failed for this device. */
	if (!uverbs_dev)
		return;

	dev_set_drvdata(uverbs_dev->dev, NULL);
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
	cdev_del(&uverbs_dev->cdev);

	/* Overflow minors were stored offset by IB_UVERBS_MAX_DEVICES. */
	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(uverbs_dev->devnum, dev_map);
	else
		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);

	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
	wait_for_completion(&uverbs_dev->comp);
	kfree(uverbs_dev);
}
 892
 893static char *uverbs_devnode(struct device *dev, umode_t *mode)
 894{
 895        if (mode)
 896                *mode = 0666;
 897        return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
 898}
 899
 900static int __init ib_uverbs_init(void)
 901{
 902        int ret;
 903
 904        ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
 905                                     "infiniband_verbs");
 906        if (ret) {
 907                printk(KERN_ERR "user_verbs: couldn't register device number\n");
 908                goto out;
 909        }
 910
 911        uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
 912        if (IS_ERR(uverbs_class)) {
 913                ret = PTR_ERR(uverbs_class);
 914                printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
 915                goto out_chrdev;
 916        }
 917
 918        uverbs_class->devnode = uverbs_devnode;
 919
 920        ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
 921        if (ret) {
 922                printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
 923                goto out_class;
 924        }
 925
 926        ret = ib_register_client(&uverbs_client);
 927        if (ret) {
 928                printk(KERN_ERR "user_verbs: couldn't register client\n");
 929                goto out_class;
 930        }
 931
 932        return 0;
 933
 934out_class:
 935        class_destroy(uverbs_class);
 936
 937out_chrdev:
 938        unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
 939
 940out:
 941        return ret;
 942}
 943
/*
 * Module exit: undo ib_uverbs_init() in reverse order, release the
 * dynamically allocated overflow region if one was ever created, and
 * destroy the per-object-type idr tables.
 */
static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
	/* overflow_maj is nonzero only if find_overflow_devnum() allocated it. */
	if (overflow_maj)
		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
	idr_destroy(&ib_uverbs_pd_idr);
	idr_destroy(&ib_uverbs_mr_idr);
	idr_destroy(&ib_uverbs_mw_idr);
	idr_destroy(&ib_uverbs_ah_idr);
	idr_destroy(&ib_uverbs_cq_idr);
	idr_destroy(&ib_uverbs_qp_idr);
	idr_destroy(&ib_uverbs_srq_idr);
}
 959
/* Standard module entry/exit hooks. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);
 962