linux/drivers/infiniband/core/rdma_core.c
/*
 * Copyright (c) 2016, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/rdma_user_ioctl.h>
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

void uverbs_uobject_get(struct ib_uobject *uobject)
{
        kref_get(&uobject->ref);
}

static void uverbs_uobject_free(struct kref *ref)
{
        struct ib_uobject *uobj =
                container_of(ref, struct ib_uobject, ref);

        if (uobj->uapi_object->type_class->needs_kfree_rcu)
                kfree_rcu(uobj, rcu);
        else
                kfree(uobj);
}

void uverbs_uobject_put(struct ib_uobject *uobject)
{
        kref_put(&uobject->ref, uverbs_uobject_free);
}

static int uverbs_try_lock_object(struct ib_uobject *uobj,
                                  enum rdma_lookup_mode mode)
{
        /*
         * When shared access is required, we use a positive counter. Each
         * shared access request checks that the value != -1 and increments
         * it. Exclusive access is required for operations like write or
         * destroy. In exclusive access mode, we check that the counter is
         * zero (nobody claimed this object) and we set it to -1. Releasing
         * a shared access lock is done simply by decreasing the counter. As
         * for exclusive access locks, since only a single one of them is
         * allowed concurrently, setting the counter to zero is enough for
         * releasing this lock.
         */
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
                        -EBUSY : 0;
        case UVERBS_LOOKUP_WRITE:
                /* lock is exclusive */
                return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
        case UVERBS_LOOKUP_DESTROY:
                return 0;
        }
        return 0;
}
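
/*
 * Illustrative sketch (comment only, not compiled): how the usecnt protocol
 * above plays out for concurrent users, assuming a committed uobj whose
 * usecnt starts at 0:
 *
 *      uverbs_try_lock_object(uobj, UVERBS_LOOKUP_READ);   usecnt 0 -> 1
 *      uverbs_try_lock_object(uobj, UVERBS_LOOKUP_READ);   usecnt 1 -> 2
 *      uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);  -EBUSY, usecnt != 0
 *
 * Conversely, while a writer holds the lock (usecnt == -1) every READ
 * attempt fails with -EBUSY until the writer sets the counter back to 0.
 */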

static void assert_uverbs_usecnt(struct ib_uobject *uobj,
                                 enum rdma_lookup_mode mode)
{
#ifdef CONFIG_LOCKDEP
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                WARN_ON(atomic_read(&uobj->usecnt) <= 0);
                break;
        case UVERBS_LOOKUP_WRITE:
                WARN_ON(atomic_read(&uobj->usecnt) != -1);
                break;
        case UVERBS_LOOKUP_DESTROY:
                break;
        }
#endif
}

/*
 * This must be called with the hw_destroy_rwsem locked for read or write,
 * also the uobject itself must be locked for write.
 *
 * Upon return the HW object is guaranteed to be destroyed.
 *
 * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
 * however the type's alloc_commit function cannot have been called and the
 * uobject cannot be on the ufile->uobjects list.
 *
 * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
 * rdma_lookup_get_uobject) and the object is left in a state where the caller
 * needs to call rdma_lookup_put_uobject.
 *
 * For all other destroy modes this function internally unlocks the uobject
 * and consumes the kref on the uobj.
 */
static int uverbs_destroy_uobject(struct ib_uobject *uobj,
                                  enum rdma_remove_reason reason)
{
        struct ib_uverbs_file *ufile = uobj->ufile;
        unsigned long flags;
        int ret;

        lockdep_assert_held(&ufile->hw_destroy_rwsem);
        assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);

        if (uobj->object) {
                ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason);
                if (ret) {
                        if (ib_is_destroy_retryable(ret, reason, uobj))
                                return ret;

                        /* Nothing to be done, dangle the memory and move on */
                        WARN(true,
                             "ib_uverbs: failed to remove uobject id %d, driver err=%d",
                             uobj->id, ret);
                }

                uobj->object = NULL;
        }

        if (reason == RDMA_REMOVE_ABORT) {
                WARN_ON(!list_empty(&uobj->list));
                WARN_ON(!uobj->context);
                uobj->uapi_object->type_class->alloc_abort(uobj);
        }

        uobj->context = NULL;

        /*
         * For DESTROY the usecnt is held write locked, the caller is
         * expected to unlock it and put the object when done with it. Only
         * DESTROY can remove the IDR handle.
         */
        if (reason != RDMA_REMOVE_DESTROY)
                atomic_set(&uobj->usecnt, 0);
        else
                uobj->uapi_object->type_class->remove_handle(uobj);

        if (!list_empty(&uobj->list)) {
                spin_lock_irqsave(&ufile->uobjects_lock, flags);
                list_del_init(&uobj->list);
                spin_unlock_irqrestore(&ufile->uobjects_lock, flags);

                /*
                 * Pairs with the get in rdma_alloc_commit_uobject(), could
                 * destroy uobj.
                 */
                uverbs_uobject_put(uobj);
        }

        /*
         * When aborting the stack kref remains owned by the core code, and is
         * not transferred into the type. Pairs with the get in alloc_uobj.
         */
        if (reason == RDMA_REMOVE_ABORT)
                uverbs_uobject_put(uobj);

        return 0;
}

/*
 * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
 * sequence. It should only be used from command callbacks. On success the
 * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
 * version requires the caller to have already obtained an
 * LOOKUP_DESTROY uobject kref.
 */
int uobj_destroy(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;
        int ret;

        down_read(&ufile->hw_destroy_rwsem);

        ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
        if (ret)
                goto out_unlock;

        ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
        if (ret) {
                atomic_set(&uobj->usecnt, 0);
                goto out_unlock;
        }

out_unlock:
        up_read(&ufile->hw_destroy_rwsem);
        return ret;
}

/*
 * uobj_get_destroy destroys the HW object and returns a handle to the uobj
 * with a NULL object pointer. The caller must pair this with
 * uobj_put_destroy.
 */
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
                                      u32 id, struct ib_uverbs_file *ufile)
{
        struct ib_uobject *uobj;
        int ret;

        uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
        if (IS_ERR(uobj))
                return uobj;

        ret = uobj_destroy(uobj);
        if (ret) {
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
                return ERR_PTR(ret);
        }

        return uobj;
}

/*
 * Does both uobj_get_destroy() and uobj_put_destroy().  Returns success_res
 * on success (negative errno on failure). For use by callers that do not need
 * the uobj.
 */
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
                           struct ib_uverbs_file *ufile, int success_res)
{
        struct ib_uobject *uobj;

        uobj = __uobj_get_destroy(obj, id, ufile);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);

        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
        return success_res;
}
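
/*
 * Illustrative sketch (hypothetical handler, not part of this file): a
 * command callback that only needs to destroy an object by id and has no
 * other output can be written entirely in terms of the helper above
 * (handlers normally reach it through a uobj_perform_destroy() style
 * wrapper macro in the uverbs headers):
 *
 *      static int example_destroy_handler(struct ib_uverbs_file *ufile,
 *                                         const struct uverbs_api_object *obj,
 *                                         u32 id, int in_len)
 *      {
 *              return __uobj_perform_destroy(obj, id, ufile, in_len);
 *      }
 */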

/* alloc_uobj must be undone by uverbs_destroy_uobject() */
static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
                                     const struct uverbs_api_object *obj)
{
        struct ib_uobject *uobj;
        struct ib_ucontext *ucontext;

        ucontext = ib_uverbs_get_ucontext(ufile);
        if (IS_ERR(ucontext))
                return ERR_CAST(ucontext);

        uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
        if (!uobj)
                return ERR_PTR(-ENOMEM);
        /*
         * user_handle should be filled by the handler; the object is added
         * to the list in the commit stage.
         */
        uobj->ufile = ufile;
        uobj->context = ucontext;
        INIT_LIST_HEAD(&uobj->list);
        uobj->uapi_object = obj;
        /*
         * Allocated objects start out as write locked to deny any other
         * syscalls from accessing them until they are committed. See
         * rdma_alloc_commit_uobject.
         */
        atomic_set(&uobj->usecnt, -1);
        kref_init(&uobj->ref);

        return uobj;
}

static int idr_add_uobj(struct ib_uobject *uobj)
{
        int ret;

        idr_preload(GFP_KERNEL);
        spin_lock(&uobj->ufile->idr_lock);

        /*
         * We start with allocating an idr pointing to NULL. This represents an
         * object which isn't initialized yet. We'll replace it later on with
         * the real object once we commit.
         */
        ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
                        min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
        if (ret >= 0)
                uobj->id = ret;

        spin_unlock(&uobj->ufile->idr_lock);
        idr_preload_end();

        return ret < 0 ? ret : 0;
}
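
/*
 * Note the two-phase publication: the id is only reserved here, pointing at
 * NULL, and alloc_commit_idr_uobject() below makes it visible with
 * idr_replace(). A concurrent lookup_get_idr_uobject() issued between the
 * two phases finds NULL in the IDR and simply returns -ENOENT.
 */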

/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
static struct ib_uobject *
lookup_get_idr_uobject(const struct uverbs_api_object *obj,
                       struct ib_uverbs_file *ufile, s64 id,
                       enum rdma_lookup_mode mode)
{
        struct ib_uobject *uobj;
        unsigned long idrno = id;

        if (id < 0 || id > ULONG_MAX)
                return ERR_PTR(-EINVAL);

        rcu_read_lock();
        /* the object won't be released as we're protected by RCU */
        uobj = idr_find(&ufile->idr, idrno);
        if (!uobj) {
                uobj = ERR_PTR(-ENOENT);
                goto free;
        }

        /*
         * The idr_find is guaranteed to return a pointer to something that
         * isn't freed yet, or NULL, as the free after idr_remove goes through
         * kfree_rcu(). However the object may still have been released and
         * kfree() could be called at any time.
         */
        if (!kref_get_unless_zero(&uobj->ref))
                uobj = ERR_PTR(-ENOENT);

free:
        rcu_read_unlock();
        return uobj;
}
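
/*
 * Illustrative sketch (comment only, not compiled): the function above is
 * an instance of the standard RCU + kref_get_unless_zero() lookup idiom,
 * which in generic form (hypothetical names) reads:
 *
 *      rcu_read_lock();
 *      obj = idr_find(&some_idr, id);  // memory cannot be recycled yet...
 *      if (obj && !kref_get_unless_zero(&obj->ref))
 *              obj = NULL;             // ...but the refcount may hit zero
 *      rcu_read_unlock();
 *      // a non-NULL obj now holds a reference valid past the RCU section
 */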

static struct ib_uobject *
lookup_get_fd_uobject(const struct uverbs_api_object *obj,
                      struct ib_uverbs_file *ufile, s64 id,
                      enum rdma_lookup_mode mode)
{
        const struct uverbs_obj_fd_type *fd_type;
        struct file *f;
        struct ib_uobject *uobject;
        int fdno = id;

        if (fdno != id)
                return ERR_PTR(-EINVAL);

        if (mode != UVERBS_LOOKUP_READ)
                return ERR_PTR(-EOPNOTSUPP);

        if (!obj->type_attrs)
                return ERR_PTR(-EIO);
        fd_type =
                container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);

        f = fget(fdno);
        if (!f)
                return ERR_PTR(-EBADF);

        uobject = f->private_data;
        /*
         * fget(id) ensures we are not currently running uverbs_close_fd,
         * and the caller is expected to ensure that uverbs_close_fd is never
         * done while a call to lookup is possible.
         */
        if (f->f_op != fd_type->fops) {
                fput(f);
                return ERR_PTR(-EBADF);
        }

        uverbs_uobject_get(uobject);
        return uobject;
}

struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
                                           struct ib_uverbs_file *ufile, s64 id,
                                           enum rdma_lookup_mode mode)
{
        struct ib_uobject *uobj;
        int ret;

        if (!obj)
                return ERR_PTR(-EINVAL);

        uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
        if (IS_ERR(uobj))
                return uobj;

        if (uobj->uapi_object != obj) {
                ret = -EINVAL;
                goto free;
        }

        /*
         * If we have been disassociated block every command except for
         * DESTROY based commands.
         */
        if (mode != UVERBS_LOOKUP_DESTROY &&
            !srcu_dereference(ufile->device->ib_dev,
                              &ufile->device->disassociate_srcu)) {
                ret = -EIO;
                goto free;
        }

        ret = uverbs_try_lock_object(uobj, mode);
        if (ret)
                goto free;

        return uobj;
free:
        obj->type_class->lookup_put(uobj, mode);
        uverbs_uobject_put(uobj);
        return ERR_PTR(ret);
}
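
/*
 * Illustrative sketch (hypothetical handler, not part of this file): the
 * expected pairing for a read-only command is lookup, use, then put:
 *
 *      uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_READ);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *      ... inspect uobj->object, which cannot be destroyed or write
 *          locked until the put ...
 *      rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
 */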

static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
                        struct ib_uverbs_file *ufile)
{
        int ret;
        struct ib_uobject *uobj;

        uobj = alloc_uobj(ufile, obj);
        if (IS_ERR(uobj))
                return uobj;

        ret = idr_add_uobj(uobj);
        if (ret)
                goto uobj_put;

        ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
                                   RDMACG_RESOURCE_HCA_OBJECT);
        if (ret)
                goto idr_remove;

        return uobj;

idr_remove:
        spin_lock(&ufile->idr_lock);
        idr_remove(&ufile->idr, uobj->id);
        spin_unlock(&ufile->idr_lock);
uobj_put:
        uverbs_uobject_put(uobj);
        return ERR_PTR(ret);
}

static struct ib_uobject *
alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
                       struct ib_uverbs_file *ufile)
{
        int new_fd;
        struct ib_uobject *uobj;

        new_fd = get_unused_fd_flags(O_CLOEXEC);
        if (new_fd < 0)
                return ERR_PTR(new_fd);

        uobj = alloc_uobj(ufile, obj);
        if (IS_ERR(uobj)) {
                put_unused_fd(new_fd);
                return uobj;
        }

        uobj->id = new_fd;
        uobj->ufile = ufile;

        return uobj;
}

struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
                                            struct ib_uverbs_file *ufile)
{
        struct ib_uobject *ret;

        if (!obj)
                return ERR_PTR(-EINVAL);

        /*
         * The hw_destroy_rwsem is held across the entire object creation and
         * released during rdma_alloc_commit_uobject or
         * rdma_alloc_abort_uobject.
         */
        if (!down_read_trylock(&ufile->hw_destroy_rwsem))
                return ERR_PTR(-EIO);

        ret = obj->type_class->alloc_begin(obj, ufile);
        if (IS_ERR(ret)) {
                up_read(&ufile->hw_destroy_rwsem);
                return ret;
        }
        return ret;
}
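
/*
 * Illustrative sketch (hypothetical handler, not part of this file): every
 * rdma_alloc_begin_uobject() must be balanced by exactly one commit or
 * abort, either of which consumes uobj and drops hw_destroy_rwsem:
 *
 *      uobj = rdma_alloc_begin_uobject(obj, ufile);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *      hw = example_create_hw_object(...);     // hypothetical driver call
 *      if (IS_ERR(hw)) {
 *              rdma_alloc_abort_uobject(uobj); // consumes uobj
 *              return PTR_ERR(hw);
 *      }
 *      uobj->object = hw;
 *      return rdma_alloc_commit_uobject(uobj); // consumes uobj
 */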

static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
{
        ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
                           RDMACG_RESOURCE_HCA_OBJECT);

        spin_lock(&uobj->ufile->idr_lock);
        idr_remove(&uobj->ufile->idr, uobj->id);
        spin_unlock(&uobj->ufile->idr_lock);
}

static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
                                               enum rdma_remove_reason why)
{
        const struct uverbs_obj_idr_type *idr_type =
                container_of(uobj->uapi_object->type_attrs,
                             struct uverbs_obj_idr_type, type);
        int ret = idr_type->destroy_object(uobj, why);

        /*
         * We can only fail gracefully if the user requested to destroy the
         * object or when a retry may be called upon an error.
         * In the rest of the cases, just remove whatever you can.
         */
        if (ib_is_destroy_retryable(ret, why, uobj))
                return ret;

        if (why == RDMA_REMOVE_ABORT)
                return 0;

        ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
                           RDMACG_RESOURCE_HCA_OBJECT);

        return 0;
}

static void remove_handle_idr_uobject(struct ib_uobject *uobj)
{
        spin_lock(&uobj->ufile->idr_lock);
        idr_remove(&uobj->ufile->idr, uobj->id);
        spin_unlock(&uobj->ufile->idr_lock);
        /* Matches the kref in alloc_commit_idr_uobject */
        uverbs_uobject_put(uobj);
}

static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
        put_unused_fd(uobj->id);
}

static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
                                              enum rdma_remove_reason why)
{
        const struct uverbs_obj_fd_type *fd_type = container_of(
                uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
        int ret = fd_type->context_closed(uobj, why);

        if (ib_is_destroy_retryable(ret, why, uobj))
                return ret;

        return 0;
}

static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
}

static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;

        spin_lock(&ufile->idr_lock);
        /*
         * We already allocated this IDR with a NULL object, so
         * this shouldn't fail.
         *
         * NOTE: Once we set the IDR we lose ownership of our kref on uobj.
         * It will be put by remove_handle_idr_uobject()
         */
        WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
        spin_unlock(&ufile->idr_lock);

        return 0;
}

static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
        const struct uverbs_obj_fd_type *fd_type = container_of(
                uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
        int fd = uobj->id;
        struct file *filp;

        /*
         * The kref for uobj is moved into filp->private_data and put in
         * uverbs_close_fd(). Once alloc_commit() succeeds, uverbs_close_fd()
         * must be guaranteed to be called from the provided fops release
         * callback.
         */
        filp = anon_inode_getfile(fd_type->name,
                                  fd_type->fops,
                                  uobj,
                                  fd_type->flags);
        if (IS_ERR(filp))
                return PTR_ERR(filp);

        uobj->object = filp;

        /* Matching put will be done in uverbs_close_fd() */
        kref_get(&uobj->ufile->ref);

        /* This shouldn't be used anymore. Use the file object instead */
        uobj->id = 0;

        /*
         * NOTE: Once we install the file we lose ownership of our kref on
         * uobj. It will be put by uverbs_close_fd()
         */
        fd_install(fd, filp);

        return 0;
}
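
/*
 * Summary of the kref flow for a committed FD uobject (restating the notes
 * above, no new behaviour):
 *
 *      alloc_uobj()                  kref = 1, owned by the core code
 *      alloc_commit_fd_uobject()     ownership moves into filp->private_data
 *      rdma_alloc_commit_uobject()   kref_get() for the ufile->uobjects list
 *      uverbs_destroy_uobject()      puts the list kref on removal
 *      uverbs_close_fd()             puts the filp->private_data kref
 */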

/*
 * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
 * caller can no longer assume uobj is valid. If this function fails it
 * destroys the uobject, including the attached HW object.
 */
int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;
        int ret;

        /* alloc_commit consumes the uobj kref */
        ret = uobj->uapi_object->type_class->alloc_commit(uobj);
        if (ret) {
                uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
                up_read(&ufile->hw_destroy_rwsem);
                return ret;
        }

        /* kref is held so long as the uobj is on the uobj list. */
        uverbs_uobject_get(uobj);
        spin_lock_irq(&ufile->uobjects_lock);
        list_add(&uobj->list, &ufile->uobjects);
        spin_unlock_irq(&ufile->uobjects_lock);

        /* matches atomic_set(-1) in alloc_uobj */
        atomic_set(&uobj->usecnt, 0);

        /* Matches the down_read in rdma_alloc_begin_uobject */
        up_read(&ufile->hw_destroy_rwsem);

        return 0;
}

/*
 * This consumes the kref for uobj. It is up to the caller to unwind the HW
 * object and anything else connected to uobj before calling this.
 */
void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;

        uobj->object = NULL;
        uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);

        /* Matches the down_read in rdma_alloc_begin_uobject */
        up_read(&ufile->hw_destroy_rwsem);
}

static void lookup_put_idr_uobject(struct ib_uobject *uobj,
                                   enum rdma_lookup_mode mode)
{
}

static void lookup_put_fd_uobject(struct ib_uobject *uobj,
                                  enum rdma_lookup_mode mode)
{
        struct file *filp = uobj->object;

        WARN_ON(mode != UVERBS_LOOKUP_READ);
        /* This indirectly calls uverbs_close_fd and frees the object */
        fput(filp);
}

void rdma_lookup_put_uobject(struct ib_uobject *uobj,
                             enum rdma_lookup_mode mode)
{
        assert_uverbs_usecnt(uobj, mode);
        uobj->uapi_object->type_class->lookup_put(uobj, mode);
        /*
         * In order to unlock an object, either decrease its usecnt for
         * read access or zero it in case of exclusive access. See
         * uverbs_try_lock_object for locking schema information.
         */
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                atomic_dec(&uobj->usecnt);
                break;
        case UVERBS_LOOKUP_WRITE:
                atomic_set(&uobj->usecnt, 0);
                break;
        case UVERBS_LOOKUP_DESTROY:
                break;
        }

        /* Pairs with the kref obtained by type->lookup_get */
        uverbs_uobject_put(uobj);
}
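
/*
 * Summary of the lock/unlock pairing implemented by
 * uverbs_try_lock_object() and rdma_lookup_put_uobject():
 *
 *      UVERBS_LOOKUP_READ      usecnt++        /  usecnt--
 *      UVERBS_LOOKUP_WRITE     usecnt 0 -> -1  /  usecnt = 0
 *      UVERBS_LOOKUP_DESTROY   untouched       /  untouched
 */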

void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
        spin_lock_init(&ufile->idr_lock);
        idr_init(&ufile->idr);
}

void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
        struct ib_uobject *entry;
        int id;

        /*
         * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
         * there are no HW objects left, however the IDR is still populated
         * with anything that has not been cleaned up by userspace. Since the
         * kref on ufile is 0, nothing is allowed to call lookup_get.
         *
         * This is an optimized equivalent to remove_handle_idr_uobject
         */
        idr_for_each_entry(&ufile->idr, entry, id) {
                WARN_ON(entry->object);
                uverbs_uobject_put(entry);
        }

        idr_destroy(&ufile->idr);
}

const struct uverbs_obj_type_class uverbs_idr_class = {
        .alloc_begin = alloc_begin_idr_uobject,
        .lookup_get = lookup_get_idr_uobject,
        .alloc_commit = alloc_commit_idr_uobject,
        .alloc_abort = alloc_abort_idr_uobject,
        .lookup_put = lookup_put_idr_uobject,
        .destroy_hw = destroy_hw_idr_uobject,
        .remove_handle = remove_handle_idr_uobject,
        /*
         * When we destroy an object, we first just lock it for WRITE and
         * actually DESTROY it in the finalize stage. So, the problematic
         * scenario is when we just started the finalize stage of the
         * destruction (nothing was executed yet). Now, the other thread
         * fetched the object for READ access, but it didn't lock it yet.
         * The DESTROY thread continues and starts destroying the object.
         * When the other thread continues - without the RCU, it would
         * access freed memory. However, rcu_read_lock delays the free
         * until the READ operation's RCU read-side critical section ends.
         * Since the exclusive lock of the object is still taken by the
         * DESTROY flow, the READ operation will get -EBUSY and it'll just
         * bail out.
         */
        .needs_kfree_rcu = true,
};
EXPORT_SYMBOL(uverbs_idr_class);

void uverbs_close_fd(struct file *f)
{
        struct ib_uobject *uobj = f->private_data;
        struct ib_uverbs_file *ufile = uobj->ufile;

        if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
                /*
                 * lookup_get_fd_uobject holds the kref on the struct file any
                 * time a FD uobj is locked, which prevents this release
                 * method from being invoked. Meaning we can always get the
                 * write lock here, or we have a kernel bug.
                 */
                WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
                uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
                up_read(&ufile->hw_destroy_rwsem);
        }

        /* Matches the get in alloc_commit_fd_uobject */
        kref_put(&ufile->ref, ib_uverbs_release_file);

        /* Pairs with filp->private_data in alloc_commit_fd_uobject */
        uverbs_uobject_put(uobj);
}

static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
        struct ib_device *ib_dev = ibcontext->device;
        struct task_struct *owning_process  = NULL;
        struct mm_struct   *owning_mm       = NULL;

        owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
        if (!owning_process)
                return;

        owning_mm = get_task_mm(owning_process);
        if (!owning_mm) {
                pr_info("no mm, disassociate ucontext is pending task termination\n");
                while (1) {
                        put_task_struct(owning_process);
                        usleep_range(1000, 2000);
                        owning_process = get_pid_task(ibcontext->tgid,
                                                      PIDTYPE_PID);
                        if (!owning_process ||
                            owning_process->state == TASK_DEAD) {
                                pr_info("disassociate ucontext done, task was terminated\n");
                                /* in case task was dead need to release the
                                 * task struct.
                                 */
                                if (owning_process)
                                        put_task_struct(owning_process);
                                return;
                        }
                }
        }

        down_write(&owning_mm->mmap_sem);
        ib_dev->disassociate_ucontext(ibcontext);
        up_write(&owning_mm->mmap_sem);
        mmput(owning_mm);
        put_task_struct(owning_process);
}

/*
 * Drop the ucontext off the ufile and completely disconnect it from the
 * ib_device
 */
static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
                                   enum rdma_remove_reason reason)
{
        struct ib_ucontext *ucontext = ufile->ucontext;
        int ret;

        if (reason == RDMA_REMOVE_DRIVER_REMOVE)
                ufile_disassociate_ucontext(ucontext);

        put_pid(ucontext->tgid);
        ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
                           RDMACG_RESOURCE_HCA_HANDLE);

        /*
         * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
         * the error return.
         */
        ret = ucontext->device->dealloc_ucontext(ucontext);
        WARN_ON(ret);

        ufile->ucontext = NULL;
}

static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
                                  enum rdma_remove_reason reason)
{
        struct ib_uobject *obj, *next_obj;
        int ret = -EINVAL;

        /*
         * This shouldn't run while executing other commands on this
         * context. Thus, the only thing we should take care of is
         * releasing a FD while traversing this list. The FD could be
         * closed and released from the _release fop of this FD.
         * In order to mitigate this, we add a lock.
         * We take and release the lock per traversal in order to give
         * other threads (which might still use the FDs) a chance to run.
         */
        list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
                /*
                 * if we hit this WARN_ON, that means we are
                 * racing with a lookup_get.
                 */
                WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
                if (!uverbs_destroy_uobject(obj, reason))
                        ret = 0;
                else
                        atomic_set(&obj->usecnt, 0);
        }
        return ret;
}

/*
 * Destroy the ucontext and every uobject associated with it. If called with
 * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
 * been completed and ufile->ucontext is NULL.
 *
 * This is internally locked and can be called in parallel from multiple
 * contexts.
 */
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
                             enum rdma_remove_reason reason)
{
        if (reason == RDMA_REMOVE_CLOSE) {
                /*
                 * During destruction we might trigger something that
                 * synchronously calls release on any file descriptor. For
                 * this reason all paths that come from file_operations
                 * release must use try_lock. They can progress knowing that
                 * there is an ongoing uverbs_destroy_ufile_hw that will clean
                 * up the driver resources.
                 */
                if (!mutex_trylock(&ufile->ucontext_lock))
                        return;

        } else {
                mutex_lock(&ufile->ucontext_lock);
        }

        down_write(&ufile->hw_destroy_rwsem);

        /*
         * If a ucontext was never created then we can't have any uobjects to
         * cleanup, nothing to do.
         */
        if (!ufile->ucontext)
                goto done;

        ufile->ucontext->closing = true;
        ufile->ucontext->cleanup_retryable = true;
        while (!list_empty(&ufile->uobjects))
                if (__uverbs_cleanup_ufile(ufile, reason)) {
                        /*
                         * No entry was cleaned-up successfully during this
                         * iteration
                         */
                        break;
                }

        ufile->ucontext->cleanup_retryable = false;
        if (!list_empty(&ufile->uobjects))
                __uverbs_cleanup_ufile(ufile, reason);

        ufile_destroy_ucontext(ufile, reason);

done:
        up_write(&ufile->hw_destroy_rwsem);
        mutex_unlock(&ufile->ucontext_lock);
}
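
/*
 * Note on the two-pass loop above: while cleanup_retryable is set, a
 * destroy_hw() failure (for example, an object another uobject still
 * depends on) leaves the uobject on the list for a later iteration, so
 * dependent objects can be destroyed in whatever order the list yields
 * them. Once an iteration makes no progress, cleanup_retryable is cleared
 * and the final pass removes whatever remains unconditionally.
 */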

const struct uverbs_obj_type_class uverbs_fd_class = {
        .alloc_begin = alloc_begin_fd_uobject,
        .lookup_get = lookup_get_fd_uobject,
        .alloc_commit = alloc_commit_fd_uobject,
        .alloc_abort = alloc_abort_fd_uobject,
        .lookup_put = lookup_put_fd_uobject,
        .destroy_hw = destroy_hw_fd_uobject,
        .remove_handle = remove_handle_fd_uobject,
        .needs_kfree_rcu = false,
};
EXPORT_SYMBOL(uverbs_fd_class);

struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id,
                             struct ib_uverbs_file *ufile,
                             enum uverbs_obj_access access, s64 id)
{
        const struct uverbs_api_object *obj =
                uapi_get_object(ufile->device->uapi, object_id);

        switch (access) {
        case UVERBS_ACCESS_READ:
                return rdma_lookup_get_uobject(obj, ufile, id,
                                               UVERBS_LOOKUP_READ);
        case UVERBS_ACCESS_DESTROY:
                /* Actual destruction is done inside uverbs_handle_method */
                return rdma_lookup_get_uobject(obj, ufile, id,
                                               UVERBS_LOOKUP_DESTROY);
        case UVERBS_ACCESS_WRITE:
                return rdma_lookup_get_uobject(obj, ufile, id,
                                               UVERBS_LOOKUP_WRITE);
        case UVERBS_ACCESS_NEW:
                return rdma_alloc_begin_uobject(obj, ufile);
        default:
                WARN_ON(true);
                return ERR_PTR(-EOPNOTSUPP);
        }
}
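
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * ioctl dispatcher resolves each object attribute through the helper above
 * and balances it with uverbs_finalize_object() once the method handler
 * has run:
 *
 *      uobj = uverbs_get_uobject_from_file(object_id, ufile,
 *                                          UVERBS_ACCESS_READ, id);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *      ret = run_method_handler(...);          // hypothetical
 *      uverbs_finalize_object(uobj, UVERBS_ACCESS_READ, ret == 0);
 */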

int uverbs_finalize_object(struct ib_uobject *uobj,
                           enum uverbs_obj_access access,
                           bool commit)
{
        int ret = 0;

        /*
         * refcounts should be handled at the object level and not at the
         * uobject level. Refcounts of the objects themselves are done in
         * handlers.
         */

        switch (access) {
        case UVERBS_ACCESS_READ:
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
                break;
        case UVERBS_ACCESS_WRITE:
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
                break;
        case UVERBS_ACCESS_DESTROY:
                if (uobj)
                        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
                break;
        case UVERBS_ACCESS_NEW:
                if (commit)
                        ret = rdma_alloc_commit_uobject(uobj);
                else
                        rdma_alloc_abort_uobject(uobj);
                break;
        default:
                WARN_ON(true);
                ret = -EOPNOTSUPP;
        }

        return ret;
}