linux/drivers/infiniband/core/rdma_core.c

/*
 * Copyright (c) 2016, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/rdma_user_ioctl.h>
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

void uverbs_uobject_get(struct ib_uobject *uobject)
{
        kref_get(&uobject->ref);
}

static void uverbs_uobject_free(struct kref *ref)
{
        struct ib_uobject *uobj =
                container_of(ref, struct ib_uobject, ref);

        if (uobj->uapi_object->type_class->needs_kfree_rcu)
                kfree_rcu(uobj, rcu);
        else
                kfree(uobj);
}

void uverbs_uobject_put(struct ib_uobject *uobject)
{
        kref_put(&uobject->ref, uverbs_uobject_free);
}

static int uverbs_try_lock_object(struct ib_uobject *uobj,
                                  enum rdma_lookup_mode mode)
{
        /*
         * When shared access is required, we use a positive counter. Each
         * shared access request checks that the value != -1 and increments it.
         * Exclusive access is required for operations like write or destroy.
         * In exclusive access mode, we check that the counter is zero (nobody
         * claimed this object) and we set it to -1. Releasing a shared access
         * lock is done simply by decreasing the counter. As for exclusive
         * access locks, since only a single one of them is allowed
         * concurrently, setting the counter to zero is enough for releasing
         * this lock.
         */
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
                        -EBUSY : 0;
        case UVERBS_LOOKUP_WRITE:
                /* lock is exclusive */
                return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
        case UVERBS_LOOKUP_DESTROY:
                return 0;
        }
        return 0;
}
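
/*
 * Illustrative sketch (not part of this file's API): the usecnt states are
 * 0 (unlocked), N > 0 (N concurrent readers) and -1 (one exclusive writer).
 * A hypothetical caller that already holds a kref on uobj would pair the
 * lock with the matching release that rdma_lookup_put_uobject() performs:
 *
 *      if (!uverbs_try_lock_object(uobj, UVERBS_LOOKUP_READ)) {
 *              ...read uobj->object, other readers may run concurrently...
 *              atomic_dec(&uobj->usecnt);      (what a LOOKUP_READ put does)
 *      }
 *
 *      if (!uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE)) {
 *              ...exclusive access, concurrent lookups get -EBUSY...
 *              atomic_set(&uobj->usecnt, 0);   (what a LOOKUP_WRITE put does)
 *      }
 */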

static void assert_uverbs_usecnt(struct ib_uobject *uobj,
                                 enum rdma_lookup_mode mode)
{
#ifdef CONFIG_LOCKDEP
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                WARN_ON(atomic_read(&uobj->usecnt) <= 0);
                break;
        case UVERBS_LOOKUP_WRITE:
                WARN_ON(atomic_read(&uobj->usecnt) != -1);
                break;
        case UVERBS_LOOKUP_DESTROY:
                break;
        }
#endif
}

/*
 * This must be called with the hw_destroy_rwsem locked for read or write,
 * and the uobject itself must be locked for write.
 *
 * Upon return the HW object is guaranteed to be destroyed.
 *
 * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
 * however the type's alloc_commit function cannot have been called and the
 * uobject cannot be on the uobjects_lists.
 *
 * For RDMA_REMOVE_DESTROY the caller should be holding a kref (e.g. via
 * rdma_lookup_get_uobject) and the object is left in a state where the caller
 * needs to call rdma_lookup_put_uobject.
 *
 * For all other destroy modes this function internally unlocks the uobject
 * and consumes the kref on the uobj.
 */
static int uverbs_destroy_uobject(struct ib_uobject *uobj,
                                  enum rdma_remove_reason reason,
                                  struct uverbs_attr_bundle *attrs)
{
        struct ib_uverbs_file *ufile = attrs->ufile;
        unsigned long flags;
        int ret;

        lockdep_assert_held(&ufile->hw_destroy_rwsem);
        assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);

        if (uobj->object) {
                ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
                                                                attrs);
                if (ret) {
                        if (ib_is_destroy_retryable(ret, reason, uobj))
                                return ret;

                        /* Nothing to be done, dangle the memory and move on */
                        WARN(true,
                             "ib_uverbs: failed to remove uobject id %d, driver err=%d",
                             uobj->id, ret);
                }

                uobj->object = NULL;
        }

        if (reason == RDMA_REMOVE_ABORT) {
                WARN_ON(!list_empty(&uobj->list));
                WARN_ON(!uobj->context);
                uobj->uapi_object->type_class->alloc_abort(uobj);
        }

        uobj->context = NULL;

        /*
         * For DESTROY the usecnt is held write locked; the caller is expected
         * to unlock it and put the object when done with it. Only DESTROY
         * can remove the IDR handle.
         */
        if (reason != RDMA_REMOVE_DESTROY)
                atomic_set(&uobj->usecnt, 0);
        else
                uobj->uapi_object->type_class->remove_handle(uobj);

        if (!list_empty(&uobj->list)) {
                spin_lock_irqsave(&ufile->uobjects_lock, flags);
                list_del_init(&uobj->list);
                spin_unlock_irqrestore(&ufile->uobjects_lock, flags);

                /*
                 * Pairs with the get in rdma_alloc_commit_uobject(), could
                 * destroy uobj.
                 */
                uverbs_uobject_put(uobj);
        }

        /*
         * When aborting, the stack kref remains owned by the core code and is
         * not transferred into the type. Pairs with the get in alloc_uobj.
         */
        if (reason == RDMA_REMOVE_ABORT)
                uverbs_uobject_put(uobj);

        return 0;
}

/*
 * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
 * sequence. It should only be used from command callbacks. On success the
 * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
 * version requires the caller to have already obtained an
 * LOOKUP_DESTROY uobject kref.
 */
int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
{
        struct ib_uverbs_file *ufile = attrs->ufile;
        int ret;

        down_read(&ufile->hw_destroy_rwsem);

        ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
        if (ret)
                goto out_unlock;

        ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs);
        if (ret) {
                atomic_set(&uobj->usecnt, 0);
                goto out_unlock;
        }

out_unlock:
        up_read(&ufile->hw_destroy_rwsem);
        return ret;
}

/*
 * uobj_get_destroy destroys the HW object and returns a handle to the uobj
 * with a NULL object pointer. The caller must pair this with
 * uobj_put_destroy.
 */
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
                                      u32 id, struct uverbs_attr_bundle *attrs)
{
        struct ib_uobject *uobj;
        int ret;

        uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
                                       UVERBS_LOOKUP_DESTROY, attrs);
        if (IS_ERR(uobj))
                return uobj;

        ret = uobj_destroy(uobj, attrs);
        if (ret) {
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
                return ERR_PTR(ret);
        }

        return uobj;
}

/*
 * Does both uobj_get_destroy() and uobj_put_destroy().  Returns 0 on success
 * (negative errno on failure). For use by callers that do not need the uobj.
 */
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
                           struct uverbs_attr_bundle *attrs)
{
        struct ib_uobject *uobj;

        uobj = __uobj_get_destroy(obj, id, attrs);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);

        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
        return 0;
}
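
/*
 * Illustrative sketch (hedged; handlers normally reach this through the
 * uobj_perform_destroy() wrapper macro rather than calling it directly):
 * a destroy command handler resolves the uapi object and lets the pair
 * above do the lookup, HW destruction and final put in one shot.
 *
 *      static int hypothetical_destroy_handler(struct uverbs_attr_bundle *attrs,
 *                                              u32 handle)
 *      {
 *              return __uobj_perform_destroy(
 *                      uapi_get_object(attrs->ufile->device->uapi,
 *                                      UVERBS_OBJECT_CQ),
 *                      handle, attrs);
 *      }
 */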

/* alloc_uobj must be undone by uverbs_destroy_uobject() */
static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
                                     const struct uverbs_api_object *obj)
{
        struct ib_uobject *uobj;
        struct ib_ucontext *ucontext;

        ucontext = ib_uverbs_get_ucontext_file(ufile);
        if (IS_ERR(ucontext))
                return ERR_CAST(ucontext);

        uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
        if (!uobj)
                return ERR_PTR(-ENOMEM);
        /*
         * user_handle should be filled by the handler; the object is added
         * to the list in the commit stage.
         */
        uobj->ufile = ufile;
        uobj->context = ucontext;
        INIT_LIST_HEAD(&uobj->list);
        uobj->uapi_object = obj;
        /*
         * Allocated objects start out as write locked to deny any other
         * syscalls from accessing them until they are committed. See
         * rdma_alloc_commit_uobject
         */
        atomic_set(&uobj->usecnt, -1);
        kref_init(&uobj->ref);

        return uobj;
}

static int idr_add_uobj(struct ib_uobject *uobj)
{
        /*
         * We start with allocating an xarray entry pointing to NULL. This
         * represents an object which isn't initialized yet. We'll replace
         * it later on with the real object once we commit.
         */
        return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL, xa_limit_32b,
                        GFP_KERNEL);
}

/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
static struct ib_uobject *
lookup_get_idr_uobject(const struct uverbs_api_object *obj,
                       struct ib_uverbs_file *ufile, s64 id,
                       enum rdma_lookup_mode mode)
{
        struct ib_uobject *uobj;

        if (id < 0 || id > ULONG_MAX)
                return ERR_PTR(-EINVAL);

        rcu_read_lock();
        /*
         * The xa_load is guaranteed to return a pointer to something that
         * isn't freed yet, or NULL, as the free after xa_erase goes through
         * kfree_rcu(). However the object may still have been released and
         * kfree() could be called at any time.
         */
        uobj = xa_load(&ufile->idr, id);
        if (!uobj || !kref_get_unless_zero(&uobj->ref))
                uobj = ERR_PTR(-ENOENT);
        rcu_read_unlock();
        return uobj;
}
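
/*
 * Why kref_get_unless_zero() under rcu_read_lock() (an illustrative
 * timeline, not additional locking rules): a concurrent destroy may have
 * already dropped the last kref, leaving the memory alive only until the
 * grace period of the pending kfree_rcu() expires.
 *
 *      lookup (reader)                 destroy (writer)
 *      rcu_read_lock();
 *      uobj = xa_load(...);
 *                                      xa_erase(...);
 *                                      uverbs_uobject_put();
 *                                        -> kfree_rcu() (free is deferred)
 *      kref_get_unless_zero() fails,
 *      so the lookup returns -ENOENT
 *      without touching freed memory.
 *      rcu_read_unlock();
 */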

static struct ib_uobject *
lookup_get_fd_uobject(const struct uverbs_api_object *obj,
                      struct ib_uverbs_file *ufile, s64 id,
                      enum rdma_lookup_mode mode)
{
        const struct uverbs_obj_fd_type *fd_type;
        struct file *f;
        struct ib_uobject *uobject;
        int fdno = id;

        if (fdno != id)
                return ERR_PTR(-EINVAL);

        if (mode != UVERBS_LOOKUP_READ)
                return ERR_PTR(-EOPNOTSUPP);

        if (!obj->type_attrs)
                return ERR_PTR(-EIO);
        fd_type =
                container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);

        f = fget(fdno);
        if (!f)
                return ERR_PTR(-EBADF);

        uobject = f->private_data;
        /*
         * fget(id) ensures we are not currently running uverbs_close_fd,
         * and the caller is expected to ensure that uverbs_close_fd is never
         * done while a call to lookup is possible.
         */
        if (f->f_op != fd_type->fops) {
                fput(f);
                return ERR_PTR(-EBADF);
        }

        uverbs_uobject_get(uobject);
        return uobject;
}

struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
                                           struct ib_uverbs_file *ufile, s64 id,
                                           enum rdma_lookup_mode mode,
                                           struct uverbs_attr_bundle *attrs)
{
        struct ib_uobject *uobj;
        int ret;

        if (obj == ERR_PTR(-ENOMSG)) {
                /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
                uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
                if (IS_ERR(uobj))
                        return uobj;
        } else {
                if (IS_ERR(obj))
                        return ERR_PTR(-EINVAL);

                uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
                if (IS_ERR(uobj))
                        return uobj;

                if (uobj->uapi_object != obj) {
                        ret = -EINVAL;
                        goto free;
                }
        }

        /*
         * If we have been disassociated, block every command except for
         * DESTROY based commands.
         */
        if (mode != UVERBS_LOOKUP_DESTROY &&
            !srcu_dereference(ufile->device->ib_dev,
                              &ufile->device->disassociate_srcu)) {
                ret = -EIO;
                goto free;
        }

        ret = uverbs_try_lock_object(uobj, mode);
        if (ret)
                goto free;
        if (attrs)
                attrs->context = uobj->context;

        return uobj;
free:
        uobj->uapi_object->type_class->lookup_put(uobj, mode);
        uverbs_uobject_put(uobj);
        return ERR_PTR(ret);
}

static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
                        struct ib_uverbs_file *ufile)
{
        int ret;
        struct ib_uobject *uobj;

        uobj = alloc_uobj(ufile, obj);
        if (IS_ERR(uobj))
                return uobj;

        ret = idr_add_uobj(uobj);
        if (ret)
                goto uobj_put;

        ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
                                   RDMACG_RESOURCE_HCA_OBJECT);
        if (ret)
                goto remove;

        return uobj;

remove:
        xa_erase(&ufile->idr, uobj->id);
uobj_put:
        uverbs_uobject_put(uobj);
        return ERR_PTR(ret);
}

static struct ib_uobject *
alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
                       struct ib_uverbs_file *ufile)
{
        int new_fd;
        struct ib_uobject *uobj;

        new_fd = get_unused_fd_flags(O_CLOEXEC);
        if (new_fd < 0)
                return ERR_PTR(new_fd);

        uobj = alloc_uobj(ufile, obj);
        if (IS_ERR(uobj)) {
                put_unused_fd(new_fd);
                return uobj;
        }

        uobj->id = new_fd;
        uobj->ufile = ufile;

        return uobj;
}

struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
                                            struct ib_uverbs_file *ufile,
                                            struct uverbs_attr_bundle *attrs)
{
        struct ib_uobject *ret;

        if (IS_ERR(obj))
                return ERR_PTR(-EINVAL);

        /*
         * The hw_destroy_rwsem is held across the entire object creation and
         * released during rdma_alloc_commit_uobject or
         * rdma_alloc_abort_uobject.
         */
        if (!down_read_trylock(&ufile->hw_destroy_rwsem))
                return ERR_PTR(-EIO);

        ret = obj->type_class->alloc_begin(obj, ufile);
        if (IS_ERR(ret)) {
                up_read(&ufile->hw_destroy_rwsem);
                return ret;
        }
        if (attrs)
                attrs->context = ret->context;
        return ret;
}

static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
{
        ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
                           RDMACG_RESOURCE_HCA_OBJECT);

        xa_erase(&uobj->ufile->idr, uobj->id);
}

static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
                                               enum rdma_remove_reason why,
                                               struct uverbs_attr_bundle *attrs)
{
        const struct uverbs_obj_idr_type *idr_type =
                container_of(uobj->uapi_object->type_attrs,
                             struct uverbs_obj_idr_type, type);
        int ret = idr_type->destroy_object(uobj, why, attrs);

        /*
         * We can only fail gracefully if the user requested to destroy the
         * object, or when a retry may be attempted after an error.
         * In the rest of the cases, just remove whatever you can.
         */
        if (ib_is_destroy_retryable(ret, why, uobj))
                return ret;

        if (why == RDMA_REMOVE_ABORT)
                return 0;

        ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
                           RDMACG_RESOURCE_HCA_OBJECT);

        return 0;
}

static void remove_handle_idr_uobject(struct ib_uobject *uobj)
{
        xa_erase(&uobj->ufile->idr, uobj->id);
        /* Matches the kref in alloc_commit_idr_uobject */
        uverbs_uobject_put(uobj);
}

static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
        put_unused_fd(uobj->id);
}

static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
                                              enum rdma_remove_reason why,
                                              struct uverbs_attr_bundle *attrs)
{
        const struct uverbs_obj_fd_type *fd_type = container_of(
                uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
        int ret = fd_type->context_closed(uobj, why);

        if (ib_is_destroy_retryable(ret, why, uobj))
                return ret;

        return 0;
}

static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
}

static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
        struct ib_uverbs_file *ufile = uobj->ufile;
        void *old;

        /*
         * We already allocated this xarray entry with a NULL object, so
         * this shouldn't fail.
         *
         * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
         * It will be put by remove_handle_idr_uobject()
         */
        old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
        WARN_ON(old != NULL);

        return 0;
}

static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
        const struct uverbs_obj_fd_type *fd_type = container_of(
                uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
        int fd = uobj->id;
        struct file *filp;

        /*
         * The kref for uobj is moved into filp->private_data and put in
         * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
         * must be guaranteed to be called from the provided fops release
         * callback.
         */
        filp = anon_inode_getfile(fd_type->name,
                                  fd_type->fops,
                                  uobj,
                                  fd_type->flags);
        if (IS_ERR(filp))
                return PTR_ERR(filp);

        uobj->object = filp;

        /* Matching put will be done in uverbs_close_fd() */
        kref_get(&uobj->ufile->ref);

        /* This shouldn't be used anymore. Use the file object instead */
        uobj->id = 0;

        /*
         * NOTE: Once we install the file we lose ownership of our kref on
         * uobj. It will be put by uverbs_close_fd()
         */
        fd_install(fd, filp);

        return 0;
}
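
/*
 * Contract sketch for FD object types (hedged; the fops and release
 * function below are hypothetical, not taken from this file): the
 * fd_type's file_operations must route ->release through
 * uverbs_close_fd() so the kref moved into filp->private_data above is
 * eventually put.
 *
 *      static int hypothetical_release(struct inode *inode, struct file *filp)
 *      {
 *              ...type specific teardown...
 *              uverbs_close_fd(filp);
 *              return 0;
 *      }
 *
 *      static const struct file_operations hypothetical_uobj_fops = {
 *              .owner   = THIS_MODULE,
 *              .release = hypothetical_release,
 *      };
 */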

/*
 * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
 * caller can no longer assume uobj is valid. If this function fails it
 * destroys the uobject, including the attached HW object.
 */
int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
                                           struct uverbs_attr_bundle *attrs)
{
        struct ib_uverbs_file *ufile = attrs->ufile;
        int ret;

        /* alloc_commit consumes the uobj kref */
        ret = uobj->uapi_object->type_class->alloc_commit(uobj);
        if (ret) {
                uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
                up_read(&ufile->hw_destroy_rwsem);
                return ret;
        }

        /* kref is held so long as the uobj is on the uobj list. */
        uverbs_uobject_get(uobj);
        spin_lock_irq(&ufile->uobjects_lock);
        list_add(&uobj->list, &ufile->uobjects);
        spin_unlock_irq(&ufile->uobjects_lock);

        /* matches atomic_set(-1) in alloc_uobj */
        atomic_set(&uobj->usecnt, 0);

        /* Matches the down_read in rdma_alloc_begin_uobject */
        up_read(&ufile->hw_destroy_rwsem);

        return 0;
}
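
/*
 * End-to-end creation flow as seen from a handler (an illustrative sketch;
 * real handlers go through wrapper macros, and driver_create_object() is a
 * hypothetical stand-in for the HW object allocation):
 *
 *      uobj = rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *
 *      uobj->object = driver_create_object(attrs);
 *      if (IS_ERR(uobj->object)) {
 *              int err = PTR_ERR(uobj->object);
 *
 *              uobj->object = NULL;
 *              rdma_alloc_abort_uobject(uobj, attrs);
 *              return err;
 *      }
 *      return rdma_alloc_commit_uobject(uobj, attrs);
 */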

/*
 * This consumes the kref for uobj. It is up to the caller to unwind the HW
 * object and anything else connected to uobj before calling this.
 */
void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
                              struct uverbs_attr_bundle *attrs)
{
        struct ib_uverbs_file *ufile = uobj->ufile;

        uobj->object = NULL;
        uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);

        /* Matches the down_read in rdma_alloc_begin_uobject */
        up_read(&ufile->hw_destroy_rwsem);
}

static void lookup_put_idr_uobject(struct ib_uobject *uobj,
                                   enum rdma_lookup_mode mode)
{
}

static void lookup_put_fd_uobject(struct ib_uobject *uobj,
                                  enum rdma_lookup_mode mode)
{
        struct file *filp = uobj->object;

        WARN_ON(mode != UVERBS_LOOKUP_READ);
        /* This indirectly calls uverbs_close_fd and frees the object */
        fput(filp);
}

void rdma_lookup_put_uobject(struct ib_uobject *uobj,
                             enum rdma_lookup_mode mode)
{
        assert_uverbs_usecnt(uobj, mode);
        uobj->uapi_object->type_class->lookup_put(uobj, mode);
        /*
         * In order to unlock an object, either decrease its usecnt for
         * read access or zero it in case of exclusive access. See
         * uverbs_try_lock_object for locking schema information.
         */
        switch (mode) {
        case UVERBS_LOOKUP_READ:
                atomic_dec(&uobj->usecnt);
                break;
        case UVERBS_LOOKUP_WRITE:
                atomic_set(&uobj->usecnt, 0);
                break;
        case UVERBS_LOOKUP_DESTROY:
                break;
        }

        /* Pairs with the kref obtained by type->lookup_get */
        uverbs_uobject_put(uobj);
}
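
/*
 * Pairing sketch for lookups (illustrative only; handlers usually use the
 * uobj_get_read()/uobj_put_read() style wrappers instead of calling these
 * directly):
 *
 *      uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
 *                                     UVERBS_LOOKUP_READ, attrs);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *      ...use uobj->object, other readers may run concurrently...
 *      rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
 */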

void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
        xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
}

void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
        struct ib_uobject *entry;
        unsigned long id;

        /*
         * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
         * there are no HW objects left; however, the xarray is still populated
         * with anything that has not been cleaned up by userspace. Since the
         * kref on ufile is 0, nothing is allowed to call lookup_get.
         *
         * This is an optimized equivalent to remove_handle_idr_uobject
         */
        xa_for_each(&ufile->idr, id, entry) {
                WARN_ON(entry->object);
                uverbs_uobject_put(entry);
        }

        xa_destroy(&ufile->idr);
}

const struct uverbs_obj_type_class uverbs_idr_class = {
        .alloc_begin = alloc_begin_idr_uobject,
        .lookup_get = lookup_get_idr_uobject,
        .alloc_commit = alloc_commit_idr_uobject,
        .alloc_abort = alloc_abort_idr_uobject,
        .lookup_put = lookup_put_idr_uobject,
        .destroy_hw = destroy_hw_idr_uobject,
        .remove_handle = remove_handle_idr_uobject,
        /*
         * When we destroy an object, we first just lock it for WRITE and
         * actually DESTROY it in the finalize stage. So, the problematic
         * scenario is when we just started the finalize stage of the
         * destruction (nothing was executed yet). Now, the other thread
         * fetched the object for READ access, but it didn't lock it yet.
         * The DESTROY thread continues and starts destroying the object.
         * When the other thread continues - without the RCU, it would
         * access freed memory. However, the rcu_read_lock delays the free
         * until the RCU read-side critical section of the READ operation
         * ends. Since the exclusive lock of the object is still taken by the
         * DESTROY flow, the READ operation will get -EBUSY and it'll just
         * bail out.
         */
        .needs_kfree_rcu = true,
};
EXPORT_SYMBOL(uverbs_idr_class);

void uverbs_close_fd(struct file *f)
{
        struct ib_uobject *uobj = f->private_data;
        struct ib_uverbs_file *ufile = uobj->ufile;
        struct uverbs_attr_bundle attrs = {
                .context = uobj->context,
                .ufile = ufile,
        };

        if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
                /*
                 * lookup_get_fd_uobject holds the kref on the struct file any
                 * time a FD uobj is locked, which prevents this release
                 * method from being invoked. Meaning we can always get the
                 * write lock here, or we have a kernel bug.
                 */
                WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
                uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs);
                up_read(&ufile->hw_destroy_rwsem);
        }

        /* Matches the get in alloc_commit_fd_uobject */
        kref_put(&ufile->ref, ib_uverbs_release_file);

        /* Pairs with filp->private_data in alloc_commit_fd_uobject */
        uverbs_uobject_put(uobj);
}
EXPORT_SYMBOL(uverbs_close_fd);

/*
 * Drop the ucontext off the ufile and completely disconnect it from the
 * ib_device
 */
static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
                                   enum rdma_remove_reason reason)
{
        struct ib_ucontext *ucontext = ufile->ucontext;
        struct ib_device *ib_dev = ucontext->device;

        /*
         * If we are closing the FD then the user mmap VMAs must have
         * already been destroyed as they hold on to the filep, otherwise
         * they need to be zapped.
         */
        if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
                uverbs_user_mmap_disassociate(ufile);
                if (ib_dev->ops.disassociate_ucontext)
                        ib_dev->ops.disassociate_ucontext(ucontext);
        }

        ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
                           RDMACG_RESOURCE_HCA_HANDLE);

        rdma_restrack_del(&ucontext->res);

        ib_dev->ops.dealloc_ucontext(ucontext);
        kfree(ucontext);

        ufile->ucontext = NULL;
}

static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
                                  enum rdma_remove_reason reason)
{
        struct ib_uobject *obj, *next_obj;
        int ret = -EINVAL;
        struct uverbs_attr_bundle attrs = { .ufile = ufile };

        /*
         * This shouldn't run while executing other commands on this
         * context. Thus, the only thing we should take care of is
         * releasing a FD while traversing this list. The FD could be
         * closed and released from the _release fop of this FD.
         * In order to mitigate this, we add a lock.
         * We take and release the lock per traversal in order to give
         * other threads (which might still use the FDs) a chance to run.
         */
        list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
                attrs.context = obj->context;
                /*
                 * If we hit this WARN_ON, that means we are
                 * racing with a lookup_get.
                 */
                WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
                if (!uverbs_destroy_uobject(obj, reason, &attrs))
                        ret = 0;
                else
                        atomic_set(&obj->usecnt, 0);
        }
        return ret;
}

/*
 * Destroy the ucontext and every uobject associated with it. If called with
 * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
 * been completed and ufile->ucontext is NULL.
 *
 * This is internally locked and can be called in parallel from multiple
 * contexts.
 */
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
                             enum rdma_remove_reason reason)
{
        if (reason == RDMA_REMOVE_CLOSE) {
                /*
                 * During destruction we might trigger something that
                 * synchronously calls release on any file descriptor. For
                 * this reason all paths that come from file_operations
                 * release must use try_lock. They can progress knowing that
                 * there is an ongoing uverbs_destroy_ufile_hw that will clean
                 * up the driver resources.
                 */
                if (!mutex_trylock(&ufile->ucontext_lock))
                        return;

        } else {
                mutex_lock(&ufile->ucontext_lock);
        }

        down_write(&ufile->hw_destroy_rwsem);

        /*
         * If a ucontext was never created then we can't have any uobjects to
         * cleanup, nothing to do.
         */
        if (!ufile->ucontext)
                goto done;

        ufile->ucontext->closing = true;
        ufile->ucontext->cleanup_retryable = true;
        while (!list_empty(&ufile->uobjects))
                if (__uverbs_cleanup_ufile(ufile, reason)) {
                        /*
                         * No entry was cleaned-up successfully during this
                         * iteration
                         */
                        break;
                }

        ufile->ucontext->cleanup_retryable = false;
        if (!list_empty(&ufile->uobjects))
                __uverbs_cleanup_ufile(ufile, reason);

        ufile_destroy_ucontext(ufile, reason);

done:
        up_write(&ufile->hw_destroy_rwsem);
        mutex_unlock(&ufile->ucontext_lock);
}

const struct uverbs_obj_type_class uverbs_fd_class = {
        .alloc_begin = alloc_begin_fd_uobject,
        .lookup_get = lookup_get_fd_uobject,
        .alloc_commit = alloc_commit_fd_uobject,
        .alloc_abort = alloc_abort_fd_uobject,
        .lookup_put = lookup_put_fd_uobject,
        .destroy_hw = destroy_hw_fd_uobject,
        .remove_handle = remove_handle_fd_uobject,
        .needs_kfree_rcu = false,
};
EXPORT_SYMBOL(uverbs_fd_class);

struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
                             s64 id, struct uverbs_attr_bundle *attrs)
{
        const struct uverbs_api_object *obj =
                uapi_get_object(attrs->ufile->device->uapi, object_id);

        switch (access) {
        case UVERBS_ACCESS_READ:
                return rdma_lookup_get_uobject(obj, attrs->ufile, id,
                                               UVERBS_LOOKUP_READ, attrs);
        case UVERBS_ACCESS_DESTROY:
                /* Actual destruction is done inside uverbs_handle_method */
                return rdma_lookup_get_uobject(obj, attrs->ufile, id,
                                               UVERBS_LOOKUP_DESTROY, attrs);
        case UVERBS_ACCESS_WRITE:
                return rdma_lookup_get_uobject(obj, attrs->ufile, id,
                                               UVERBS_LOOKUP_WRITE, attrs);
        case UVERBS_ACCESS_NEW:
                return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
        default:
                WARN_ON(true);
                return ERR_PTR(-EOPNOTSUPP);
        }
}

int uverbs_finalize_object(struct ib_uobject *uobj,
                           enum uverbs_obj_access access, bool commit,
                           struct uverbs_attr_bundle *attrs)
{
        int ret = 0;

        /*
         * refcounts should be handled at the object level and not at the
         * uobject level. Refcounts of the objects themselves are done in
         * handlers.
         */

        switch (access) {
        case UVERBS_ACCESS_READ:
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
                break;
        case UVERBS_ACCESS_WRITE:
                rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
                break;
        case UVERBS_ACCESS_DESTROY:
                if (uobj)
                        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
                break;
        case UVERBS_ACCESS_NEW:
                if (commit)
                        ret = rdma_alloc_commit_uobject(uobj, attrs);
                else
                        rdma_alloc_abort_uobject(uobj, attrs);
                break;
        default:
                WARN_ON(true);
                ret = -EOPNOTSUPP;
        }

        return ret;
}
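
/*
 * How the ioctl machinery pairs the two functions above (an illustrative
 * sketch of the calling convention, not a copy of the real dispatcher):
 *
 *      uobj = uverbs_get_uobject_from_file(object_id, UVERBS_ACCESS_READ,
 *                                          id, attrs);
 *      if (IS_ERR(uobj))
 *              return PTR_ERR(uobj);
 *      ret = handler(attrs);           (hypothetical method body)
 *      return uverbs_finalize_object(uobj, UVERBS_ACCESS_READ,
 *                                    !ret, attrs);
 */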