linux/drivers/infiniband/core/ib_core_uverbs.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 */
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *         this vma
 *
 * Each time we map IO memory into user space this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
                         struct vm_area_struct *vma,
                         struct rdma_user_mmap_entry *entry)
{
        struct ib_uverbs_file *ufile = vma->vm_file->private_data;

        priv->vma = vma;
        if (entry) {
                kref_get(&entry->ref);
                priv->entry = entry;
        }
        vma->vm_private_data = priv;
        /* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */

        mutex_lock(&ufile->umap_lock);
        list_add(&priv->list, &ufile->umaps);
        mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);
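
/*
 * Illustrative sketch, not part of this file: a hypothetical driver that
 * performs its own remap of a normal kernel page can still register the VMA
 * here so it is zapped on device disassociation. my_drv_mmap_page() and
 * my_shared_page() are made-up names; only rdma_umap_priv_init() is the
 * real API exported above.
 */
static int my_drv_mmap_page(struct ib_ucontext *ucontext,
                            struct vm_area_struct *vma)
{
        struct rdma_umap_priv *priv;
        int ret;

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        /* Assumed helper returning a driver-owned struct page */
        ret = vm_insert_page(vma, vma->vm_start, my_shared_page(ucontext));
        if (ret) {
                kfree(priv);
                return ret;
        }

        /* No mmap_entry backs this mapping, so pass NULL */
        rdma_umap_priv_init(priv, vma, NULL);
        return 0;
}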

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return: -EINVAL on wrong flags or size, -EAGAIN on failure to map, and 0
 * on success.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
                      unsigned long pfn, unsigned long size, pgprot_t prot,
                      struct rdma_user_mmap_entry *entry)
{
        struct ib_uverbs_file *ufile = ucontext->ufile;
        struct rdma_umap_priv *priv;

        if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;

        if (vma->vm_end - vma->vm_start != size)
                return -EINVAL;

        /* Driver is using this wrong, must be called by ib_uverbs_mmap */
        if (WARN_ON(!vma->vm_file ||
                    vma->vm_file->private_data != ufile))
                return -EINVAL;
        lockdep_assert_held(&ufile->device->disassociate_srcu);

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        vma->vm_page_prot = prot;
        if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
                kfree(priv);
                return -EAGAIN;
        }

        rdma_umap_priv_init(priv, vma, entry);
        return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
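
/*
 * Illustrative sketch, not part of this file: a hypothetical ->mmap()
 * handler pushing a single write-combining BAR page to userspace.
 * my_drv_mmap_bar() and my_bar_pfn() are made-up names; the
 * rdma_user_mmap_io() call is the real API exported above.
 */
static int my_drv_mmap_bar(struct ib_ucontext *ucontext,
                           struct vm_area_struct *vma)
{
        /* Assumed helper returning the pfn of the BAR page to expose */
        unsigned long pfn = my_bar_pfn(ucontext);

        /* Single page, no mmap_entry: the offset is fixed by the driver */
        return rdma_user_mmap_io(ucontext, vma, pfn, PAGE_SIZE,
                                 pgprot_writecombine(vma->vm_page_prot),
                                 NULL);
}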

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return: a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
                               unsigned long pgoff)
{
        struct rdma_user_mmap_entry *entry;

        if (pgoff > U32_MAX)
                return NULL;

        xa_lock(&ucontext->mmap_xa);

        entry = xa_load(&ucontext->mmap_xa, pgoff);

        /*
         * If the refcount is zero, the entry is already being deleted;
         * driver_removed indicates that no further mmaps are possible and
         * we are waiting for the active VMAs to be closed.
         */
        if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
            !kref_get_unless_zero(&entry->ref))
                goto err;

        xa_unlock(&ucontext->mmap_xa);

        ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
                  pgoff, entry->npages);

        return entry;

err:
        xa_unlock(&ucontext->mmap_xa);
        return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
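
/*
 * Illustrative sketch, not part of this file: validating a user-supplied
 * byte offset outside of the mmap path. my_drv_check_offset() is a made-up
 * name; the get/put pairing around the lookup is the required pattern.
 */
static bool my_drv_check_offset(struct ib_ucontext *ucontext, u64 offset)
{
        struct rdma_user_mmap_entry *entry;

        entry = rdma_user_mmap_entry_get_pgoff(ucontext,
                                               offset >> PAGE_SHIFT);
        if (!entry)
                return false;

        /* Drop the reference taken by the lookup */
        rdma_user_mmap_entry_put(entry);
        return true;
}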

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
                         struct vm_area_struct *vma)
{
        struct rdma_user_mmap_entry *entry;

        if (!(vma->vm_flags & VM_SHARED))
                return NULL;
        entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
        if (!entry)
                return NULL;
        if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
                rdma_user_mmap_entry_put(entry);
                return NULL;
        }
        return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);
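
/*
 * Illustrative sketch, not part of this file: the typical ->mmap() flow for
 * entry-based drivers. struct my_mmap_entry (embedding rdma_user_mmap_entry)
 * and its pfn field are assumptions; the get/io/put sequence mirrors what
 * entry-based drivers do.
 */
struct my_mmap_entry {
        struct rdma_user_mmap_entry rdma_entry;
        unsigned long pfn;
};

static int my_drv_mmap(struct ib_ucontext *ucontext,
                       struct vm_area_struct *vma)
{
        struct rdma_user_mmap_entry *entry;
        struct my_mmap_entry *me;
        int ret;

        entry = rdma_user_mmap_entry_get(ucontext, vma);
        if (!entry)
                return -EINVAL;
        me = container_of(entry, struct my_mmap_entry, rdma_entry);

        ret = rdma_user_mmap_io(ucontext, vma, me->pfn,
                                entry->npages * PAGE_SIZE,
                                pgprot_noncached(vma->vm_page_prot), entry);

        /* rdma_user_mmap_io() took its own reference via the umap priv */
        rdma_user_mmap_entry_put(entry);
        return ret;
}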

static void rdma_user_mmap_entry_free(struct kref *kref)
{
        struct rdma_user_mmap_entry *entry =
                container_of(kref, struct rdma_user_mmap_entry, ref);
        struct ib_ucontext *ucontext = entry->ucontext;
        unsigned long i;

        /*
         * Erase all xarray slots occupied by this entry; this is deferred
         * until all VMAs are closed so that the mmap offsets remain unique.
         */
        xa_lock(&ucontext->mmap_xa);
        for (i = 0; i < entry->npages; i++)
                __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
        xa_unlock(&ucontext->mmap_xa);

        ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
                  entry->start_pgoff, entry->npages);

        if (ucontext->device->ops.mmap_free)
                ucontext->device->ops.mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an io mapping or when the driver is done with the entry for
 * some other reason.
 * Should be called after rdma_user_mmap_entry_get() was called and
 * the entry is no longer needed. This function will erase the entry
 * and free it if its refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
        kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);
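
/*
 * Illustrative sketch, not part of this file: the matching ops.mmap_free()
 * callback, invoked from rdma_user_mmap_entry_free() once the final
 * reference is dropped. struct my_mmap_entry is the assumed container from
 * the sketch above.
 */
static void my_drv_mmap_free(struct rdma_user_mmap_entry *entry)
{
        struct my_mmap_entry *me =
                container_of(entry, struct my_mmap_entry, rdma_entry);

        kfree(me);
}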

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *                                 mark it as unmappable
 *
 * @entry: the entry to remove from the mmap_xa
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
        if (!entry)
                return;

        xa_lock(&entry->ucontext->mmap_xa);
        entry->driver_removed = true;
        xa_unlock(&entry->ucontext->mmap_xa);
        kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
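
/*
 * Illustrative sketch, not part of this file: object teardown. Removing the
 * entry blocks new mmaps immediately, while mmap_free() (and any final
 * kfree()) is deferred until the last existing user mapping is gone.
 * my_drv_dealloc() is a made-up name using the container from the sketches
 * above.
 */
static void my_drv_dealloc(struct my_mmap_entry *me)
{
        /* Drops the insert-time reference; safe even while VMAs remain */
        rdma_user_mmap_entry_remove(&me->rdma_entry);
}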

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *                                       in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in the given
 * range that should be provided to the user; the user will use the offset to
 * retrieve information such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
                                      struct rdma_user_mmap_entry *entry,
                                      size_t length, u32 min_pgoff,
                                      u32 max_pgoff)
{
        struct ib_uverbs_file *ufile = ucontext->ufile;
        XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
        u32 xa_first, xa_last, npages;
        int err;
        u32 i;

        if (!entry)
                return -EINVAL;

        kref_init(&entry->ref);
        entry->ucontext = ucontext;

        /*
         * We want the whole allocation to be done without interruption from a
         * different thread. The allocation requires finding a free range and
         * storing. During the xa_insert the lock could be released, possibly
         * allowing another thread to choose the same range.
         */
        mutex_lock(&ufile->umap_lock);

        xa_lock(&ucontext->mmap_xa);

        /* We want to find an empty range */
        npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
        entry->npages = npages;
        while (true) {
                /* First find an empty index */
                xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
                if (xas.xa_node == XAS_RESTART)
                        goto err_unlock;

                xa_first = xas.xa_index;

                /* Is there enough room to have the range? */
                if (check_add_overflow(xa_first, npages, &xa_last))
                        goto err_unlock;

                /*
                 * Now look for the next present entry. If an entry doesn't
                 * exist, we found an empty range and can proceed.
                 */
                xas_next_entry(&xas, xa_last - 1);
                if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
                        break;
        }

        for (i = xa_first; i < xa_last; i++) {
                err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
                if (err)
                        goto err_undo;
        }

        /*
         * Internally the kernel uses a page offset, while in libc this is a
         * byte offset. Drivers should not return pgoff to userspace.
         */
        entry->start_pgoff = xa_first;
        xa_unlock(&ucontext->mmap_xa);
        mutex_unlock(&ufile->umap_lock);

        ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
                  entry->start_pgoff, npages);

        return 0;

err_undo:
        for (; i > xa_first; i--)
                __xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
        xa_unlock(&ucontext->mmap_xa);
        mutex_unlock(&ufile->umap_lock);
        return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
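
/*
 * Illustrative sketch, not part of this file: reserving an offset inside a
 * driver-chosen window (here the first 256 page offsets), e.g. to keep
 * well-known mappings apart from dynamically allocated ones.
 * my_drv_insert_db() is a made-up name; rdma_user_mmap_get_offset() is the
 * real helper that converts start_pgoff into the byte offset userspace must
 * pass to mmap(2).
 */
static int my_drv_insert_db(struct ib_ucontext *ucontext,
                            struct my_mmap_entry *me, u64 *mmap_offset)
{
        int err;

        err = rdma_user_mmap_entry_insert_range(ucontext, &me->rdma_entry,
                                                PAGE_SIZE, 0, 255);
        if (err)
                return err;

        *mmap_offset = rdma_user_mmap_get_offset(&me->rdma_entry);
        return 0;
}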

/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given
 * offset. The function allocates a unique page offset that should be
 * provided to the user; the user will use the offset to retrieve information
 * such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
                                struct rdma_user_mmap_entry *entry,
                                size_t length)
{
        return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
                                                 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
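
/*
 * Illustrative sketch, not part of this file: the usual allocation flow.
 * The driver embeds struct rdma_user_mmap_entry in its own per-mapping
 * struct, inserts it, and returns the resulting byte offset to userspace,
 * which later passes it back via mmap(2). my_drv_alloc_mapping() and the
 * my_mmap_entry container from the sketches above are assumptions.
 */
static int my_drv_alloc_mapping(struct ib_ucontext *ucontext,
                                unsigned long pfn, size_t length,
                                u64 *mmap_offset)
{
        struct my_mmap_entry *me;
        int err;

        me = kzalloc(sizeof(*me), GFP_KERNEL);
        if (!me)
                return -ENOMEM;
        me->pfn = pfn;

        err = rdma_user_mmap_entry_insert(ucontext, &me->rdma_entry, length);
        if (err) {
                kfree(me);
                return err;
        }

        /* Freed later via ops.mmap_free() when the last reference drops */
        *mmap_offset = rdma_user_mmap_get_offset(&me->rdma_entry);
        return 0;
}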