linux/drivers/infiniband/core/umem.c
/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"

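/*
 * Undo the DMA mapping set up by ib_umem_get() and drop the page
 * references taken by get_user_pages_longterm().  Pages are marked
 * dirty before release when the umem is writable and @dirty is set.
 */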
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
        struct scatterlist *sg;
        struct page *page;
        int i;

        if (umem->nmap > 0)
                ib_dma_unmap_sg(dev, umem->sg_head.sgl,
                                umem->npages,
                                DMA_BIDIRECTIONAL);

        for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
                page = sg_page(sg);
                if (!PageDirty(page) && umem->writable && dirty)
                        set_page_dirty_lock(page);
                put_page(page);
        }

        sg_free_table(&umem->sg_head);
}

/**
 * ib_umem_get - Pin and DMA map userspace memory.
 *
 * If access flags indicate ODP memory, avoid pinning. Instead, store
 * the mm for future page fault handling in conjunction with MMU notifiers.
 * (See the usage sketch following this function.)
 *
 * @context: userspace context to pin memory for
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 * @dmasync: flush in-flight DMA when the memory region is written
 */
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                            size_t size, int access, int dmasync)
{
        struct ib_umem *umem;
        struct page **page_list;
        struct vm_area_struct **vma_list;
        unsigned long lock_limit;
        unsigned long new_pinned;
        unsigned long cur_base;
        struct mm_struct *mm;
        unsigned long npages;
        int ret;
        int i;
        unsigned long dma_attrs = 0;
        struct scatterlist *sg, *sg_list_start;
        unsigned int gup_flags = FOLL_WRITE;

        if (dmasync)
                dma_attrs |= DMA_ATTR_WRITE_BARRIER;

        /*
         * If the combination of the addr and size requested for this memory
         * region causes an integer overflow, return error.
         */
        if (((addr + size) < addr) ||
            PAGE_ALIGN(addr + size) < (addr + size))
                return ERR_PTR(-EINVAL);

        if (!can_do_mlock())
                return ERR_PTR(-EPERM);

        if (access & IB_ACCESS_ON_DEMAND) {
                umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
                if (!umem)
                        return ERR_PTR(-ENOMEM);
                umem->is_odp = 1;
        } else {
                umem = kzalloc(sizeof(*umem), GFP_KERNEL);
                if (!umem)
                        return ERR_PTR(-ENOMEM);
        }

        umem->context    = context;
        umem->length     = size;
        umem->address    = addr;
        umem->page_shift = PAGE_SHIFT;
        umem->writable   = ib_access_writable(access);
        umem->owning_mm = mm = current->mm;
        mmgrab(mm);

        if (access & IB_ACCESS_ON_DEMAND) {
                ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
                if (ret)
                        goto umem_kfree;
                return umem;
        }

        /* We assume the memory is from hugetlb until proved otherwise */
        umem->hugetlb   = 1;

        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
                ret = -ENOMEM;
                goto umem_kfree;
        }

        /*
         * If we can't allocate the vma_list, it's not so bad;
         * just assume the memory is not hugetlb memory.
         */
        vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
        if (!vma_list)
                umem->hugetlb = 0;

        npages = ib_umem_num_pages(umem);
        if (npages == 0 || npages > UINT_MAX) {
                ret = -EINVAL;
                goto out;
        }

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

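        /*
         * Charge the new pages against the process's RLIMIT_MEMLOCK
         * pinned-page accounting, unless the caller holds CAP_IPC_LOCK
         * and may pin without limit.
         */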
        down_write(&mm->mmap_sem);
        if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
            (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
                up_write(&mm->mmap_sem);
                ret = -ENOMEM;
                goto out;
        }
        mm->pinned_vm = new_pinned;
        up_write(&mm->mmap_sem);

        cur_base = addr & PAGE_MASK;

        ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
        if (ret)
                goto vma;

        if (!umem->writable)
                gup_flags |= FOLL_FORCE;

        sg_list_start = umem->sg_head.sgl;

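        /*
         * Pin the user pages in batches of at most one page worth of
         * struct page pointers, adding each batch to the scatterlist.
         */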
        while (npages) {
                down_read(&mm->mmap_sem);
                ret = get_user_pages_longterm(cur_base,
                                     min_t(unsigned long, npages,
                                           PAGE_SIZE / sizeof (struct page *)),
                                     gup_flags, page_list, vma_list);
                if (ret < 0) {
                        up_read(&mm->mmap_sem);
                        goto umem_release;
                }

                umem->npages += ret;
                cur_base += ret * PAGE_SIZE;
                npages   -= ret;

                /* Continue to hold the mmap_sem as vma_list access
                 * needs to be protected.
                 */
                for_each_sg(sg_list_start, sg, ret, i) {
                        if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
                                umem->hugetlb = 0;

                        sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
                }
                up_read(&mm->mmap_sem);

                /* preparing for next loop */
                sg_list_start = sg;
        }

        umem->nmap = ib_dma_map_sg_attrs(context->device,
                                  umem->sg_head.sgl,
                                  umem->npages,
                                  DMA_BIDIRECTIONAL,
                                  dma_attrs);

        if (!umem->nmap) {
                ret = -ENOMEM;
                goto umem_release;
        }

        ret = 0;
        goto out;

umem_release:
        __ib_umem_release(context->device, umem, 0);
vma:
        down_write(&mm->mmap_sem);
        mm->pinned_vm -= ib_umem_num_pages(umem);
        up_write(&mm->mmap_sem);
out:
        if (vma_list)
                free_page((unsigned long) vma_list);
        free_page((unsigned long) page_list);
umem_kfree:
        if (ret) {
                mmdrop(umem->owning_mm);
                kfree(umem);
        }
        return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
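
/*
 * Usage sketch (illustrative only): a verbs driver's reg_user_mr handler
 * typically pins the user buffer with ib_umem_get() and drops it again
 * with ib_umem_release() when the MR is destroyed.  The variable names
 * and the pd->uobject->context expression below follow common driver
 * usage and are not defined in this file; only the two umem calls
 * reflect the API implemented here.
 *
 *      struct ib_umem *umem;
 *
 *      umem = ib_umem_get(pd->uobject->context, start, length,
 *                         access_flags, 0);
 *      if (IS_ERR(umem))
 *              return ERR_CAST(umem);
 *
 *      ... program the HCA translation table from umem->sg_head.sgl ...
 *
 *      ib_umem_release(umem);
 */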
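/*
 * Common tail of the release paths: drop the mm reference taken in
 * ib_umem_get() and free the umem (or its containing ib_umem_odp).
 */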
static void __ib_umem_release_tail(struct ib_umem *umem)
{
        mmdrop(umem->owning_mm);
        if (umem->is_odp)
                kfree(to_ib_umem_odp(umem));
        else
                kfree(umem);
}

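/*
 * Deferred tail of ib_umem_release(): adjust the pinned-page accounting
 * under mmap_sem and free the umem.  Runs from ib_wq when the lock could
 * not be taken directly in ib_umem_release().
 */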
static void ib_umem_release_defer(struct work_struct *work)
{
        struct ib_umem *umem = container_of(work, struct ib_umem, work);

        down_write(&umem->owning_mm->mmap_sem);
        umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
        up_write(&umem->owning_mm->mmap_sem);

        __ib_umem_release_tail(umem);
}

/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
        struct ib_ucontext *context = umem->context;

        if (umem->is_odp) {
                ib_umem_odp_release(to_ib_umem_odp(umem));
                __ib_umem_release_tail(umem);
                return;
        }

        __ib_umem_release(umem->context->device, umem, 1);

        /*
         * We may be called with the mm's mmap_sem already held.  This
         * can happen when a userspace munmap() is the call that drops
         * the last reference to our file and calls our release
         * method.  If there are memory regions to destroy, we'll end
         * up here and not be able to take the mmap_sem.  In that case
         * we defer the vm_locked accounting to a workqueue.
         */
        if (context->closing) {
                if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
                        INIT_WORK(&umem->work, ib_umem_release_defer);
                        queue_work(ib_wq, &umem->work);
                        return;
                }
        } else {
                down_write(&umem->owning_mm->mmap_sem);
        }
        umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
        up_write(&umem->owning_mm->mmap_sem);

        __ib_umem_release_tail(umem);
}
EXPORT_SYMBOL(ib_umem_release);

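/*
 * Return the number of device pages covered by the umem, derived from the
 * DMA-mapped scatterlist (or from the region size for ODP umems, which are
 * not pinned up front).
 */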
int ib_umem_page_count(struct ib_umem *umem)
{
        int i;
        int n;
        struct scatterlist *sg;

        if (umem->is_odp)
                return ib_umem_num_pages(umem);

        n = 0;
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
                n += sg_dma_len(sg) >> umem->page_shift;

        return n;
}
EXPORT_SYMBOL(ib_umem_page_count);

/*
 * Copy from the given ib_umem's pages to the given buffer.
 *
 * umem - the umem to copy from
 * offset - offset to start copying from
 * dst - destination buffer
 * length - buffer length
 *
 * Returns 0 on success, or an error code.
 */
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
                      size_t length)
{
        size_t end = offset + length;
        int ret;

        if (offset > umem->length || length > umem->length - offset) {
                pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
                       offset, umem->length, end);
                return -EINVAL;
        }

        ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
                                 offset + ib_umem_offset(umem));

        if (ret < 0)
                return ret;
        else if (ret != length)
                return -EINVAL;
        else
                return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);

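/*
 * Usage sketch (illustrative only): drivers use ib_umem_copy_from() to read
 * a small, already-pinned user region into a kernel buffer, for example a
 * header placed at the start of the umem.  The buffer size below is
 * arbitrary and not defined in this file.
 *
 *      u8 hdr[64];
 *      int err;
 *
 *      err = ib_umem_copy_from(hdr, umem, 0, sizeof(hdr));
 *      if (err)
 *              return err;
 */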