linux/drivers/gpu/drm/ttm/ttm_bo_vm.c
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**************************************************************************
 *
 * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
 */

#define pr_fmt(fmt) "[TTM] " fmt

#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/drm_vma_manager.h>
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <linux/mm.h>
#include <linux/pfn_t.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>

static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
                                struct vm_fault *vmf)
{
        vm_fault_t ret = 0;
        int err = 0;

        if (likely(!bo->moving))
                goto out_unlock;

        /*
         * Quick non-stalling check for idle.
         */
        if (dma_fence_is_signaled(bo->moving))
                goto out_clear;

        /*
         * If possible, avoid waiting for GPU with mmap_lock
         * held.  We only do this if the fault allows retry and this
         * is the first attempt.
         */
        if (fault_flag_allow_retry_first(vmf->flags)) {
                ret = VM_FAULT_RETRY;
                if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
                        goto out_unlock;

                ttm_bo_get(bo);
                mmap_read_unlock(vmf->vma->vm_mm);
                (void) dma_fence_wait(bo->moving, true);
                dma_resv_unlock(bo->base.resv);
                ttm_bo_put(bo);
                goto out_unlock;
        }

        /*
         * Ordinary wait.
         */
        err = dma_fence_wait(bo->moving, true);
        if (unlikely(err != 0)) {
                ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
                        VM_FAULT_NOPAGE;
                goto out_unlock;
        }

out_clear:
        dma_fence_put(bo->moving);
        bo->moving = NULL;

out_unlock:
        return ret;
}

static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
                                       unsigned long page_offset)
{
        struct ttm_device *bdev = bo->bdev;

        if (bdev->funcs->io_mem_pfn)
                return bdev->funcs->io_mem_pfn(bo, page_offset);

        return (bo->resource->bus.offset >> PAGE_SHIFT) + page_offset;
}
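
/*
 * A minimal sketch of a driver-side &ttm_device_funcs.io_mem_pfn override.
 * TTM only consults the callback when it is non-NULL; otherwise the default
 * linear translation from bus.offset above is used. The "foo_" names and the
 * fixed vram_base aperture address are invented for illustration; a real
 * driver would derive the physical address from its own memory layout.
 *
 *      static unsigned long foo_io_mem_pfn(struct ttm_buffer_object *bo,
 *                                          unsigned long page_offset)
 *      {
 *              struct foo_device *fdev = foo_device(bo->bdev);
 *
 *              return (fdev->vram_base >> PAGE_SHIFT) +
 *                      bo->resource->start + page_offset;
 *      }
 */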

/**
 * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
 * @bo: The buffer object
 * @vmf: The fault structure handed to the callback
 *
 * vm callbacks like fault() and *_mkwrite() allow the mmap_lock to be dropped
 * during long waits, after which the callback is restarted. This allows other
 * threads using the same virtual memory space to concurrently map() and
 * unmap() completely unrelated buffer objects. TTM buffer object reservations
 * sometimes wait for GPU and should therefore be considered long waits. This
 * function reserves the buffer object interruptibly, taking this into
 * account. Starvation is avoided by the vm system not allowing too many
 * repeated restarts.
 * This function is intended to be used in customized fault() and _mkwrite()
 * handlers.
 *
 * Return:
 *    0 on success and the bo was reserved.
 *    VM_FAULT_RETRY if a blocking wait was needed and the fault must be
 *    retried.
 *    VM_FAULT_NOPAGE if a blocking wait was needed but retrying was not
 *    allowed.
 */
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
                             struct vm_fault *vmf)
{
        /*
         * Work around locking order reversal in fault / nopfn
         * between mmap_lock and bo_reserve: Perform a trylock operation
         * for reserve, and if it fails, retry the fault after waiting
         * for the buffer to become unreserved.
         */
        if (unlikely(!dma_resv_trylock(bo->base.resv))) {
                /*
                 * If the fault allows retry and this is the first
                 * fault attempt, we try to release the mmap_lock
                 * before waiting.
                 */
                if (fault_flag_allow_retry_first(vmf->flags)) {
                        if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
                                ttm_bo_get(bo);
                                mmap_read_unlock(vmf->vma->vm_mm);
                                if (!dma_resv_lock_interruptible(bo->base.resv,
                                                                 NULL))
                                        dma_resv_unlock(bo->base.resv);
                                ttm_bo_put(bo);
                        }

                        return VM_FAULT_RETRY;
                }

                if (dma_resv_lock_interruptible(bo->base.resv, NULL))
                        return VM_FAULT_NOPAGE;
        }

        /*
         * Refuse to fault imported pages. This should be handled
         * (if at all) by redirecting mmap to the exporter.
         */
        if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
                dma_resv_unlock(bo->base.resv);
                return VM_FAULT_SIGBUS;
        }

        return 0;
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
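
/*
 * A minimal sketch of a customized fault() handler built on
 * ttm_bo_vm_reserve(), of the kind the kernel-doc above refers to. It mirrors
 * the structure of ttm_bo_vm_fault() further down; only the hypothetical
 * "foo_" name is invented for illustration. Note that on VM_FAULT_RETRY
 * without FAULT_FLAG_RETRY_NOWAIT the reservation has already been dropped,
 * so the handler must not unlock again:
 *
 *      static vm_fault_t foo_bo_vm_fault(struct vm_fault *vmf)
 *      {
 *              struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
 *              vm_fault_t ret;
 *
 *              ret = ttm_bo_vm_reserve(bo, vmf);
 *              if (ret)
 *                      return ret;
 *
 *              ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
 *                                             TTM_BO_VM_NUM_PREFAULT, 1);
 *              if (ret == VM_FAULT_RETRY &&
 *                  !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 *                      return ret;
 *
 *              dma_resv_unlock(bo->base.resv);
 *              return ret;
 *      }
 */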

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
 * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
 * @vmf: Fault data
 * @bo: The buffer object
 * @page_offset: Page offset from bo start
 * @fault_page_size: The size of the fault in pages.
 * @pgprot: The page protections.
 *
 * Does additional checking whether it's possible to insert a PUD or PMD
 * pfn and performs the insertion.
 *
 * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
 * a huge fault was not possible, or on insertion error.
 */
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
                                        struct ttm_buffer_object *bo,
                                        pgoff_t page_offset,
                                        pgoff_t fault_page_size,
                                        pgprot_t pgprot)
{
        pgoff_t i;
        vm_fault_t ret;
        unsigned long pfn;
        pfn_t pfnt;
        struct ttm_tt *ttm = bo->ttm;
        bool write = vmf->flags & FAULT_FLAG_WRITE;

        /* Fault should not cross bo boundary. */
        page_offset &= ~(fault_page_size - 1);
        if (page_offset + fault_page_size > bo->resource->num_pages)
                goto out_fallback;

        if (bo->resource->bus.is_iomem)
                pfn = ttm_bo_io_mem_pfn(bo, page_offset);
        else
                pfn = page_to_pfn(ttm->pages[page_offset]);

        /* pfn must be fault_page_size aligned. */
        if ((pfn & (fault_page_size - 1)) != 0)
                goto out_fallback;

        /* Check that memory is contiguous. */
        if (!bo->resource->bus.is_iomem) {
                for (i = 1; i < fault_page_size; ++i) {
                        if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
                                goto out_fallback;
                }
        } else if (bo->bdev->funcs->io_mem_pfn) {
                for (i = 1; i < fault_page_size; ++i) {
                        if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
                                goto out_fallback;
                }
        }

        pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
        if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
                ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
        else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
                ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
#endif
        else
                /* The assignment (not ==) is intentional: warn and fall back. */
                WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);

        if (ret != VM_FAULT_NOPAGE)
                goto out_fallback;

        return VM_FAULT_NOPAGE;
out_fallback:
        count_vm_event(THP_FAULT_FALLBACK);
        return VM_FAULT_FALLBACK;
}
#else
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
                                        struct ttm_buffer_object *bo,
                                        pgoff_t page_offset,
                                        pgoff_t fault_page_size,
                                        pgprot_t pgprot)
{
        return VM_FAULT_FALLBACK;
}
#endif

/**
 * ttm_bo_vm_fault_reserved - TTM fault helper
 * @vmf: The struct vm_fault given as argument to the fault callback
 * @prot: The page protection to be used for this memory area.
 * @num_prefault: Maximum number of prefault pages. The caller may want to
 * specify this based on madvise settings and the size of the GPU object
 * backed by the memory.
 * @fault_page_size: The size of the fault in pages.
 *
 * This function inserts one or more page table entries pointing to the
 * memory backing the buffer object, and then returns a fault code
 * instructing the caller to retry the page access.
 *
 * Return:
 *   VM_FAULT_NOPAGE on success or pending signal
 *   VM_FAULT_SIGBUS on unspecified error
 *   VM_FAULT_OOM on out-of-memory
 *   VM_FAULT_RETRY if retryable wait
 */
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
                                    pgprot_t prot,
                                    pgoff_t num_prefault,
                                    pgoff_t fault_page_size)
{
        struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = vma->vm_private_data;
        struct ttm_device *bdev = bo->bdev;
        unsigned long page_offset;
        unsigned long page_last;
        unsigned long pfn;
        struct ttm_tt *ttm = NULL;
        struct page *page;
        int err;
        pgoff_t i;
        vm_fault_t ret = VM_FAULT_NOPAGE;
        unsigned long address = vmf->address;

        /*
         * Wait for buffer data in transit, due to a pipelined
         * move.
         */
        ret = ttm_bo_vm_fault_idle(bo, vmf);
        if (unlikely(ret != 0))
                return ret;

        err = ttm_mem_io_reserve(bdev, bo->resource);
        if (unlikely(err != 0))
                return VM_FAULT_SIGBUS;

        page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
                vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node);
        page_last = vma_pages(vma) + vma->vm_pgoff -
                drm_vma_node_start(&bo->base.vma_node);

        if (unlikely(page_offset >= bo->resource->num_pages))
                return VM_FAULT_SIGBUS;

        prot = ttm_io_prot(bo, bo->resource, prot);
        if (!bo->resource->bus.is_iomem) {
                struct ttm_operation_ctx ctx = {
                        .interruptible = false,
                        .no_wait_gpu = false,
                        .force_alloc = true
                };

                ttm = bo->ttm;
                if (ttm_tt_populate(bdev, bo->ttm, &ctx))
                        return VM_FAULT_OOM;
        } else {
                /* Iomem should not be marked encrypted */
                prot = pgprot_decrypted(prot);
        }

        /* We don't prefault on huge faults. Yet. */
        if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1)
                return ttm_bo_vm_insert_huge(vmf, bo, page_offset,
                                             fault_page_size, prot);

        /*
         * Speculatively prefault a number of pages. Only error on
         * first page.
         */
        for (i = 0; i < num_prefault; ++i) {
                if (bo->resource->bus.is_iomem) {
                        pfn = ttm_bo_io_mem_pfn(bo, page_offset);
                } else {
                        page = ttm->pages[page_offset];
                        if (unlikely(!page && i == 0)) {
                                return VM_FAULT_OOM;
                        } else if (unlikely(!page)) {
                                break;
                        }
                        page->index = drm_vma_node_start(&bo->base.vma_node) +
                                page_offset;
                        pfn = page_to_pfn(page);
                }

                /*
                 * Note that the value of @prot at this point may differ from
                 * the value of @vma->vm_page_prot in the caching- and
                 * encryption bits. This is because the exact location of the
                 * data may not be known at mmap() time and may also change
                 * at arbitrary times while the data is mmap'ed.
                 * See vmf_insert_mixed_prot() for a discussion.
                 */
                ret = vmf_insert_pfn_prot(vma, address, pfn, prot);

                /* Never error on prefaulted PTEs */
                if (unlikely((ret & VM_FAULT_ERROR))) {
                        if (i == 0)
                                return VM_FAULT_NOPAGE;
                        else
                                break;
                }

                address += PAGE_SIZE;
                if (unlikely(++page_offset >= page_last))
                        break;
        }
        return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);

static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
{
        struct page *dummy_page = (struct page *)res;

        __free_page(dummy_page);
}

vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
{
        struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = vma->vm_private_data;
        struct drm_device *ddev = bo->base.dev;
        vm_fault_t ret = VM_FAULT_NOPAGE;
        unsigned long address;
        unsigned long pfn;
        struct page *page;

        /* Allocate a new dummy page to map all the VA range in this VMA to it */
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return VM_FAULT_OOM;

        /* Set the page to be freed using drmm release action */
        if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
                return VM_FAULT_OOM;

        pfn = page_to_pfn(page);

        /* Prefault the entire VMA range right away to avoid further faults */
        for (address = vma->vm_start; address < vma->vm_end;
             address += PAGE_SIZE)
                ret = vmf_insert_pfn_prot(vma, address, pfn, prot);

        return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_dummy_page);

vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        pgprot_t prot;
        struct ttm_buffer_object *bo = vma->vm_private_data;
        struct drm_device *ddev = bo->base.dev;
        vm_fault_t ret;
        int idx;

        ret = ttm_bo_vm_reserve(bo, vmf);
        if (ret)
                return ret;

        prot = vma->vm_page_prot;
        if (drm_dev_enter(ddev, &idx)) {
                ret = ttm_bo_vm_fault_reserved(vmf, prot,
                                               TTM_BO_VM_NUM_PREFAULT, 1);
                drm_dev_exit(idx);
        } else {
                ret = ttm_bo_vm_dummy_page(vmf, prot);
        }
        if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
                return ret;

        dma_resv_unlock(bo->base.resv);

        return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault);

void ttm_bo_vm_open(struct vm_area_struct *vma)
{
        struct ttm_buffer_object *bo = vma->vm_private_data;

        WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);

        ttm_bo_get(bo);
}
EXPORT_SYMBOL(ttm_bo_vm_open);

void ttm_bo_vm_close(struct vm_area_struct *vma)
{
        struct ttm_buffer_object *bo = vma->vm_private_data;

        ttm_bo_put(bo);
        vma->vm_private_data = NULL;
}
EXPORT_SYMBOL(ttm_bo_vm_close);

static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
                                 unsigned long offset,
                                 uint8_t *buf, int len, int write)
{
        unsigned long page = offset >> PAGE_SHIFT;
        unsigned long bytes_left = len;
        int ret;

        /* Copy a page at a time, that way no extra virtual address
         * mapping is needed
         */
        offset -= page << PAGE_SHIFT;
        do {
                unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
                struct ttm_bo_kmap_obj map;
                void *ptr;
                bool is_iomem;

                ret = ttm_bo_kmap(bo, page, 1, &map);
                if (ret)
                        return ret;

                ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
                WARN_ON_ONCE(is_iomem);
                if (write)
                        memcpy(ptr, buf, bytes);
                else
                        memcpy(buf, ptr, bytes);
                ttm_bo_kunmap(&map);

                page++;
                buf += bytes;
                bytes_left -= bytes;
                offset = 0;
        } while (bytes_left);

        return len;
}

int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
                     void *buf, int len, int write)
{
        struct ttm_buffer_object *bo = vma->vm_private_data;
        unsigned long offset = addr - vma->vm_start +
                ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node))
                 << PAGE_SHIFT);
        int ret;

        if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->resource->num_pages)
                return -EIO;

        ret = ttm_bo_reserve(bo, true, false, NULL);
        if (ret)
                return ret;

        switch (bo->resource->mem_type) {
        case TTM_PL_SYSTEM:
                if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
                        ret = ttm_tt_swapin(bo->ttm);
                        /* Break instead of returning so the reservation is dropped. */
                        if (unlikely(ret != 0))
                                break;
                }
                fallthrough;
        case TTM_PL_TT:
                ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
                break;
        default:
                if (bo->bdev->funcs->access_memory)
                        ret = bo->bdev->funcs->access_memory(
                                bo, offset, buf, len, write);
                else
                        ret = -EIO;
        }

        ttm_bo_unreserve(bo);

        return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_access);
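
/*
 * A minimal sketch of a driver-side &ttm_device_funcs.access_memory callback,
 * which ttm_bo_vm_access() falls back to for placements it cannot kmap (e.g.
 * VRAM). The "foo_" names and the directly mapped vram_mmio window are
 * invented for illustration; real drivers often go through a windowed
 * aperture or indirect register access instead.
 *
 *      static int foo_access_memory(struct ttm_buffer_object *bo,
 *                                   unsigned long offset,
 *                                   void *buf, int len, int write)
 *      {
 *              struct foo_device *fdev = foo_device(bo->bdev);
 *              void __iomem *ptr = fdev->vram_mmio +
 *                      (bo->resource->start << PAGE_SHIFT) + offset;
 *
 *              if (write)
 *                      memcpy_toio(ptr, buf, len);
 *              else
 *                      memcpy_fromio(buf, ptr, len);
 *
 *              return len;
 *      }
 */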

static const struct vm_operations_struct ttm_bo_vm_ops = {
        .fault = ttm_bo_vm_fault,
        .open = ttm_bo_vm_open,
        .close = ttm_bo_vm_close,
        .access = ttm_bo_vm_access,
};
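
/*
 * Drivers that need extra fault-time work can supply their own
 * &vm_operations_struct while still reusing the helpers above, e.g. (a
 * sketch; foo_bo_vm_fault is hypothetical, see the example handler after
 * ttm_bo_vm_reserve()):
 *
 *      static const struct vm_operations_struct foo_bo_vm_ops = {
 *              .fault = foo_bo_vm_fault,
 *              .open = ttm_bo_vm_open,
 *              .close = ttm_bo_vm_close,
 *              .access = ttm_bo_vm_access,
 *      };
 *
 * ttm_bo_mmap_obj() below only installs ttm_bo_vm_ops when vma->vm_ops is
 * still NULL, so such an override must be set on the vma before calling it.
 */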

int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
{
        /* Enforce no COW since it would have really strange behavior. */
        if (is_cow_mapping(vma->vm_flags))
                return -EINVAL;

        ttm_bo_get(bo);

        /*
         * Drivers may want to override the vm_ops field. Otherwise we
         * use TTM's default callbacks.
         */
        if (!vma->vm_ops)
                vma->vm_ops = &ttm_bo_vm_ops;

        /*
         * Note: We're transferring the bo reference to
         * vma->vm_private_data here.
         */
        vma->vm_private_data = bo;

        vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
        return 0;
}
EXPORT_SYMBOL(ttm_bo_mmap_obj);
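
/*
 * A minimal sketch of how a driver's mmap path might use ttm_bo_mmap_obj(),
 * assuming a hypothetical foo_driver_mmap() and foo_bo_lookup() that resolve
 * the mmap offset to a buffer object; GEM-based drivers typically reach this
 * through the drm_gem_ttm_mmap() helper instead. A real implementation would
 * also drop its lookup reference afterwards, since ttm_bo_mmap_obj() takes
 * its own reference for vma->vm_private_data.
 *
 *      static int foo_driver_mmap(struct file *filp, struct vm_area_struct *vma)
 *      {
 *              struct ttm_buffer_object *bo =
 *                      foo_bo_lookup(filp, vma->vm_pgoff);
 *
 *              if (!bo)
 *                      return -EINVAL;
 *
 *              return ttm_bo_mmap_obj(vma, bo);
 *      }
 */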