linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
   1/*
   2 * Copyright 2009 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26/*
  27 * Authors:
  28 *    Jerome Glisse <glisse@freedesktop.org>
  29 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  30 *    Dave Airlie
  31 */
  32
  33#include <linux/dma-mapping.h>
  34#include <linux/iommu.h>
  35#include <linux/pagemap.h>
  36#include <linux/sched/task.h>
  37#include <linux/sched/mm.h>
  38#include <linux/seq_file.h>
  39#include <linux/slab.h>
  40#include <linux/swap.h>
  41#include <linux/swiotlb.h>
  42#include <linux/dma-buf.h>
  43#include <linux/sizes.h>
  44
  45#include <drm/ttm/ttm_bo_api.h>
  46#include <drm/ttm/ttm_bo_driver.h>
  47#include <drm/ttm/ttm_placement.h>
  48#include <drm/ttm/ttm_range_manager.h>
  49
  50#include <drm/amdgpu_drm.h>
  51
  52#include "amdgpu.h"
  53#include "amdgpu_object.h"
  54#include "amdgpu_trace.h"
  55#include "amdgpu_amdkfd.h"
  56#include "amdgpu_sdma.h"
  57#include "amdgpu_ras.h"
  58#include "amdgpu_atomfirmware.h"
  59#include "amdgpu_res_cursor.h"
  60#include "bif/bif_4_1_d.h"
  61
  62#define AMDGPU_TTM_VRAM_MAX_DW_READ     (size_t)128
  63
  64static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
  65                                   struct ttm_tt *ttm,
  66                                   struct ttm_resource *bo_mem);
  67static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
  68                                      struct ttm_tt *ttm);
  69
  70static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
  71                                    unsigned int type,
  72                                    uint64_t size_in_page)
  73{
  74        return ttm_range_man_init(&adev->mman.bdev, type,
  75                                  false, size_in_page);
  76}
  77
  78/**
  79 * amdgpu_evict_flags - Compute placement flags
  80 *
  81 * @bo: The buffer object to evict
  82 * @placement: Possible destination(s) for evicted BO
  83 *
  84 * Fill in placement data when ttm_bo_evict() is called
  85 */
  86static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
  87                                struct ttm_placement *placement)
  88{
  89        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
  90        struct amdgpu_bo *abo;
  91        static const struct ttm_place placements = {
  92                .fpfn = 0,
  93                .lpfn = 0,
  94                .mem_type = TTM_PL_SYSTEM,
  95                .flags = 0
  96        };
  97
  98        /* Don't handle scatter gather BOs */
  99        if (bo->type == ttm_bo_type_sg) {
 100                placement->num_placement = 0;
 101                placement->num_busy_placement = 0;
 102                return;
 103        }
 104
 105        /* Object isn't an AMDGPU object so ignore */
 106        if (!amdgpu_bo_is_amdgpu_bo(bo)) {
 107                placement->placement = &placements;
 108                placement->busy_placement = &placements;
 109                placement->num_placement = 1;
 110                placement->num_busy_placement = 1;
 111                return;
 112        }
 113
 114        abo = ttm_to_amdgpu_bo(bo);
 115        if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
 116                struct dma_fence *fence;
 117                struct dma_resv *resv = &bo->base._resv;
 118
 119                rcu_read_lock();
 120                fence = rcu_dereference(resv->fence_excl);
 121                if (fence && !fence->ops->signaled)
 122                        dma_fence_enable_sw_signaling(fence);
 123
 124                placement->num_placement = 0;
 125                placement->num_busy_placement = 0;
 126                rcu_read_unlock();
 127                return;
 128        }
 129
 130        switch (bo->resource->mem_type) {
 131        case AMDGPU_PL_GDS:
 132        case AMDGPU_PL_GWS:
 133        case AMDGPU_PL_OA:
 134                placement->num_placement = 0;
 135                placement->num_busy_placement = 0;
 136                return;
 137
 138        case TTM_PL_VRAM:
 139                if (!adev->mman.buffer_funcs_enabled) {
 140                        /* Move to system memory */
 141                        amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 142                } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 143                           !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
 144                           amdgpu_bo_in_cpu_visible_vram(abo)) {
 145
 146                        /* Try evicting to the CPU inaccessible part of VRAM
 147                         * first, but only set GTT as busy placement, so this
 148                         * BO will be evicted to GTT rather than causing other
 149                         * BOs to be evicted from VRAM
 150                         */
 151                        amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
 152                                                        AMDGPU_GEM_DOMAIN_GTT |
 153                                                        AMDGPU_GEM_DOMAIN_CPU);
 154                        abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 155                        abo->placements[0].lpfn = 0;
 156                        abo->placement.busy_placement = &abo->placements[1];
 157                        abo->placement.num_busy_placement = 1;
 158                } else {
 159                        /* Move to GTT memory */
 160                        amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
 161                                                        AMDGPU_GEM_DOMAIN_CPU);
 162                }
 163                break;
 164        case TTM_PL_TT:
 165        case AMDGPU_PL_PREEMPT:
 166        default:
 167                amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 168                break;
 169        }
 170        *placement = abo->placement;
 171}
 172
 173/**
 174 * amdgpu_ttm_map_buffer - Map memory into the GART windows
 175 * @bo: buffer object to map
 176 * @mem: memory object to map
 177 * @mm_cur: range to map
 178 * @num_pages: number of pages to map
 179 * @window: which GART window to use
 180 * @ring: DMA ring to use for the copy
 181 * @tmz: if we should setup a TMZ enabled mapping
 182 * @addr: resulting address inside the MC address space
 183 *
 184 * Setup one of the GART windows to access a specific piece of memory or return
 185 * the physical address for local memory.
 186 */
 187static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 188                                 struct ttm_resource *mem,
 189                                 struct amdgpu_res_cursor *mm_cur,
 190                                 unsigned num_pages, unsigned window,
 191                                 struct amdgpu_ring *ring, bool tmz,
 192                                 uint64_t *addr)
 193{
 194        struct amdgpu_device *adev = ring->adev;
 195        struct amdgpu_job *job;
 196        unsigned num_dw, num_bytes;
 197        struct dma_fence *fence;
 198        uint64_t src_addr, dst_addr;
 199        void *cpu_addr;
 200        uint64_t flags;
 201        unsigned int i;
 202        int r;
 203
 204        BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
 205               AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
 206        BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
 207
 208        /* Map only what can't be accessed directly */
 209        if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
 210                *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
 211                        mm_cur->start;
 212                return 0;
 213        }
 214
 215        *addr = adev->gmc.gart_start;
 216        *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
 217                AMDGPU_GPU_PAGE_SIZE;
 218        *addr += mm_cur->start & ~PAGE_MASK;
 219
 220        num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 221        num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 222
 223        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
 224                                     AMDGPU_IB_POOL_DELAYED, &job);
 225        if (r)
 226                return r;
 227
 228        src_addr = num_dw * 4;
 229        src_addr += job->ibs[0].gpu_addr;
 230
 231        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
 232        dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
 233        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
 234                                dst_addr, num_bytes, false);
 235
 236        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 237        WARN_ON(job->ibs[0].length_dw > num_dw);
 238
 239        flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
 240        if (tmz)
 241                flags |= AMDGPU_PTE_TMZ;
 242
 243        cpu_addr = &job->ibs[0].ptr[num_dw];
 244
 245        if (mem->mem_type == TTM_PL_TT) {
 246                dma_addr_t *dma_addr;
 247
 248                dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
 249                r = amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags,
 250                                    cpu_addr);
 251                if (r)
 252                        goto error_free;
 253        } else {
 254                dma_addr_t dma_address;
 255
 256                dma_address = mm_cur->start;
 257                dma_address += adev->vm_manager.vram_base_offset;
 258
 259                for (i = 0; i < num_pages; ++i) {
 260                        r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
 261                                            &dma_address, flags, cpu_addr);
 262                        if (r)
 263                                goto error_free;
 264
 265                        dma_address += PAGE_SIZE;
 266                }
 267        }
 268
 269        r = amdgpu_job_submit(job, &adev->mman.entity,
 270                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
 271        if (r)
 272                goto error_free;
 273
 274        dma_fence_put(fence);
 275
 276        return r;
 277
 278error_free:
 279        amdgpu_job_free(job);
 280        return r;
 281}
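
/*
 * Editor's note (illustrative, not part of the upstream file): the window
 * address computed above is plain arithmetic inside the GART aperture:
 *
 *      addr = adev->gmc.gart_start
 *           + (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE
 *           + (mm_cur->start & ~PAGE_MASK);
 *
 * Each window is therefore a fixed slot of AMDGPU_GTT_MAX_TRANSFER_SIZE GPU
 * pages; amdgpu_ttm_copy_mem_to_mem() below uses window 0 for the source and
 * window 1 for the destination.
 */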
 282
 283/**
 284 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
 285 * @adev: amdgpu device
 286 * @src: buffer/address where to read from
 287 * @dst: buffer/address where to write to
 288 * @size: number of bytes to copy
 289 * @tmz: if a secure copy should be used
 290 * @resv: resv object to sync to
 291 * @f: Returns the last fence if multiple jobs are submitted.
 292 *
 293 * The function copies @size bytes from {src->mem + src->offset} to
  294 * {dst->mem + dst->offset}. src->bo and dst->bo could be the same BO for a
  295 * move, or different BOs for a BO-to-BO copy.
 296 *
 297 */
 298int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 299                               const struct amdgpu_copy_mem *src,
 300                               const struct amdgpu_copy_mem *dst,
 301                               uint64_t size, bool tmz,
 302                               struct dma_resv *resv,
 303                               struct dma_fence **f)
 304{
 305        const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
 306                                        AMDGPU_GPU_PAGE_SIZE);
 307
 308        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 309        struct amdgpu_res_cursor src_mm, dst_mm;
 310        struct dma_fence *fence = NULL;
 311        int r = 0;
 312
 313        if (!adev->mman.buffer_funcs_enabled) {
 314                DRM_ERROR("Trying to move memory with ring turned off.\n");
 315                return -EINVAL;
 316        }
 317
 318        amdgpu_res_first(src->mem, src->offset, size, &src_mm);
 319        amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
 320
 321        mutex_lock(&adev->mman.gtt_window_lock);
 322        while (src_mm.remaining) {
 323                uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
 324                uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
 325                struct dma_fence *next;
 326                uint32_t cur_size;
 327                uint64_t from, to;
 328
 329                /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
 330                 * begins at an offset, then adjust the size accordingly
 331                 */
 332                cur_size = max(src_page_offset, dst_page_offset);
 333                cur_size = min(min3(src_mm.size, dst_mm.size, size),
 334                               (uint64_t)(GTT_MAX_BYTES - cur_size));
 335
 336                /* Map src to window 0 and dst to window 1. */
 337                r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
 338                                          PFN_UP(cur_size + src_page_offset),
 339                                          0, ring, tmz, &from);
 340                if (r)
 341                        goto error;
 342
 343                r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
 344                                          PFN_UP(cur_size + dst_page_offset),
 345                                          1, ring, tmz, &to);
 346                if (r)
 347                        goto error;
 348
 349                r = amdgpu_copy_buffer(ring, from, to, cur_size,
 350                                       resv, &next, false, true, tmz);
 351                if (r)
 352                        goto error;
 353
 354                dma_fence_put(fence);
 355                fence = next;
 356
 357                amdgpu_res_next(&src_mm, cur_size);
 358                amdgpu_res_next(&dst_mm, cur_size);
 359        }
 360error:
 361        mutex_unlock(&adev->mman.gtt_window_lock);
 362        if (f)
 363                *f = dma_fence_get(fence);
 364        dma_fence_put(fence);
 365        return r;
 366}
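
/*
 * Usage sketch (editor's illustration; the canonical in-tree caller is
 * amdgpu_move_blit() right below). A whole-BO, non-TMZ copy boils down to:
 *
 *      struct amdgpu_copy_mem src = { .bo = bo, .mem = old_mem, .offset = 0 };
 *      struct amdgpu_copy_mem dst = { .bo = bo, .mem = new_mem, .offset = 0 };
 *      struct dma_fence *fence = NULL;
 *
 *      r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
 *                                     new_mem->num_pages << PAGE_SHIFT,
 *                                     false, bo->base.resv, &fence);
 *
 * The fence returned in @f (if any) must be waited on, or handed to TTM via
 * ttm_bo_move_accel_cleanup(), before the new placement is relied upon.
 */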
 367
 368/*
 369 * amdgpu_move_blit - Copy an entire buffer to another buffer
 370 *
  371 * This is a helper called by amdgpu_bo_move() to move buffers to and from
  372 * VRAM using the copy engine.
 373 */
 374static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 375                            bool evict,
 376                            struct ttm_resource *new_mem,
 377                            struct ttm_resource *old_mem)
 378{
 379        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 380        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 381        struct amdgpu_copy_mem src, dst;
 382        struct dma_fence *fence = NULL;
 383        int r;
 384
 385        src.bo = bo;
 386        dst.bo = bo;
 387        src.mem = old_mem;
 388        dst.mem = new_mem;
 389        src.offset = 0;
 390        dst.offset = 0;
 391
 392        r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
 393                                       new_mem->num_pages << PAGE_SHIFT,
 394                                       amdgpu_bo_encrypted(abo),
 395                                       bo->base.resv, &fence);
 396        if (r)
 397                goto error;
 398
 399        /* clear the space being freed */
 400        if (old_mem->mem_type == TTM_PL_VRAM &&
 401            (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
 402                struct dma_fence *wipe_fence = NULL;
 403
 404                r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
 405                                       NULL, &wipe_fence);
 406                if (r) {
 407                        goto error;
 408                } else if (wipe_fence) {
 409                        dma_fence_put(fence);
 410                        fence = wipe_fence;
 411                }
 412        }
 413
 414        /* Always block for VM page tables before committing the new location */
 415        if (bo->type == ttm_bo_type_kernel)
 416                r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
 417        else
 418                r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
 419        dma_fence_put(fence);
 420        return r;
 421
 422error:
 423        if (fence)
 424                dma_fence_wait(fence, false);
 425        dma_fence_put(fence);
 426        return r;
 427}
 428
 429/*
 430 * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
 431 *
 432 * Called by amdgpu_bo_move()
 433 */
 434static bool amdgpu_mem_visible(struct amdgpu_device *adev,
 435                               struct ttm_resource *mem)
 436{
 437        uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
 438        struct amdgpu_res_cursor cursor;
 439
 440        if (mem->mem_type == TTM_PL_SYSTEM ||
 441            mem->mem_type == TTM_PL_TT)
 442                return true;
 443        if (mem->mem_type != TTM_PL_VRAM)
 444                return false;
 445
 446        amdgpu_res_first(mem, 0, mem_size, &cursor);
 447
 448        /* ttm_resource_ioremap only supports contiguous memory */
 449        if (cursor.size != mem_size)
 450                return false;
 451
 452        return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
 453}
 454
 455/*
 456 * amdgpu_bo_move - Move a buffer object to a new memory location
 457 *
 458 * Called by ttm_bo_handle_move_mem()
 459 */
 460static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 461                          struct ttm_operation_ctx *ctx,
 462                          struct ttm_resource *new_mem,
 463                          struct ttm_place *hop)
 464{
 465        struct amdgpu_device *adev;
 466        struct amdgpu_bo *abo;
 467        struct ttm_resource *old_mem = bo->resource;
 468        int r;
 469
 470        if (new_mem->mem_type == TTM_PL_TT ||
 471            new_mem->mem_type == AMDGPU_PL_PREEMPT) {
 472                r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
 473                if (r)
 474                        return r;
 475        }
 476
 477        /* Can't move a pinned BO */
 478        abo = ttm_to_amdgpu_bo(bo);
 479        if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
 480                return -EINVAL;
 481
 482        adev = amdgpu_ttm_adev(bo->bdev);
 483
 484        if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 485                ttm_bo_move_null(bo, new_mem);
 486                goto out;
 487        }
 488        if (old_mem->mem_type == TTM_PL_SYSTEM &&
 489            (new_mem->mem_type == TTM_PL_TT ||
 490             new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
 491                ttm_bo_move_null(bo, new_mem);
 492                goto out;
 493        }
 494        if ((old_mem->mem_type == TTM_PL_TT ||
 495             old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
 496            new_mem->mem_type == TTM_PL_SYSTEM) {
 497                r = ttm_bo_wait_ctx(bo, ctx);
 498                if (r)
 499                        return r;
 500
 501                amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
 502                ttm_resource_free(bo, &bo->resource);
 503                ttm_bo_assign_mem(bo, new_mem);
 504                goto out;
 505        }
 506
 507        if (old_mem->mem_type == AMDGPU_PL_GDS ||
 508            old_mem->mem_type == AMDGPU_PL_GWS ||
 509            old_mem->mem_type == AMDGPU_PL_OA ||
 510            new_mem->mem_type == AMDGPU_PL_GDS ||
 511            new_mem->mem_type == AMDGPU_PL_GWS ||
 512            new_mem->mem_type == AMDGPU_PL_OA) {
 513                /* Nothing to save here */
 514                ttm_bo_move_null(bo, new_mem);
 515                goto out;
 516        }
 517
 518        if (bo->type == ttm_bo_type_device &&
 519            new_mem->mem_type == TTM_PL_VRAM &&
 520            old_mem->mem_type != TTM_PL_VRAM) {
 521                /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
 522                 * accesses the BO after it's moved.
 523                 */
 524                abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 525        }
 526
 527        if (adev->mman.buffer_funcs_enabled) {
 528                if (((old_mem->mem_type == TTM_PL_SYSTEM &&
 529                      new_mem->mem_type == TTM_PL_VRAM) ||
 530                     (old_mem->mem_type == TTM_PL_VRAM &&
 531                      new_mem->mem_type == TTM_PL_SYSTEM))) {
 532                        hop->fpfn = 0;
 533                        hop->lpfn = 0;
 534                        hop->mem_type = TTM_PL_TT;
 535                        hop->flags = TTM_PL_FLAG_TEMPORARY;
 536                        return -EMULTIHOP;
 537                }
 538
 539                r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
 540        } else {
 541                r = -ENODEV;
 542        }
 543
 544        if (r) {
 545                /* Check that all memory is CPU accessible */
 546                if (!amdgpu_mem_visible(adev, old_mem) ||
 547                    !amdgpu_mem_visible(adev, new_mem)) {
 548                        pr_err("Move buffer fallback to memcpy unavailable\n");
 549                        return r;
 550                }
 551
 552                r = ttm_bo_move_memcpy(bo, ctx, new_mem);
 553                if (r)
 554                        return r;
 555        }
 556
 557out:
 558        /* update statistics */
 559        atomic64_add(bo->base.size, &adev->num_bytes_moved);
 560        amdgpu_bo_move_notify(bo, evict, new_mem);
 561        return 0;
 562}
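
/*
 * Editor's note: for a direct SYSTEM <-> VRAM move the function above does
 * not copy anything itself; it fills *hop with a temporary TTM_PL_TT
 * placement and returns -EMULTIHOP, and TTM then re-drives the move in two
 * steps through GTT.
 */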
 563
 564/*
 565 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
 566 *
 567 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 568 */
 569static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
 570                                     struct ttm_resource *mem)
 571{
 572        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 573        size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
 574
 575        switch (mem->mem_type) {
 576        case TTM_PL_SYSTEM:
 577                /* system memory */
 578                return 0;
 579        case TTM_PL_TT:
 580        case AMDGPU_PL_PREEMPT:
 581                break;
 582        case TTM_PL_VRAM:
 583                mem->bus.offset = mem->start << PAGE_SHIFT;
 584                /* check if it's visible */
 585                if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
 586                        return -EINVAL;
 587
 588                if (adev->mman.aper_base_kaddr &&
 589                    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
 590                        mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
 591                                        mem->bus.offset;
 592
 593                mem->bus.offset += adev->gmc.aper_base;
 594                mem->bus.is_iomem = true;
 595                break;
 596        default:
 597                return -EINVAL;
 598        }
 599        return 0;
 600}
 601
 602static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 603                                           unsigned long page_offset)
 604{
 605        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 606        struct amdgpu_res_cursor cursor;
 607
 608        amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
 609                         &cursor);
 610        return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
 611}
 612
 613/**
 614 * amdgpu_ttm_domain_start - Returns GPU start address
 615 * @adev: amdgpu device object
 616 * @type: type of the memory
 617 *
 618 * Returns:
 619 * GPU start address of a memory domain
 620 */
 621
 622uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
 623{
 624        switch (type) {
 625        case TTM_PL_TT:
 626                return adev->gmc.gart_start;
 627        case TTM_PL_VRAM:
 628                return adev->gmc.vram_start;
 629        }
 630
 631        return 0;
 632}
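
/*
 * Editor's illustration: callers combine this with an offset inside the
 * resource to form an MC address, e.g. amdgpu_ttm_map_buffer() above does
 *
 *      *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) + mm_cur->start;
 *
 * for memory the GPU can already reach without going through a GART window.
 */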
 633
 634/*
 635 * TTM backend functions.
 636 */
 637struct amdgpu_ttm_tt {
 638        struct ttm_tt   ttm;
 639        struct drm_gem_object   *gobj;
 640        u64                     offset;
 641        uint64_t                userptr;
 642        struct task_struct      *usertask;
 643        uint32_t                userflags;
 644        bool                    bound;
 645#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
 646        struct hmm_range        *range;
 647#endif
 648};
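
/*
 * Editor's note: the embedded struct ttm_tt must stay the first member, as
 * this file converts between the two types with a plain cast instead of
 * container_of():
 *
 *      struct amdgpu_ttm_tt *gtt = (void *)ttm;
 */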
 649
 650#ifdef CONFIG_DRM_AMDGPU_USERPTR
 651/*
 652 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
  653 * memory and start HMM tracking of CPU page table updates
  654 *
  655 * The calling function must call amdgpu_ttm_tt_get_user_pages_done() once and
  656 * only once afterwards to stop HMM tracking
 657 */
 658int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 659{
 660        struct ttm_tt *ttm = bo->tbo.ttm;
 661        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 662        unsigned long start = gtt->userptr;
 663        struct vm_area_struct *vma;
 664        struct mm_struct *mm;
 665        bool readonly;
 666        int r = 0;
 667
 668        mm = bo->notifier.mm;
 669        if (unlikely(!mm)) {
 670                DRM_DEBUG_DRIVER("BO is not registered?\n");
 671                return -EFAULT;
 672        }
 673
 674        /* Another get_user_pages is running at the same time?? */
 675        if (WARN_ON(gtt->range))
 676                return -EFAULT;
 677
 678        if (!mmget_not_zero(mm)) /* Happens during process shutdown */
 679                return -ESRCH;
 680
 681        mmap_read_lock(mm);
 682        vma = vma_lookup(mm, start);
 683        if (unlikely(!vma)) {
 684                r = -EFAULT;
 685                goto out_unlock;
 686        }
 687        if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
 688                vma->vm_file)) {
 689                r = -EPERM;
 690                goto out_unlock;
 691        }
 692
 693        readonly = amdgpu_ttm_tt_is_readonly(ttm);
 694        r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
 695                                       ttm->num_pages, &gtt->range, readonly,
 696                                       true, NULL);
 697out_unlock:
 698        mmap_read_unlock(mm);
 699        mmput(mm);
 700
 701        return r;
 702}
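
/*
 * Usage sketch (editor's illustration, simplified from the userptr paths):
 *
 *      struct page **pages = kvmalloc_array(bo->tbo.ttm->num_pages,
 *                                           sizeof(*pages), GFP_KERNEL);
 *
 *      r = amdgpu_ttm_tt_get_user_pages(bo, pages);
 *      if (!r) {
 *              amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, pages);
 *              ...validate and map the BO...
 *              if (!amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm))
 *                      ...pages were invalidated, restart...
 *      }
 *      kvfree(pages);
 *
 * Exactly one _done() call must follow every successful _get_user_pages().
 */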
 703
 704/*
  705 * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table
  706 * changes and check if the pages backing this ttm range have been invalidated
 707 *
 708 * Returns: true if pages are still valid
 709 */
 710bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 711{
 712        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 713        bool r = false;
 714
 715        if (!gtt || !gtt->userptr)
 716                return false;
 717
 718        DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
 719                gtt->userptr, ttm->num_pages);
 720
 721        WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
 722                "No user pages to check\n");
 723
 724        if (gtt->range) {
 725                /*
 726                 * FIXME: Must always hold notifier_lock for this, and must
 727                 * not ignore the return code.
 728                 */
 729                r = amdgpu_hmm_range_get_pages_done(gtt->range);
 730                gtt->range = NULL;
 731        }
 732
 733        return !r;
 734}
 735#endif
 736
 737/*
  738 * amdgpu_ttm_tt_set_user_pages - Copy user pages into the ttm_tt page array.
 739 *
 740 * Called by amdgpu_cs_list_validate(). This creates the page list
 741 * that backs user memory and will ultimately be mapped into the device
 742 * address space.
 743 */
 744void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 745{
 746        unsigned long i;
 747
 748        for (i = 0; i < ttm->num_pages; ++i)
 749                ttm->pages[i] = pages ? pages[i] : NULL;
 750}
 751
 752/*
 753 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
 754 *
 755 * Called by amdgpu_ttm_backend_bind()
 756 **/
 757static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
 758                                     struct ttm_tt *ttm)
 759{
 760        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 761        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 762        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
 763        enum dma_data_direction direction = write ?
 764                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 765        int r;
 766
 767        /* Allocate an SG array and squash pages into it */
 768        r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 769                                      (u64)ttm->num_pages << PAGE_SHIFT,
 770                                      GFP_KERNEL);
 771        if (r)
 772                goto release_sg;
 773
 774        /* Map SG to device */
 775        r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
 776        if (r)
 777                goto release_sg;
 778
 779        /* convert SG to linear array of pages and dma addresses */
 780        drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
 781                                       ttm->num_pages);
 782
 783        return 0;
 784
 785release_sg:
 786        kfree(ttm->sg);
 787        ttm->sg = NULL;
 788        return r;
 789}
 790
 791/*
 792 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 793 */
 794static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
 795                                        struct ttm_tt *ttm)
 796{
 797        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 798        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 799        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
 800        enum dma_data_direction direction = write ?
 801                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 802
 803        /* double check that we don't free the table twice */
 804        if (!ttm->sg || !ttm->sg->sgl)
 805                return;
 806
 807        /* unmap the pages mapped to the device */
 808        dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
 809        sg_free_table(ttm->sg);
 810
 811#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
 812        if (gtt->range) {
 813                unsigned long i;
 814
 815                for (i = 0; i < ttm->num_pages; i++) {
 816                        if (ttm->pages[i] !=
 817                            hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
 818                                break;
 819                }
 820
 821                WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
 822        }
 823#endif
 824}
 825
 826static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 827                                struct ttm_buffer_object *tbo,
 828                                uint64_t flags)
 829{
 830        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
 831        struct ttm_tt *ttm = tbo->ttm;
 832        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 833        int r;
 834
 835        if (amdgpu_bo_encrypted(abo))
 836                flags |= AMDGPU_PTE_TMZ;
 837
 838        if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
 839                uint64_t page_idx = 1;
 840
 841                r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
 842                                gtt->ttm.dma_address, flags);
 843                if (r)
 844                        goto gart_bind_fail;
 845
 846                /* The memory type of the first page defaults to UC. Now
 847                 * modify the memory type to NC from the second page of
 848                 * the BO onward.
 849                 */
 850                flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
 851                flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 852
 853                r = amdgpu_gart_bind(adev,
 854                                gtt->offset + (page_idx << PAGE_SHIFT),
 855                                ttm->num_pages - page_idx,
 856                                &(gtt->ttm.dma_address[page_idx]), flags);
 857        } else {
 858                r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 859                                     gtt->ttm.dma_address, flags);
 860        }
 861
 862gart_bind_fail:
 863        if (r)
 864                DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
 865                          ttm->num_pages, gtt->offset);
 866
 867        return r;
 868}
 869
 870/*
 871 * amdgpu_ttm_backend_bind - Bind GTT memory
 872 *
 873 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
 874 * This handles binding GTT memory to the device address space.
 875 */
 876static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
 877                                   struct ttm_tt *ttm,
 878                                   struct ttm_resource *bo_mem)
 879{
 880        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 881        struct amdgpu_ttm_tt *gtt = (void*)ttm;
 882        uint64_t flags;
 883        int r = 0;
 884
 885        if (!bo_mem)
 886                return -EINVAL;
 887
 888        if (gtt->bound)
 889                return 0;
 890
 891        if (gtt->userptr) {
 892                r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
 893                if (r) {
 894                        DRM_ERROR("failed to pin userptr\n");
 895                        return r;
 896                }
 897        } else if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
 898                if (!ttm->sg) {
 899                        struct dma_buf_attachment *attach;
 900                        struct sg_table *sgt;
 901
 902                        attach = gtt->gobj->import_attach;
 903                        sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
 904                        if (IS_ERR(sgt))
 905                                return PTR_ERR(sgt);
 906
 907                        ttm->sg = sgt;
 908                }
 909
 910                drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
 911                                               ttm->num_pages);
 912        }
 913
 914        if (!ttm->num_pages) {
 915                WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
 916                     ttm->num_pages, bo_mem, ttm);
 917        }
 918
 919        if (bo_mem->mem_type == AMDGPU_PL_GDS ||
 920            bo_mem->mem_type == AMDGPU_PL_GWS ||
 921            bo_mem->mem_type == AMDGPU_PL_OA)
 922                return -EINVAL;
 923
 924        if (bo_mem->mem_type != TTM_PL_TT ||
 925            !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
 926                gtt->offset = AMDGPU_BO_INVALID_OFFSET;
 927                return 0;
 928        }
 929
 930        /* compute PTE flags relevant to this BO memory */
 931        flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
 932
 933        /* bind pages into GART page tables */
 934        gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 935        r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 936                gtt->ttm.dma_address, flags);
 937
 938        if (r)
 939                DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
 940                          ttm->num_pages, gtt->offset);
 941        gtt->bound = true;
 942        return r;
 943}
 944
 945/*
 946 * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
 947 * through AGP or GART aperture.
 948 *
 949 * If bo is accessible through AGP aperture, then use AGP aperture
 950 * to access bo; otherwise allocate logical space in GART aperture
 951 * and map bo to GART aperture.
 952 */
 953int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 954{
 955        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 956        struct ttm_operation_ctx ctx = { false, false };
 957        struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
 958        struct ttm_placement placement;
 959        struct ttm_place placements;
 960        struct ttm_resource *tmp;
 961        uint64_t addr, flags;
 962        int r;
 963
 964        if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
 965                return 0;
 966
 967        addr = amdgpu_gmc_agp_addr(bo);
 968        if (addr != AMDGPU_BO_INVALID_OFFSET) {
 969                bo->resource->start = addr >> PAGE_SHIFT;
 970                return 0;
 971        }
 972
 973        /* allocate GART space */
 974        placement.num_placement = 1;
 975        placement.placement = &placements;
 976        placement.num_busy_placement = 1;
 977        placement.busy_placement = &placements;
 978        placements.fpfn = 0;
 979        placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
 980        placements.mem_type = TTM_PL_TT;
 981        placements.flags = bo->resource->placement;
 982
 983        r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
 984        if (unlikely(r))
 985                return r;
 986
 987        /* compute PTE flags for this buffer object */
 988        flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
 989
 990        /* Bind pages */
 991        gtt->offset = (u64)tmp->start << PAGE_SHIFT;
 992        r = amdgpu_ttm_gart_bind(adev, bo, flags);
 993        if (unlikely(r)) {
 994                ttm_resource_free(bo, &tmp);
 995                return r;
 996        }
 997
 998        amdgpu_gart_invalidate_tlb(adev);
 999        ttm_resource_free(bo, &bo->resource);
1000        ttm_bo_assign_mem(bo, tmp);
1001
1002        return 0;
1003}
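
/*
 * Editor's illustration: a caller that needs a GPU-visible address for a GTT
 * BO typically validates the BO first and then forces a GART assignment:
 *
 *      r = amdgpu_ttm_alloc_gart(&bo->tbo);
 *      if (r)
 *              return r;
 *      addr = amdgpu_bo_gpu_offset(bo);
 *
 * where amdgpu_bo_gpu_offset() only yields a meaningful value once the
 * resource has a real start offset, which is what this helper ensures.
 */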
1004
1005/*
1006 * amdgpu_ttm_recover_gart - Rebind GTT pages
1007 *
1008 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
1009 * rebind GTT pages during a GPU reset.
1010 */
1011int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
1012{
1013        struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1014        uint64_t flags;
1015        int r;
1016
1017        if (!tbo->ttm)
1018                return 0;
1019
1020        flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
1021        r = amdgpu_ttm_gart_bind(adev, tbo, flags);
1022
1023        return r;
1024}
1025
1026/*
1027 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
1028 *
1029 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
1030 * ttm_tt_destroy().
1031 */
1032static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
1033                                      struct ttm_tt *ttm)
1034{
1035        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
1036        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1037        int r;
1038
1039        /* if the pages have userptr pinning then clear that first */
1040        if (gtt->userptr) {
1041                amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
1042        } else if (ttm->sg && gtt->gobj->import_attach) {
1043                struct dma_buf_attachment *attach;
1044
1045                attach = gtt->gobj->import_attach;
1046                dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
1047                ttm->sg = NULL;
1048        }
1049
1050        if (!gtt->bound)
1051                return;
1052
1053        if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1054                return;
1055
1056        /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
1057        r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1058        if (r)
1059                DRM_ERROR("failed to unbind %u pages at 0x%08llX\n",
1060                          gtt->ttm.num_pages, gtt->offset);
1061        gtt->bound = false;
1062}
1063
1064static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
1065                                       struct ttm_tt *ttm)
1066{
1067        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1068
1069        amdgpu_ttm_backend_unbind(bdev, ttm);
1070        ttm_tt_destroy_common(bdev, ttm);
1071        if (gtt->usertask)
1072                put_task_struct(gtt->usertask);
1073
1074        ttm_tt_fini(&gtt->ttm);
1075        kfree(gtt);
1076}
1077
1078/**
1079 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
1080 *
1081 * @bo: The buffer object to create a GTT ttm_tt object around
1082 * @page_flags: Page flags to be added to the ttm_tt object
1083 *
1084 * Called by ttm_tt_create().
1085 */
1086static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1087                                           uint32_t page_flags)
1088{
1089        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1090        struct amdgpu_ttm_tt *gtt;
1091        enum ttm_caching caching;
1092
1093        gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
1094        if (gtt == NULL) {
1095                return NULL;
1096        }
1097        gtt->gobj = &bo->base;
1098
1099        if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
1100                caching = ttm_write_combined;
1101        else
1102                caching = ttm_cached;
1103
1104        /* allocate space for the uninitialized page entries */
1105        if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
1106                kfree(gtt);
1107                return NULL;
1108        }
1109        return &gtt->ttm;
1110}
1111
1112/*
1113 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
1114 *
1115 * Map the pages of a ttm_tt object to an address space visible
1116 * to the underlying device.
1117 */
1118static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
1119                                  struct ttm_tt *ttm,
1120                                  struct ttm_operation_ctx *ctx)
1121{
1122        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
1123        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1124
1125        /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1126        if (gtt->userptr) {
1127                ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1128                if (!ttm->sg)
1129                        return -ENOMEM;
1130                return 0;
1131        }
1132
1133        if (ttm->page_flags & TTM_PAGE_FLAG_SG)
1134                return 0;
1135
1136        return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
1137}
1138
1139/*
1140 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
1141 *
1142 * Unmaps pages of a ttm_tt object from the device address space and
1143 * unpopulates the page array backing it.
1144 */
1145static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
1146                                     struct ttm_tt *ttm)
1147{
1148        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1149        struct amdgpu_device *adev;
1150
1151        if (gtt->userptr) {
1152                amdgpu_ttm_tt_set_user_pages(ttm, NULL);
1153                kfree(ttm->sg);
1154                ttm->sg = NULL;
1155                return;
1156        }
1157
1158        if (ttm->page_flags & TTM_PAGE_FLAG_SG)
1159                return;
1160
1161        adev = amdgpu_ttm_adev(bdev);
1162        return ttm_pool_free(&adev->mman.bdev.pool, ttm);
1163}
1164
1165/**
1166 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
1167 * task
1168 *
1169 * @bo: The ttm_buffer_object to bind this userptr to
 1170 * @addr:  The address in the current task's VM space to use
1171 * @flags: Requirements of userptr object.
1172 *
1173 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
 1174 * to the current task
1175 */
1176int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
1177                              uint64_t addr, uint32_t flags)
1178{
1179        struct amdgpu_ttm_tt *gtt;
1180
1181        if (!bo->ttm) {
1182                /* TODO: We want a separate TTM object type for userptrs */
1183                bo->ttm = amdgpu_ttm_tt_create(bo, 0);
1184                if (bo->ttm == NULL)
1185                        return -ENOMEM;
1186        }
1187
1188        /* Set TTM_PAGE_FLAG_SG before populate but after create. */
1189        bo->ttm->page_flags |= TTM_PAGE_FLAG_SG;
1190
1191        gtt = (void *)bo->ttm;
1192        gtt->userptr = addr;
1193        gtt->userflags = flags;
1194
1195        if (gtt->usertask)
1196                put_task_struct(gtt->usertask);
1197        gtt->usertask = current->group_leader;
1198        get_task_struct(gtt->usertask);
1199
1200        return 0;
1201}
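
/*
 * Usage sketch (editor's illustration, loosely following
 * amdgpu_gem_userptr_ioctl(); "args" stands for the ioctl arguments):
 *
 *      r = amdgpu_ttm_tt_set_userptr(&bo->tbo, args->addr, args->flags);
 *      if (r)
 *              goto release_object;
 *
 * After this the ttm_tt is flagged as an SG userptr object, so
 * amdgpu_ttm_tt_populate() only allocates the sg_table and
 * amdgpu_ttm_backend_bind() takes the userptr pinning path.
 */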
1202
1203/*
1204 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
1205 */
1206struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1207{
1208        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1209
1210        if (gtt == NULL)
1211                return NULL;
1212
1213        if (gtt->usertask == NULL)
1214                return NULL;
1215
1216        return gtt->usertask->mm;
1217}
1218
1219/*
 1220 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies within an
 1221 * address range for the current task.
1223 */
1224bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1225                                  unsigned long end)
1226{
1227        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1228        unsigned long size;
1229
1230        if (gtt == NULL || !gtt->userptr)
1231                return false;
1232
1233        /* Return false if no part of the ttm_tt object lies within
1234         * the range
1235         */
1236        size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
1237        if (gtt->userptr > end || gtt->userptr + size <= start)
1238                return false;
1239
1240        return true;
1241}
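
/*
 * Editor's worked example: with userptr = 0x10000 and num_pages = 4 (so
 * size = 0x4000 with 4 KiB pages) the object covers [0x10000, 0x14000).
 * A range with start = 0x13000 and end = 0x15000 overlaps and returns true,
 * while start = 0x14000 does not, because userptr + size <= start.
 */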
1242
1243/*
 1244 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
1245 */
1246bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1247{
1248        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1249
1250        if (gtt == NULL || !gtt->userptr)
1251                return false;
1252
1253        return true;
1254}
1255
1256/*
1257 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
1258 */
1259bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1260{
1261        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1262
1263        if (gtt == NULL)
1264                return false;
1265
1266        return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1267}
1268
1269/**
1270 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1271 *
1272 * @ttm: The ttm_tt object to compute the flags for
1273 * @mem: The memory registry backing this ttm_tt object
1274 *
1275 * Figure out the flags to use for a VM PDE (Page Directory Entry).
1276 */
1277uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
1278{
1279        uint64_t flags = 0;
1280
1281        if (mem && mem->mem_type != TTM_PL_SYSTEM)
1282                flags |= AMDGPU_PTE_VALID;
1283
1284        if (mem && (mem->mem_type == TTM_PL_TT ||
1285                    mem->mem_type == AMDGPU_PL_PREEMPT)) {
1286                flags |= AMDGPU_PTE_SYSTEM;
1287
1288                if (ttm->caching == ttm_cached)
1289                        flags |= AMDGPU_PTE_SNOOPED;
1290        }
1291
1292        if (mem && mem->mem_type == TTM_PL_VRAM &&
1293                        mem->bus.caching == ttm_cached)
1294                flags |= AMDGPU_PTE_SNOOPED;
1295
1296        return flags;
1297}
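
/*
 * Editor's worked example: a cached GTT resource (mem_type == TTM_PL_TT,
 * ttm->caching == ttm_cached) yields
 *
 *      AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED
 *
 * while a plain TTM_PL_SYSTEM resource yields 0, i.e. no valid entry.
 */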
1298
1299/**
1300 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
1301 *
1302 * @adev: amdgpu_device pointer
1303 * @ttm: The ttm_tt object to compute the flags for
1304 * @mem: The memory registry backing this ttm_tt object
1305 *
1306 * Figure out the flags to use for a VM PTE (Page Table Entry).
1307 */
1308uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1309                                 struct ttm_resource *mem)
1310{
1311        uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
1312
1313        flags |= adev->gart.gart_pte_flags;
1314        flags |= AMDGPU_PTE_READABLE;
1315
1316        if (!amdgpu_ttm_tt_is_readonly(ttm))
1317                flags |= AMDGPU_PTE_WRITEABLE;
1318
1319        return flags;
1320}
1321
1322/*
1323 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
1324 * object.
1325 *
1326 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
1327 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
1328 * it can find space for a new object and by ttm_bo_force_list_clean() which is
1329 * used to clean out a memory space.
1330 */
1331static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1332                                            const struct ttm_place *place)
1333{
1334        unsigned long num_pages = bo->resource->num_pages;
1335        struct amdgpu_res_cursor cursor;
1336        struct dma_resv_list *flist;
1337        struct dma_fence *f;
1338        int i;
1339
1340        /* Swapout? */
1341        if (bo->resource->mem_type == TTM_PL_SYSTEM)
1342                return true;
1343
1344        if (bo->type == ttm_bo_type_kernel &&
1345            !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1346                return false;
1347
1348        /* If bo is a KFD BO, check if the bo belongs to the current process.
1349         * If true, then return false as any KFD process needs all its BOs to
1350         * be resident to run successfully
1351         */
1352        flist = dma_resv_shared_list(bo->base.resv);
1353        if (flist) {
1354                for (i = 0; i < flist->shared_count; ++i) {
1355                        f = rcu_dereference_protected(flist->shared[i],
1356                                dma_resv_held(bo->base.resv));
1357                        if (amdkfd_fence_check_mm(f, current->mm))
1358                                return false;
1359                }
1360        }
1361
1362        switch (bo->resource->mem_type) {
1363        case AMDGPU_PL_PREEMPT:
1364                /* Preemptible BOs don't own system resources managed by the
1365                 * driver (pages, VRAM, GART space). They point to resources
1366                 * owned by someone else (e.g. pageable memory in user mode
1367                 * or a DMABuf). They are used in a preemptible context so we
1368                 * can guarantee no deadlocks and good QoS in case of MMU
1369                 * notifiers or DMABuf move notifiers from the resource owner.
1370                 */
1371                return false;
1372        case TTM_PL_TT:
1373                if (amdgpu_bo_is_amdgpu_bo(bo) &&
1374                    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
1375                        return false;
1376                return true;
1377
1378        case TTM_PL_VRAM:
1379                /* Check each drm MM node individually */
1380                amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
1381                                 &cursor);
1382                while (cursor.remaining) {
1383                        if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
1384                            && !(place->lpfn &&
1385                                 place->lpfn <= PFN_DOWN(cursor.start)))
1386                                return true;
1387
1388                        amdgpu_res_next(&cursor, cursor.size);
1389                }
1390                return false;
1391
1392        default:
1393                break;
1394        }
1395
1396        return ttm_bo_eviction_valuable(bo, place);
1397}
1398
1399static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
1400                                      void *buf, size_t size, bool write)
1401{
1402        while (size) {
1403                uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
1404                uint64_t bytes = 4 - (pos & 0x3);
1405                uint32_t shift = (pos & 0x3) * 8;
1406                uint32_t mask = 0xffffffff << shift;
1407                uint32_t value = 0;
1408
1409                if (size < bytes) {
1410                        mask &= 0xffffffff >> (bytes - size) * 8;
1411                        bytes = size;
1412                }
1413
1414                if (mask != 0xffffffff) {
1415                        amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
1416                        if (write) {
1417                                value &= ~mask;
1418                                value |= (*(uint32_t *)buf << shift) & mask;
1419                                amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
1420                        } else {
1421                                value = (value & mask) >> shift;
1422                                memcpy(buf, &value, bytes);
1423                        }
1424                } else {
1425                        amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
1426                }
1427
1428                pos += bytes;
1429                buf += bytes;
1430                size -= bytes;
1431        }
1432}
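
/*
 * Editor's worked example of the read-modify-write above: a 2-byte write at
 * pos = 6 gives aligned_pos = 4, bytes = 2, shift = 16 and mask = 0xffff0000,
 * so the dword at offset 4 is read, the bytes at offsets 6-7 are replaced
 * with the caller's data, and the dword is written back with bytes 4-5
 * preserved.
 */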
1433
1434/**
1435 * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
1436 *
1437 * @bo:  The buffer object to read/write
1438 * @offset:  Offset into buffer object
1439 * @buf:  Secondary buffer to write/read from
1440 * @len: Length in bytes of access
1441 * @write:  true if writing
1442 *
1443 * This is used to access VRAM that backs a buffer object via MMIO
1444 * access for debugging purposes.
1445 */
1446static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1447                                    unsigned long offset, void *buf, int len,
1448                                    int write)
1449{
1450        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1451        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1452        struct amdgpu_res_cursor cursor;
1453        int ret = 0;
1454
1455        if (bo->resource->mem_type != TTM_PL_VRAM)
1456                return -EIO;
1457
1458        amdgpu_res_first(bo->resource, offset, len, &cursor);
1459        while (cursor.remaining) {
1460                size_t count, size = cursor.size;
1461                loff_t pos = cursor.start;
1462
1463                count = amdgpu_device_aper_access(adev, pos, buf, size, write);
1464                size -= count;
1465                if (size) {
 1466                        /* use MM access for the rest of VRAM and for unaligned addresses */
1467                        pos += count;
1468                        buf += count;
1469                        amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
1470                }
1471
1472                ret += cursor.size;
1473                buf += cursor.size;
1474                amdgpu_res_next(&cursor, cursor.size);
1475        }
1476
1477        return ret;
1478}
1479
1480static void
1481amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
1482{
1483        amdgpu_bo_move_notify(bo, false, NULL);
1484}
1485
1486static struct ttm_device_funcs amdgpu_bo_driver = {
1487        .ttm_tt_create = &amdgpu_ttm_tt_create,
1488        .ttm_tt_populate = &amdgpu_ttm_tt_populate,
1489        .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1490        .ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
1491        .eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
1492        .evict_flags = &amdgpu_evict_flags,
1493        .move = &amdgpu_bo_move,
1494        .delete_mem_notify = &amdgpu_bo_delete_mem_notify,
1495        .release_notify = &amdgpu_bo_release_notify,
1496        .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1497        .io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1498        .access_memory = &amdgpu_ttm_access_memory,
1499        .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
1500};
1501
1502/*
1503 * Firmware Reservation functions
1504 */
1505/**
1506 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
1507 *
1508 * @adev: amdgpu_device pointer
1509 *
1510 * Free the firmware reserved VRAM if it has been reserved.
1511 */
1512static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1513{
1514        amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
1515                NULL, &adev->mman.fw_vram_usage_va);
1516}
1517
1518/**
1519 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
1520 *
1521 * @adev: amdgpu_device pointer
1522 *
1523 * Create a BO to back the VRAM reservation requested by the firmware.
1524 */
1525static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1526{
1527        uint64_t vram_size = adev->gmc.visible_vram_size;
1528
1529        adev->mman.fw_vram_usage_va = NULL;
1530        adev->mman.fw_vram_usage_reserved_bo = NULL;
1531
1532        if (adev->mman.fw_vram_usage_size == 0 ||
1533            adev->mman.fw_vram_usage_size > vram_size)
1534                return 0;
1535
1536        return amdgpu_bo_create_kernel_at(adev,
1537                                          adev->mman.fw_vram_usage_start_offset,
1538                                          adev->mman.fw_vram_usage_size,
1539                                          AMDGPU_GEM_DOMAIN_VRAM,
1540                                          &adev->mman.fw_vram_usage_reserved_bo,
1541                                          &adev->mman.fw_vram_usage_va);
1542}
1543
1544/*
1545 * Memory training reservation functions
1546 */
1547
1548/**
1549 * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
1550 *
1551 * @adev: amdgpu_device pointer
1552 *
1553 * Free the VRAM reserved for memory training if it has been reserved.
1554 */
1555static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
1556{
1557        struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1558
1559        ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
1560        amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1561        ctx->c2p_bo = NULL;
1562
1563        return 0;
1564}
1565
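/*
 * amdgpu_ttm_training_data_block_init - compute where the GDDR6 memory
 * training buffers live: the C2P block sits just below the IP discovery
 * TMR at the top of VRAM (1MB aligned), the P2C block at the fixed GDDR6
 * training offset from the end of VRAM.
 */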
1566static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
1567{
1568        struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1569
1570        memset(ctx, 0, sizeof(*ctx));
1571
1572        ctx->c2p_train_data_offset =
1573                ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
1574        ctx->p2c_train_data_offset =
1575                (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
1576        ctx->train_data_size =
1577                GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1578
1579        DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
1580                        ctx->train_data_size,
1581                        ctx->p2c_train_data_offset,
1582                        ctx->c2p_train_data_offset);
1583}
1584
1585/*
1586 * reserve TMR memory at the top of VRAM which holds
1587 * IP Discovery data and is protected by PSP.
1588 */
1589static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
1590{
1591        int ret;
1592        struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1593        bool mem_train_support = false;
1594
1595        if (!amdgpu_sriov_vf(adev)) {
1596                if (amdgpu_atomfirmware_mem_training_supported(adev))
1597                        mem_train_support = true;
1598                else
1599                        DRM_DEBUG("memory training is not supported!\n");
1600        }
1601
1602        /*
1603         * Query the reserved TMR size through atom firmware info for Sienna_Cichlid and
1604         * onwards, for all use cases (IP discovery, G6 memory training, profiling,
1605         * diagnostic data, etc.).  Otherwise, fall back to the legacy approach of
1606         * reserving TMR blocks for IP discovery data and G6 memory training data
1607         * respectively.
1608         */
1609        adev->mman.discovery_tmr_size =
1610                amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
1611        if (!adev->mman.discovery_tmr_size)
1612                adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
1613
1614        if (mem_train_support) {
1615                /* reserve VRAM for memory training according to the TMR location */
1616                amdgpu_ttm_training_data_block_init(adev);
1617                ret = amdgpu_bo_create_kernel_at(adev,
1618                                         ctx->c2p_train_data_offset,
1619                                         ctx->train_data_size,
1620                                         AMDGPU_GEM_DOMAIN_VRAM,
1621                                         &ctx->c2p_bo,
1622                                         NULL);
1623                if (ret) {
1624                        DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1625                        amdgpu_ttm_training_reserve_vram_fini(adev);
1626                        return ret;
1627                }
1628                ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1629        }
1630
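        /* reserve the TMR that holds the IP discovery data at the top of VRAM */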
1631        ret = amdgpu_bo_create_kernel_at(adev,
1632                                adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
1633                                adev->mman.discovery_tmr_size,
1634                                AMDGPU_GEM_DOMAIN_VRAM,
1635                                &adev->mman.discovery_memory,
1636                                NULL);
1637        if (ret) {
1638                DRM_ERROR("alloc tmr failed(%d)!\n", ret);
1639                amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
1640                return ret;
1641        }
1642
1643        return 0;
1644}
1645
1646/*
1647 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
1648 * gtt/vram related fields.
1649 *
1650 * This initializes all of the memory space pools that the TTM layer
1651 * will need such as the GTT space (system memory mapped to the device),
1652 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
1653 * can be mapped per VMID.
1654 */
1655int amdgpu_ttm_init(struct amdgpu_device *adev)
1656{
1657        uint64_t gtt_size;
1658        int r;
1659        u64 vis_vram_limit;
1660
1661        mutex_init(&adev->mman.gtt_window_lock);
1662
1663        /* No other users of the address space, so set it to 0 */
1664        r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
1665                               adev_to_drm(adev)->anon_inode->i_mapping,
1666                               adev_to_drm(adev)->vma_offset_manager,
1667                               adev->need_swiotlb,
1668                               dma_addressing_limited(adev->dev));
1669        if (r) {
1670                DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1671                return r;
1672        }
1673        adev->mman.initialized = true;
1674
1675        /* Initialize VRAM pool with all of VRAM divided into pages */
1676        r = amdgpu_vram_mgr_init(adev);
1677        if (r) {
1678                DRM_ERROR("Failed initializing VRAM heap.\n");
1679                return r;
1680        }
1681
1682        /* Reduce size of CPU-visible VRAM if requested */
1683        vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
1684        if (amdgpu_vis_vram_limit > 0 &&
1685            vis_vram_limit <= adev->gmc.visible_vram_size)
1686                adev->gmc.visible_vram_size = vis_vram_limit;
1687
1688        /* Change the size here instead of the init above so only lpfn is affected */
1689        amdgpu_ttm_set_buffer_funcs_status(adev, false);
1690#ifdef CONFIG_64BIT
1691#ifdef CONFIG_X86
1692        if (adev->gmc.xgmi.connected_to_cpu)
1693                adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
1694                                adev->gmc.visible_vram_size);
1696        else
1697#endif
1698                adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
1699                                adev->gmc.visible_vram_size);
1700#endif
1701
1702        /*
1703         * The VRAM reserved for firmware must be pinned at the offset
1704         * the firmware specified, so reserve it early.
1705         */
1706        r = amdgpu_ttm_fw_reserve_vram_init(adev);
1707        if (r)
1708                return r;
1710
1711        /*
1712         * Only NAVI10 and onward ASICs support IP discovery.
1713         * If IP discovery is enabled, a block of memory needs to be
1714         * reserved for the IP discovery data.
1715         */
1716        if (adev->mman.discovery_bin) {
1717                r = amdgpu_ttm_reserve_tmr(adev);
1718                if (r)
1719                        return r;
1720        }
1721
1722        /* Allocate the stolen VGA memory.
1723         * This is used for VGA emulation and pre-OS scanout buffers to
1724         * avoid display artifacts while transitioning between the pre-OS
1725         * environment and the driver. */
1726        r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
1727                                       AMDGPU_GEM_DOMAIN_VRAM,
1728                                       &adev->mman.stolen_vga_memory,
1729                                       NULL);
1730        if (r)
1731                return r;
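        /* reserve the extended stolen region that directly follows the VGA block */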
1732        r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
1733                                       adev->mman.stolen_extended_size,
1734                                       AMDGPU_GEM_DOMAIN_VRAM,
1735                                       &adev->mman.stolen_extended_memory,
1736                                       NULL);
1737        if (r)
1738                return r;
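        /* reserve any additional stolen region so TTM never hands it out */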
1739        r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
1740                                       adev->mman.stolen_reserved_size,
1741                                       AMDGPU_GEM_DOMAIN_VRAM,
1742                                       &adev->mman.stolen_reserved_memory,
1743                                       NULL);
1744        if (r)
1745                return r;
1746
1747        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1748                 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1749
1750        /* Compute the GTT size, either based on 3/4 of the system RAM size
1751         * or on whatever the user passed on module init. */
1752        if (amdgpu_gtt_size == -1) {
1753                struct sysinfo si;
1754
1755                si_meminfo(&si);
1756                gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1757                               adev->gmc.mc_vram_size),
1758                               ((uint64_t)si.totalram * si.mem_unit * 3/4));
1759        } else {
1760                gtt_size = (uint64_t)amdgpu_gtt_size << 20;
1761        }
1762
1763        /* Initialize GTT memory pool */
1764        r = amdgpu_gtt_mgr_init(adev, gtt_size);
1765        if (r) {
1766                DRM_ERROR("Failed initializing GTT heap.\n");
1767                return r;
1768        }
1769        DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
1770                 (unsigned)(gtt_size / (1024 * 1024)));
1771
1772        /* Initialize preemptible memory pool */
1773        r = amdgpu_preempt_mgr_init(adev);
1774        if (r) {
1775                DRM_ERROR("Failed initializing PREEMPT heap.\n");
1776                return r;
1777        }
1778
1779        /* Initialize various on-chip memory pools */
1780        r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
1781        if (r) {
1782                DRM_ERROR("Failed initializing GDS heap.\n");
1783                return r;
1784        }
1785
1786        r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
1787        if (r) {
1788                DRM_ERROR("Failed initializing gws heap.\n");
1789                return r;
1790        }
1791
1792        r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
1793        if (r) {
1794                DRM_ERROR("Failed initializing oa heap.\n");
1795                return r;
1796        }
1797
1798        return 0;
1799}
1800
1801/*
1802 * amdgpu_ttm_fini - De-initialize the TTM memory pools
1803 */
1804void amdgpu_ttm_fini(struct amdgpu_device *adev)
1805{
1806        if (!adev->mman.initialized)
1807                return;
1808
1809        amdgpu_ttm_training_reserve_vram_fini(adev);
1810        /* return the stolen vga memory back to VRAM */
1811        amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
1812        amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
1813        /* return the IP Discovery TMR memory back to VRAM */
1814        amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
1815        if (adev->mman.stolen_reserved_size)
1816                amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
1817                                      NULL, NULL);
1818        amdgpu_ttm_fw_reserve_vram_fini(adev);
1819
1820        amdgpu_vram_mgr_fini(adev);
1821        amdgpu_gtt_mgr_fini(adev);
1822        amdgpu_preempt_mgr_fini(adev);
1823        ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
1824        ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
1825        ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
1826        ttm_device_fini(&adev->mman.bdev);
1827        adev->mman.initialized = false;
1828        DRM_INFO("amdgpu: ttm finalized\n");
1829}
1830
1831/**
1832 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
1833 *
1834 * @adev: amdgpu_device pointer
1835 * @enable: true when we can use buffer functions.
1836 *
1837 * Enable/disable use of buffer functions during suspend/resume. This should
1838 * only be called at bootup or when userspace isn't running.
1839 */
1840void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
1841{
1842        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
1843        uint64_t size;
1844        int r;
1845
1846        if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
1847            adev->mman.buffer_funcs_enabled == enable)
1848                return;
1849
1850        if (enable) {
1851                struct amdgpu_ring *ring;
1852                struct drm_gpu_scheduler *sched;
1853
1854                ring = adev->mman.buffer_funcs_ring;
1855                sched = &ring->sched;
1856                r = drm_sched_entity_init(&adev->mman.entity,
1857                                          DRM_SCHED_PRIORITY_KERNEL, &sched,
1858                                          1, NULL);
1859                if (r) {
1860                        DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
1861                                  r);
1862                        return;
1863                }
1864        } else {
1865                drm_sched_entity_destroy(&adev->mman.entity);
1866                dma_fence_put(man->move);
1867                man->move = NULL;
1868        }
1869
1870        /* this just adjusts TTM's idea of the VRAM size so lpfn ends up with the correct value */
1871        if (enable)
1872                size = adev->gmc.real_vram_size;
1873        else
1874                size = adev->gmc.visible_vram_size;
1875        man->size = size >> PAGE_SHIFT;
1876        adev->mman.buffer_funcs_enabled = enable;
1877}
1878
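/**
 * amdgpu_copy_buffer - schedule a GPU copy between two buffer addresses
 *
 * @ring: ring the copy job is submitted to
 * @src_offset: source GPU address
 * @dst_offset: destination GPU address
 * @byte_count: number of bytes to copy
 * @resv: reservation object to sync the copy with (may be NULL)
 * @fence: returned fence signalling completion of the copy
 * @direct_submit: submit the IB directly to the ring, bypassing the scheduler
 * @vm_needs_flush: true if a VM flush is needed before the copy
 * @tmz: true if the copy touches TMZ protected memory
 *
 * Splits the copy into chunks of at most copy_max_bytes and emits them all
 * into a single job.
 *
 * Return: 0 on success, negative error code otherwise.
 */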
1879int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
1880                       uint64_t dst_offset, uint32_t byte_count,
1881                       struct dma_resv *resv,
1882                       struct dma_fence **fence, bool direct_submit,
1883                       bool vm_needs_flush, bool tmz)
1884{
1885        enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
1886                AMDGPU_IB_POOL_DELAYED;
1887        struct amdgpu_device *adev = ring->adev;
1888        struct amdgpu_job *job;
1889
1890        uint32_t max_bytes;
1891        unsigned num_loops, num_dw;
1892        unsigned i;
1893        int r;
1894
1895        if (direct_submit && !ring->sched.ready) {
1896                DRM_ERROR("Trying to move memory with ring turned off.\n");
1897                return -EINVAL;
1898        }
1899
1900        max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
1901        num_loops = DIV_ROUND_UP(byte_count, max_bytes);
1902        num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
1903
1904        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
1905        if (r)
1906                return r;
1907
1908        if (vm_needs_flush) {
1909                job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
1910                                        adev->gmc.pdb0_bo : adev->gart.bo);
1911                job->vm_needs_flush = true;
1912        }
1913        if (resv) {
1914                r = amdgpu_sync_resv(adev, &job->sync, resv,
1915                                     AMDGPU_SYNC_ALWAYS,
1916                                     AMDGPU_FENCE_OWNER_UNDEFINED);
1917                if (r) {
1918                        DRM_ERROR("sync failed (%d).\n", r);
1919                        goto error_free;
1920                }
1921        }
1922
1923        for (i = 0; i < num_loops; i++) {
1924                uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1925
1926                amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
1927                                        dst_offset, cur_size_in_bytes, tmz);
1928
1929                src_offset += cur_size_in_bytes;
1930                dst_offset += cur_size_in_bytes;
1931                byte_count -= cur_size_in_bytes;
1932        }
1933
1934        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1935        WARN_ON(job->ibs[0].length_dw > num_dw);
1936        if (direct_submit)
1937                r = amdgpu_job_submit_direct(job, ring, fence);
1938        else
1939                r = amdgpu_job_submit(job, &adev->mman.entity,
1940                                      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1941        if (r)
1942                goto error_free;
1943
1944        return r;
1945
1946error_free:
1947        amdgpu_job_free(job);
1948        DRM_ERROR("Error scheduling IBs (%d)\n", r);
1949        return r;
1950}
1951
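/**
 * amdgpu_fill_buffer - fill a buffer object with a 32 bit pattern
 *
 * @bo: the buffer object to fill
 * @src_data: 32 bit value written to every dword of the buffer
 * @resv: reservation object to sync the fill with (may be NULL)
 * @fence: returned fence signalling completion of the fill
 *
 * Walks the BO's backing resource and emits fill commands in chunks of at
 * most fill_max_bytes, all submitted as a single job.
 *
 * Return: 0 on success, negative error code otherwise.
 */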
1952int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1953                       uint32_t src_data,
1954                       struct dma_resv *resv,
1955                       struct dma_fence **fence)
1956{
1957        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
1958        uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
1959        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
1960
1961        struct amdgpu_res_cursor cursor;
1962        unsigned int num_loops, num_dw;
1963        uint64_t num_bytes;
1964
1965        struct amdgpu_job *job;
1966        int r;
1967
1968        if (!adev->mman.buffer_funcs_enabled) {
1969                DRM_ERROR("Trying to clear memory with ring turned off.\n");
1970                return -EINVAL;
1971        }
1972
1973        if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) {
1974                DRM_ERROR("Trying to clear preemptible memory.\n");
1975                return -EINVAL;
1976        }
1977
1978        if (bo->tbo.resource->mem_type == TTM_PL_TT) {
1979                r = amdgpu_ttm_alloc_gart(&bo->tbo);
1980                if (r)
1981                        return r;
1982        }
1983
1984        num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT;
1985        num_loops = 0;
1986
1987        amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
1988        while (cursor.remaining) {
1989                num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes);
1990                amdgpu_res_next(&cursor, cursor.size);
1991        }
1992        num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
1993
1994        /* for IB padding */
1995        num_dw += 64;
1996
1997        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
1998                                     &job);
1999        if (r)
2000                return r;
2001
2002        if (resv) {
2003                r = amdgpu_sync_resv(adev, &job->sync, resv,
2004                                     AMDGPU_SYNC_ALWAYS,
2005                                     AMDGPU_FENCE_OWNER_UNDEFINED);
2006                if (r) {
2007                        DRM_ERROR("sync failed (%d).\n", r);
2008                        goto error_free;
2009                }
2010        }
2011
2012        amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
2013        while (cursor.remaining) {
2014                uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes);
2015                uint64_t dst_addr = cursor.start;
2016
2017                dst_addr += amdgpu_ttm_domain_start(adev,
2018                                                    bo->tbo.resource->mem_type);
2019                amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
2020                                        cur_size);
2021
2022                amdgpu_res_next(&cursor, cur_size);
2023        }
2024
2025        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2026        WARN_ON(job->ibs[0].length_dw > num_dw);
2027        r = amdgpu_job_submit(job, &adev->mman.entity,
2028                              AMDGPU_FENCE_OWNER_UNDEFINED, fence);
2029        if (r)
2030                goto error_free;
2031
2032        return 0;
2033
2034error_free:
2035        amdgpu_job_free(job);
2036        return r;
2037}
2038
2039#if defined(CONFIG_DEBUG_FS)
2040
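/*
 * The amdgpu_mm_*_table_show() helpers below dump the state of one TTM
 * resource manager (VRAM, GTT, GDS, GWS or OA) into a debugfs file via the
 * manager's debug callback.
 */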
2041static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused)
2042{
2043        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
2044        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
2045                                                            TTM_PL_VRAM);
2046        struct drm_printer p = drm_seq_file_printer(m);
2047
2048        man->func->debug(man, &p);
2049        return 0;
2050}
2051
2052static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
2053{
2054        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
2055
2056        return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
2057}
2058
2059static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused)
2060{
2061        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
2062        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
2063                                                            TTM_PL_TT);
2064        struct drm_printer p = drm_seq_file_printer(m);
2065
2066        man->func->debug(man, &p);
2067        return 0;
2068}
2069
2070static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused)
2071{
2072        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
2073        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
2074                                                            AMDGPU_PL_GDS);
2075        struct drm_printer p = drm_seq_file_printer(m);
2076
2077        man->func->debug(man, &p);
2078        return 0;
2079}
2080
2081static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused)
2082{
2083        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
2084        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
2085                                                            AMDGPU_PL_GWS);
2086        struct drm_printer p = drm_seq_file_printer(m);
2087
2088        man->func->debug(man, &p);
2089        return 0;
2090}
2091
2092static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused)
2093{
2094        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
2095        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
2096                                                            AMDGPU_PL_OA);
2097        struct drm_printer p = drm_seq_file_printer(m);
2098
2099        man->func->debug(man, &p);
2100        return 0;
2101}
2102
2103DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table);
2104DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table);
2105DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table);
2106DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table);
2107DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table);
2108DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);
2109
2110/*
2111 * amdgpu_ttm_vram_read - Linear read access to VRAM
2112 *
2113 * Accesses VRAM via MMIO for debugging purposes.
2114 */
2115static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
2116                                    size_t size, loff_t *pos)
2117{
2118        struct amdgpu_device *adev = file_inode(f)->i_private;
2119        ssize_t result = 0;
2120
2121        if (size & 0x3 || *pos & 0x3)
2122                return -EINVAL;
2123
2124        if (*pos >= adev->gmc.mc_vram_size)
2125                return -ENXIO;
2126
2127        size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
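        /* copy through an on-stack bounce buffer, at most
         * AMDGPU_TTM_VRAM_MAX_DW_READ dwords per iteration
         */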
2128        while (size) {
2129                size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
2130                uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
2131
2132                amdgpu_device_vram_access(adev, *pos, value, bytes, false);
2133                if (copy_to_user(buf, value, bytes))
2134                        return -EFAULT;
2135
2136                result += bytes;
2137                buf += bytes;
2138                *pos += bytes;
2139                size -= bytes;
2140        }
2141
2142        return result;
2143}
2144
2145/*
2146 * amdgpu_ttm_vram_write - Linear write access to VRAM
2147 *
2148 * Accesses VRAM via MMIO for debugging purposes.
2149 */
2150static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
2151                                    size_t size, loff_t *pos)
2152{
2153        struct amdgpu_device *adev = file_inode(f)->i_private;
2154        ssize_t result = 0;
2155        int r;
2156
2157        if (size & 0x3 || *pos & 0x3)
2158                return -EINVAL;
2159
2160        if (*pos >= adev->gmc.mc_vram_size)
2161                return -ENXIO;
2162
2163        while (size) {
2164                uint32_t value;
2165
2166                if (*pos >= adev->gmc.mc_vram_size)
2167                        return result;
2168
2169                r = get_user(value, (uint32_t *)buf);
2170                if (r)
2171                        return r;
2172
2173                amdgpu_device_mm_access(adev, *pos, &value, 4, true);
2174
2175                result += 4;
2176                buf += 4;
2177                *pos += 4;
2178                size -= 4;
2179        }
2180
2181        return result;
2182}
2183
2184static const struct file_operations amdgpu_ttm_vram_fops = {
2185        .owner = THIS_MODULE,
2186        .read = amdgpu_ttm_vram_read,
2187        .write = amdgpu_ttm_vram_write,
2188        .llseek = default_llseek,
2189};
2190
2191/*
2192 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
2193 *
2194 * This function is used to read memory that has been mapped to the
2195 * GPU and the known addresses are not physical addresses but instead
2196 * bus addresses (e.g., what you'd put in an IB or ring buffer).
2197 */
2198static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2199                                 size_t size, loff_t *pos)
2200{
2201        struct amdgpu_device *adev = file_inode(f)->i_private;
2202        struct iommu_domain *dom;
2203        ssize_t result = 0;
2204        int r;
2205
2206        /* retrieve the IOMMU domain if any for this device */
2207        dom = iommu_get_domain_for_dev(adev->dev);
2208
2209        while (size) {
2210                phys_addr_t addr = *pos & PAGE_MASK;
2211                loff_t off = *pos & ~PAGE_MASK;
2212                size_t bytes = PAGE_SIZE - off;
2213                unsigned long pfn;
2214                struct page *p;
2215                void *ptr;
2216
2217                bytes = bytes < size ? bytes : size;
2218
2219                /* Translate the bus address to a physical address.  If
2220                 * the domain is NULL it means there is no IOMMU active
2221                 * and the address translation is the identity
2222                 */
2223                addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2224
2225                pfn = addr >> PAGE_SHIFT;
2226                if (!pfn_valid(pfn))
2227                        return -EPERM;
2228
2229                p = pfn_to_page(pfn);
2230                if (p->mapping != adev->mman.bdev.dev_mapping)
2231                        return -EPERM;
2232
2233                ptr = kmap(p);
2234                r = copy_to_user(buf, ptr + off, bytes);
2235                kunmap(p);
2236                if (r)
2237                        return -EFAULT;
2238
2239                size -= bytes;
2240                *pos += bytes;
2241                result += bytes;
2242        }
2243
2244        return result;
2245}
2246
2247/*
2248 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
2249 *
2250 * This function is used to write memory that has been mapped to the
2251 * GPU and the known addresses are not physical addresses but instead
2252 * bus addresses (e.g., what you'd put in an IB or ring buffer).
2253 */
2254static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
2255                                 size_t size, loff_t *pos)
2256{
2257        struct amdgpu_device *adev = file_inode(f)->i_private;
2258        struct iommu_domain *dom;
2259        ssize_t result = 0;
2260        int r;
2261
2262        dom = iommu_get_domain_for_dev(adev->dev);
2263
2264        while (size) {
2265                phys_addr_t addr = *pos & PAGE_MASK;
2266                loff_t off = *pos & ~PAGE_MASK;
2267                size_t bytes = PAGE_SIZE - off;
2268                unsigned long pfn;
2269                struct page *p;
2270                void *ptr;
2271
2272                bytes = bytes < size ? bytes : size;
2273
2274                addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2275
2276                pfn = addr >> PAGE_SHIFT;
2277                if (!pfn_valid(pfn))
2278                        return -EPERM;
2279
2280                p = pfn_to_page(pfn);
2281                if (p->mapping != adev->mman.bdev.dev_mapping)
2282                        return -EPERM;
2283
2284                ptr = kmap(p);
2285                r = copy_from_user(ptr + off, buf, bytes);
2286                kunmap(p);
2287                if (r)
2288                        return -EFAULT;
2289
2290                size -= bytes;
2291                *pos += bytes;
2292                result += bytes;
2293        }
2294
2295        return result;
2296}
2297
2298static const struct file_operations amdgpu_ttm_iomem_fops = {
2299        .owner = THIS_MODULE,
2300        .read = amdgpu_iomem_read,
2301        .write = amdgpu_iomem_write,
2302        .llseek = default_llseek
2303};
2304
2305#endif
2306
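/*
 * amdgpu_ttm_debugfs_init - register the TTM debugfs files (raw VRAM and
 * IOMEM accessors, the per-domain memory manager dumps and the TTM page
 * pool state) under the device's primary DRM minor.
 */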
2307void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
2308{
2309#if defined(CONFIG_DEBUG_FS)
2310        struct drm_minor *minor = adev_to_drm(adev)->primary;
2311        struct dentry *root = minor->debugfs_root;
2312
2313        debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
2314                                 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
2315        debugfs_create_file("amdgpu_iomem", 0444, root, adev,
2316                            &amdgpu_ttm_iomem_fops);
2317        debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
2318                            &amdgpu_mm_vram_table_fops);
2319        debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev,
2320                            &amdgpu_mm_tt_table_fops);
2321        debugfs_create_file("amdgpu_gds_mm", 0444, root, adev,
2322                            &amdgpu_mm_gds_table_fops);
2323        debugfs_create_file("amdgpu_gws_mm", 0444, root, adev,
2324                            &amdgpu_mm_gws_table_fops);
2325        debugfs_create_file("amdgpu_oa_mm", 0444, root, adev,
2326                            &amdgpu_mm_oa_table_fops);
2327        debugfs_create_file("ttm_page_pool", 0444, root, adev,
2328                            &amdgpu_ttm_page_pool_fops);
2329#endif
2330}
2331