linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
   1/*
   2 * Copyright 2009 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26/*
  27 * Authors:
  28 *    Jerome Glisse <glisse@freedesktop.org>
  29 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  30 *    Dave Airlie
  31 */
  32
  33#include <linux/dma-mapping.h>
  34#include <linux/iommu.h>
  35#include <linux/hmm.h>
  36#include <linux/pagemap.h>
  37#include <linux/sched/task.h>
  38#include <linux/sched/mm.h>
  39#include <linux/seq_file.h>
  40#include <linux/slab.h>
  41#include <linux/swap.h>
  42#include <linux/swiotlb.h>
  43#include <linux/dma-buf.h>
  44#include <linux/sizes.h>
  45
  46#include <drm/ttm/ttm_bo_api.h>
  47#include <drm/ttm/ttm_bo_driver.h>
  48#include <drm/ttm/ttm_placement.h>
  49#include <drm/ttm/ttm_module.h>
  50#include <drm/ttm/ttm_page_alloc.h>
  51
  52#include <drm/drm_debugfs.h>
  53#include <drm/amdgpu_drm.h>
  54
  55#include "amdgpu.h"
  56#include "amdgpu_object.h"
  57#include "amdgpu_trace.h"
  58#include "amdgpu_amdkfd.h"
  59#include "amdgpu_sdma.h"
  60#include "amdgpu_ras.h"
  61#include "bif/bif_4_1_d.h"
  62
  63#define AMDGPU_TTM_VRAM_MAX_DW_READ     (size_t)128
  64
  65static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
  66                             struct ttm_mem_reg *mem, unsigned num_pages,
  67                             uint64_t offset, unsigned window,
  68                             struct amdgpu_ring *ring,
  69                             uint64_t *addr);
  70
  71/**
  72 * amdgpu_init_mem_type - Initialize a memory manager for a specific type of
  73 * memory request.
  74 *
  75 * @bdev: The TTM BO device object (contains a reference to amdgpu_device)
  76 * @type: The type of memory requested
  77 * @man: The memory type manager for each domain
  78 *
 * This is called by ttm_bo_init_mm() when a memory type manager is being
 * initialized, during device init.
  81 */
  82static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
  83                                struct ttm_mem_type_manager *man)
  84{
  85        struct amdgpu_device *adev;
  86
  87        adev = amdgpu_ttm_adev(bdev);
  88
  89        switch (type) {
  90        case TTM_PL_SYSTEM:
  91                /* System memory */
  92                man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
  93                man->available_caching = TTM_PL_MASK_CACHING;
  94                man->default_caching = TTM_PL_FLAG_CACHED;
  95                break;
  96        case TTM_PL_TT:
  97                /* GTT memory  */
  98                man->func = &amdgpu_gtt_mgr_func;
  99                man->gpu_offset = adev->gmc.gart_start;
 100                man->available_caching = TTM_PL_MASK_CACHING;
 101                man->default_caching = TTM_PL_FLAG_CACHED;
 102                man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
 103                break;
 104        case TTM_PL_VRAM:
 105                /* "On-card" video ram */
 106                man->func = &amdgpu_vram_mgr_func;
 107                man->gpu_offset = adev->gmc.vram_start;
 108                man->flags = TTM_MEMTYPE_FLAG_FIXED |
 109                             TTM_MEMTYPE_FLAG_MAPPABLE;
 110                man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
 111                man->default_caching = TTM_PL_FLAG_WC;
 112                break;
 113        case AMDGPU_PL_GDS:
 114        case AMDGPU_PL_GWS:
 115        case AMDGPU_PL_OA:
                /* On-chip GDS, GWS and OA memory */
 117                man->func = &ttm_bo_manager_func;
 118                man->gpu_offset = 0;
 119                man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
 120                man->available_caching = TTM_PL_FLAG_UNCACHED;
 121                man->default_caching = TTM_PL_FLAG_UNCACHED;
 122                break;
 123        default:
 124                DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
 125                return -EINVAL;
 126        }
 127        return 0;
 128}
 129
 130/**
 131 * amdgpu_evict_flags - Compute placement flags
 132 *
 133 * @bo: The buffer object to evict
 134 * @placement: Possible destination(s) for evicted BO
 135 *
 136 * Fill in placement data when ttm_bo_evict() is called
 137 */
 138static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 139                                struct ttm_placement *placement)
 140{
 141        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 142        struct amdgpu_bo *abo;
 143        static const struct ttm_place placements = {
 144                .fpfn = 0,
 145                .lpfn = 0,
 146                .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 147        };
 148
 149        /* Don't handle scatter gather BOs */
 150        if (bo->type == ttm_bo_type_sg) {
 151                placement->num_placement = 0;
 152                placement->num_busy_placement = 0;
 153                return;
 154        }
 155
 156        /* Object isn't an AMDGPU object so ignore */
 157        if (!amdgpu_bo_is_amdgpu_bo(bo)) {
 158                placement->placement = &placements;
 159                placement->busy_placement = &placements;
 160                placement->num_placement = 1;
 161                placement->num_busy_placement = 1;
 162                return;
 163        }
 164
 165        abo = ttm_to_amdgpu_bo(bo);
 166        switch (bo->mem.mem_type) {
 167        case AMDGPU_PL_GDS:
 168        case AMDGPU_PL_GWS:
 169        case AMDGPU_PL_OA:
 170                placement->num_placement = 0;
 171                placement->num_busy_placement = 0;
 172                return;
 173
 174        case TTM_PL_VRAM:
 175                if (!adev->mman.buffer_funcs_enabled) {
 176                        /* Move to system memory */
 177                        amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 178                } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 179                           !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
 180                           amdgpu_bo_in_cpu_visible_vram(abo)) {
 181
 182                        /* Try evicting to the CPU inaccessible part of VRAM
 183                         * first, but only set GTT as busy placement, so this
 184                         * BO will be evicted to GTT rather than causing other
 185                         * BOs to be evicted from VRAM
 186                         */
 187                        amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
 188                                                         AMDGPU_GEM_DOMAIN_GTT);
 189                        abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 190                        abo->placements[0].lpfn = 0;
 191                        abo->placement.busy_placement = &abo->placements[1];
 192                        abo->placement.num_busy_placement = 1;
 193                } else {
 194                        /* Move to GTT memory */
 195                        amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
 196                }
 197                break;
 198        case TTM_PL_TT:
 199        default:
 200                amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 201                break;
 202        }
 203        *placement = abo->placement;
 204}
 205
 206/**
 207 * amdgpu_verify_access - Verify access for a mmap call
 208 *
 209 * @bo: The buffer object to map
 210 * @filp: The file pointer from the process performing the mmap
 211 *
 212 * This is called by ttm_bo_mmap() to verify whether a process
 213 * has the right to mmap a BO to their process space.
 214 */
 215static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 216{
 217        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 218
 219        /*
 220         * Don't verify access for KFD BOs. They don't have a GEM
 221         * object associated with them.
 222         */
 223        if (abo->kfd_bo)
 224                return 0;
 225
 226        if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 227                return -EPERM;
 228        return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
 229                                          filp->private_data);
 230}
 231
 232/**
 233 * amdgpu_move_null - Register memory for a buffer object
 234 *
 235 * @bo: The bo to assign the memory to
 236 * @new_mem: The memory to be assigned.
 237 *
 238 * Assign the memory from new_mem to the memory of the buffer object bo.
 239 */
 240static void amdgpu_move_null(struct ttm_buffer_object *bo,
 241                             struct ttm_mem_reg *new_mem)
 242{
 243        struct ttm_mem_reg *old_mem = &bo->mem;
 244
 245        BUG_ON(old_mem->mm_node != NULL);
 246        *old_mem = *new_mem;
 247        new_mem->mm_node = NULL;
 248}
 249
 250/**
 251 * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT buffer.
 252 *
 253 * @bo: The bo to assign the memory to.
 254 * @mm_node: Memory manager node for drm allocator.
 255 * @mem: The region where the bo resides.
 256 *
 257 */
 258static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 259                                    struct drm_mm_node *mm_node,
 260                                    struct ttm_mem_reg *mem)
 261{
 262        uint64_t addr = 0;
 263
 264        if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
 265                addr = mm_node->start << PAGE_SHIFT;
 266                addr += bo->bdev->man[mem->mem_type].gpu_offset;
 267        }
 268        return addr;
 269}
 270
 271/**
 * amdgpu_find_mm_node - Find the drm_mm_node that contains @offset and
 * adjust @offset so that it is relative to the start of the returned node.
 *
 * @mem: The region where the bo resides.
 * @offset: Byte offset into @mem; updated to be relative to the returned node.
 277 *
 278 */
 279static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
 280                                               unsigned long *offset)
 281{
 282        struct drm_mm_node *mm_node = mem->mm_node;
 283
 284        while (*offset >= (mm_node->size << PAGE_SHIFT)) {
 285                *offset -= (mm_node->size << PAGE_SHIFT);
 286                ++mm_node;
 287        }
 288        return mm_node;
 289}
 290
 291/**
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
 *
 * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
 * @size: number of bytes to copy
 * @resv: optional reservation object to sync to before the copy
 * @f: Returns the last fence if multiple jobs are submitted.
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
 * move and different for a BO to BO copy.
 299 */
 300int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 301                               struct amdgpu_copy_mem *src,
 302                               struct amdgpu_copy_mem *dst,
 303                               uint64_t size,
 304                               struct dma_resv *resv,
 305                               struct dma_fence **f)
 306{
 307        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 308        struct drm_mm_node *src_mm, *dst_mm;
 309        uint64_t src_node_start, dst_node_start, src_node_size,
 310                 dst_node_size, src_page_offset, dst_page_offset;
 311        struct dma_fence *fence = NULL;
 312        int r = 0;
 313        const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
 314                                        AMDGPU_GPU_PAGE_SIZE);
 315
 316        if (!adev->mman.buffer_funcs_enabled) {
 317                DRM_ERROR("Trying to move memory with ring turned off.\n");
 318                return -EINVAL;
 319        }
 320
 321        src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
 322        src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
 323                                             src->offset;
 324        src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
 325        src_page_offset = src_node_start & (PAGE_SIZE - 1);
 326
 327        dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
 328        dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
 329                                             dst->offset;
 330        dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
 331        dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 332
 333        mutex_lock(&adev->mman.gtt_window_lock);
 334
 335        while (size) {
 336                unsigned long cur_size;
 337                uint64_t from = src_node_start, to = dst_node_start;
 338                struct dma_fence *next;
 339
 340                /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
 341                 * begins at an offset, then adjust the size accordingly
 342                 */
 343                cur_size = min3(min(src_node_size, dst_node_size), size,
 344                                GTT_MAX_BYTES);
 345                if (cur_size + src_page_offset > GTT_MAX_BYTES ||
 346                    cur_size + dst_page_offset > GTT_MAX_BYTES)
 347                        cur_size -= max(src_page_offset, dst_page_offset);
 348
 349                /* Map only what needs to be accessed. Map src to window 0 and
 350                 * dst to window 1
 351                 */
 352                if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) {
 353                        r = amdgpu_map_buffer(src->bo, src->mem,
 354                                        PFN_UP(cur_size + src_page_offset),
 355                                        src_node_start, 0, ring,
 356                                        &from);
 357                        if (r)
 358                                goto error;
 359                        /* Adjust the offset because amdgpu_map_buffer returns
 360                         * start of mapped page
 361                         */
 362                        from += src_page_offset;
 363                }
 364
 365                if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) {
 366                        r = amdgpu_map_buffer(dst->bo, dst->mem,
 367                                        PFN_UP(cur_size + dst_page_offset),
 368                                        dst_node_start, 1, ring,
 369                                        &to);
 370                        if (r)
 371                                goto error;
 372                        to += dst_page_offset;
 373                }
 374
 375                r = amdgpu_copy_buffer(ring, from, to, cur_size,
 376                                       resv, &next, false, true);
 377                if (r)
 378                        goto error;
 379
 380                dma_fence_put(fence);
 381                fence = next;
 382
 383                size -= cur_size;
 384                if (!size)
 385                        break;
 386
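                /* Advance within the current drm_mm node, or move on to the
                 * next node once the current one is fully consumed.
                 */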
 387                src_node_size -= cur_size;
 388                if (!src_node_size) {
 389                        src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
 390                                                             src->mem);
 391                        src_node_size = (src_mm->size << PAGE_SHIFT);
 392                        src_page_offset = 0;
 393                } else {
 394                        src_node_start += cur_size;
 395                        src_page_offset = src_node_start & (PAGE_SIZE - 1);
 396                }
 397                dst_node_size -= cur_size;
 398                if (!dst_node_size) {
 399                        dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
 400                                                             dst->mem);
 401                        dst_node_size = (dst_mm->size << PAGE_SHIFT);
 402                        dst_page_offset = 0;
 403                } else {
 404                        dst_node_start += cur_size;
 405                        dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 406                }
 407        }
 408error:
 409        mutex_unlock(&adev->mman.gtt_window_lock);
 410        if (f)
 411                *f = dma_fence_get(fence);
 412        dma_fence_put(fence);
 413        return r;
 414}
 415
 416/**
 417 * amdgpu_move_blit - Copy an entire buffer to another buffer
 418 *
 419 * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
 420 * help move buffers to and from VRAM.
 421 */
 422static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 423                            bool evict, bool no_wait_gpu,
 424                            struct ttm_mem_reg *new_mem,
 425                            struct ttm_mem_reg *old_mem)
 426{
 427        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 428        struct amdgpu_copy_mem src, dst;
 429        struct dma_fence *fence = NULL;
 430        int r;
 431
 432        src.bo = bo;
 433        dst.bo = bo;
 434        src.mem = old_mem;
 435        dst.mem = new_mem;
 436        src.offset = 0;
 437        dst.offset = 0;
 438
 439        r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
 440                                       new_mem->num_pages << PAGE_SHIFT,
 441                                       bo->base.resv, &fence);
 442        if (r)
 443                goto error;
 444
 445        /* clear the space being freed */
 446        if (old_mem->mem_type == TTM_PL_VRAM &&
 447            (ttm_to_amdgpu_bo(bo)->flags &
 448             AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
 449                struct dma_fence *wipe_fence = NULL;
 450
 451                r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
 452                                       NULL, &wipe_fence);
 453                if (r) {
 454                        goto error;
 455                } else if (wipe_fence) {
 456                        dma_fence_put(fence);
 457                        fence = wipe_fence;
 458                }
 459        }
 460
 461        /* Always block for VM page tables before committing the new location */
 462        if (bo->type == ttm_bo_type_kernel)
 463                r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
 464        else
 465                r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 466        dma_fence_put(fence);
 467        return r;
 468
 469error:
 470        if (fence)
 471                dma_fence_wait(fence, false);
 472        dma_fence_put(fence);
 473        return r;
 474}
 475
 476/**
 477 * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
 478 *
 479 * Called by amdgpu_bo_move().
 480 */
 481static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 482                                struct ttm_operation_ctx *ctx,
 483                                struct ttm_mem_reg *new_mem)
 484{
 485        struct ttm_mem_reg *old_mem = &bo->mem;
 486        struct ttm_mem_reg tmp_mem;
 487        struct ttm_place placements;
 488        struct ttm_placement placement;
 489        int r;
 490
 491        /* create space/pages for new_mem in GTT space */
 492        tmp_mem = *new_mem;
 493        tmp_mem.mm_node = NULL;
 494        placement.num_placement = 1;
 495        placement.placement = &placements;
 496        placement.num_busy_placement = 1;
 497        placement.busy_placement = &placements;
 498        placements.fpfn = 0;
 499        placements.lpfn = 0;
 500        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 501        r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 502        if (unlikely(r)) {
 503                pr_err("Failed to find GTT space for blit from VRAM\n");
 504                return r;
 505        }
 506
 507        /* set caching flags */
 508        r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
 509        if (unlikely(r)) {
 510                goto out_cleanup;
 511        }
 512
 513        /* Bind the memory to the GTT space */
 514        r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
 515        if (unlikely(r)) {
 516                goto out_cleanup;
 517        }
 518
 519        /* blit VRAM to GTT */
 520        r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem);
 521        if (unlikely(r)) {
 522                goto out_cleanup;
 523        }
 524
 525        /* move BO (in tmp_mem) to new_mem */
 526        r = ttm_bo_move_ttm(bo, ctx, new_mem);
 527out_cleanup:
 528        ttm_bo_mem_put(bo, &tmp_mem);
 529        return r;
 530}
 531
 532/**
 533 * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
 534 *
 535 * Called by amdgpu_bo_move().
 536 */
 537static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 538                                struct ttm_operation_ctx *ctx,
 539                                struct ttm_mem_reg *new_mem)
 540{
 541        struct ttm_mem_reg *old_mem = &bo->mem;
 542        struct ttm_mem_reg tmp_mem;
 543        struct ttm_placement placement;
 544        struct ttm_place placements;
 545        int r;
 546
 547        /* make space in GTT for old_mem buffer */
 548        tmp_mem = *new_mem;
 549        tmp_mem.mm_node = NULL;
 550        placement.num_placement = 1;
 551        placement.placement = &placements;
 552        placement.num_busy_placement = 1;
 553        placement.busy_placement = &placements;
 554        placements.fpfn = 0;
 555        placements.lpfn = 0;
 556        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 557        r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 558        if (unlikely(r)) {
 559                pr_err("Failed to find GTT space for blit to VRAM\n");
 560                return r;
 561        }
 562
 563        /* move/bind old memory to GTT space */
 564        r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
 565        if (unlikely(r)) {
 566                goto out_cleanup;
 567        }
 568
 569        /* copy to VRAM */
 570        r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem);
 571        if (unlikely(r)) {
 572                goto out_cleanup;
 573        }
 574out_cleanup:
 575        ttm_bo_mem_put(bo, &tmp_mem);
 576        return r;
 577}
 578
 579/**
 580 * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
 581 *
 582 * Called by amdgpu_bo_move()
 583 */
 584static bool amdgpu_mem_visible(struct amdgpu_device *adev,
 585                               struct ttm_mem_reg *mem)
 586{
 587        struct drm_mm_node *nodes = mem->mm_node;
 588
 589        if (mem->mem_type == TTM_PL_SYSTEM ||
 590            mem->mem_type == TTM_PL_TT)
 591                return true;
 592        if (mem->mem_type != TTM_PL_VRAM)
 593                return false;
 594
 595        /* ttm_mem_reg_ioremap only supports contiguous memory */
 596        if (nodes->size != mem->num_pages)
 597                return false;
 598
 599        return ((nodes->start + nodes->size) << PAGE_SHIFT)
 600                <= adev->gmc.visible_vram_size;
 601}
 602
 603/**
 604 * amdgpu_bo_move - Move a buffer object to a new memory location
 605 *
 606 * Called by ttm_bo_handle_move_mem()
 607 */
 608static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 609                          struct ttm_operation_ctx *ctx,
 610                          struct ttm_mem_reg *new_mem)
 611{
 612        struct amdgpu_device *adev;
 613        struct amdgpu_bo *abo;
 614        struct ttm_mem_reg *old_mem = &bo->mem;
 615        int r;
 616
 617        /* Can't move a pinned BO */
 618        abo = ttm_to_amdgpu_bo(bo);
 619        if (WARN_ON_ONCE(abo->pin_count > 0))
 620                return -EINVAL;
 621
 622        adev = amdgpu_ttm_adev(bo->bdev);
 623
 624        if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 625                amdgpu_move_null(bo, new_mem);
 626                return 0;
 627        }
 628        if ((old_mem->mem_type == TTM_PL_TT &&
 629             new_mem->mem_type == TTM_PL_SYSTEM) ||
 630            (old_mem->mem_type == TTM_PL_SYSTEM &&
 631             new_mem->mem_type == TTM_PL_TT)) {
 632                /* bind is enough */
 633                amdgpu_move_null(bo, new_mem);
 634                return 0;
 635        }
 636        if (old_mem->mem_type == AMDGPU_PL_GDS ||
 637            old_mem->mem_type == AMDGPU_PL_GWS ||
 638            old_mem->mem_type == AMDGPU_PL_OA ||
 639            new_mem->mem_type == AMDGPU_PL_GDS ||
 640            new_mem->mem_type == AMDGPU_PL_GWS ||
 641            new_mem->mem_type == AMDGPU_PL_OA) {
 642                /* Nothing to save here */
 643                amdgpu_move_null(bo, new_mem);
 644                return 0;
 645        }
 646
 647        if (!adev->mman.buffer_funcs_enabled) {
 648                r = -ENODEV;
 649                goto memcpy;
 650        }
 651
 652        if (old_mem->mem_type == TTM_PL_VRAM &&
 653            new_mem->mem_type == TTM_PL_SYSTEM) {
 654                r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem);
 655        } else if (old_mem->mem_type == TTM_PL_SYSTEM &&
 656                   new_mem->mem_type == TTM_PL_VRAM) {
 657                r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
 658        } else {
 659                r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
 660                                     new_mem, old_mem);
 661        }
 662
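        /* Note: the "memcpy:" label below is jumped to directly when
         * buffer_funcs are disabled, so the CPU copy serves as the common
         * fallback path.
         */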
 663        if (r) {
 664memcpy:
 665                /* Check that all memory is CPU accessible */
 666                if (!amdgpu_mem_visible(adev, old_mem) ||
 667                    !amdgpu_mem_visible(adev, new_mem)) {
 668                        pr_err("Move buffer fallback to memcpy unavailable\n");
 669                        return r;
 670                }
 671
 672                r = ttm_bo_move_memcpy(bo, ctx, new_mem);
 673                if (r)
 674                        return r;
 675        }
 676
 677        if (bo->type == ttm_bo_type_device &&
 678            new_mem->mem_type == TTM_PL_VRAM &&
 679            old_mem->mem_type != TTM_PL_VRAM) {
 680                /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
 681                 * accesses the BO after it's moved.
 682                 */
 683                abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 684        }
 685
 686        /* update statistics */
 687        atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
 688        return 0;
 689}
 690
 691/**
 692 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
 693 *
 694 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 695 */
 696static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 697{
 698        struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 699        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
 700        struct drm_mm_node *mm_node = mem->mm_node;
 701
 702        mem->bus.addr = NULL;
 703        mem->bus.offset = 0;
 704        mem->bus.size = mem->num_pages << PAGE_SHIFT;
 705        mem->bus.base = 0;
 706        mem->bus.is_iomem = false;
 707        if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
 708                return -EINVAL;
 709        switch (mem->mem_type) {
 710        case TTM_PL_SYSTEM:
 711                /* system memory */
 712                return 0;
 713        case TTM_PL_TT:
 714                break;
 715        case TTM_PL_VRAM:
 716                mem->bus.offset = mem->start << PAGE_SHIFT;
 717                /* check if it's visible */
 718                if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
 719                        return -EINVAL;
 720                /* Only physically contiguous buffers apply. In a contiguous
 721                 * buffer, size of the first mm_node would match the number of
 722                 * pages in ttm_mem_reg.
 723                 */
 724                if (adev->mman.aper_base_kaddr &&
 725                    (mm_node->size == mem->num_pages))
 726                        mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
 727                                        mem->bus.offset;
 728
 729                mem->bus.base = adev->gmc.aper_base;
 730                mem->bus.is_iomem = true;
 731                break;
 732        default:
 733                return -EINVAL;
 734        }
 735        return 0;
 736}
 737
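/**
 * amdgpu_ttm_io_mem_free - Counterpart of amdgpu_ttm_io_mem_reserve()
 *
 * Nothing to do here, since amdgpu_ttm_io_mem_reserve() does not allocate
 * any per-mapping resources.
 */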
 738static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 739{
 740}
 741
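/**
 * amdgpu_ttm_io_mem_pfn - Return the aperture PFN backing a page of the BO
 *
 * Walks the drm_mm nodes of the BO (VRAM allocations may be split across
 * several nodes) and returns the page frame number that backs @page_offset.
 */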
 742static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 743                                           unsigned long page_offset)
 744{
 745        struct drm_mm_node *mm;
 746        unsigned long offset = (page_offset << PAGE_SHIFT);
 747
 748        mm = amdgpu_find_mm_node(&bo->mem, &offset);
 749        return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
 750                (offset >> PAGE_SHIFT);
 751}
 752
 753/*
 754 * TTM backend functions.
 755 */
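/*
 * struct amdgpu_ttm_tt - driver private TTM state for a GTT/userptr BO
 *
 * @ttm: base DMA-aware ttm_tt object
 * @gobj: GEM object this ttm_tt belongs to
 * @offset: GART offset the pages are bound at (AMDGPU_BO_INVALID_OFFSET
 *          when not bound through the GART)
 * @userptr: CPU address of the user memory backing a userptr BO
 * @usertask: task that owns the userptr address space
 * @userflags: AMDGPU_GEM_USERPTR_* flags
 * @range: HMM range used to track CPU page table updates for userptr BOs
 */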
 756struct amdgpu_ttm_tt {
 757        struct ttm_dma_tt       ttm;
 758        struct drm_gem_object   *gobj;
 759        u64                     offset;
 760        uint64_t                userptr;
 761        struct task_struct      *usertask;
 762        uint32_t                userflags;
 763#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
 764        struct hmm_range        *range;
 765#endif
 766};
 767
 768#ifdef CONFIG_DRM_AMDGPU_USERPTR
/* flags used internally by HMM, not related to CPU/GPU PTE flags */
 770static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
 771        (1 << 0), /* HMM_PFN_VALID */
 772        (1 << 1), /* HMM_PFN_WRITE */
 773};
 774
 775static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
 776        0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
 777        0, /* HMM_PFN_NONE */
 778        0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
 779};
 780
 781/**
 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
 * memory and start HMM tracking of CPU page table updates
 *
 * The calling function must call amdgpu_ttm_tt_get_user_pages_done() once
 * and only once afterwards to stop HMM tracking.
 787 */
 788int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 789{
 790        struct ttm_tt *ttm = bo->tbo.ttm;
 791        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 792        unsigned long start = gtt->userptr;
 793        struct vm_area_struct *vma;
 794        struct hmm_range *range;
 795        unsigned long timeout;
 796        struct mm_struct *mm;
 797        unsigned long i;
 798        int r = 0;
 799
 800        mm = bo->notifier.mm;
 801        if (unlikely(!mm)) {
 802                DRM_DEBUG_DRIVER("BO is not registered?\n");
 803                return -EFAULT;
 804        }
 805
 806        /* Another get_user_pages is running at the same time?? */
 807        if (WARN_ON(gtt->range))
 808                return -EFAULT;
 809
 810        if (!mmget_not_zero(mm)) /* Happens during process shutdown */
 811                return -ESRCH;
 812
 813        range = kzalloc(sizeof(*range), GFP_KERNEL);
 814        if (unlikely(!range)) {
 815                r = -ENOMEM;
 816                goto out;
 817        }
 818        range->notifier = &bo->notifier;
 819        range->flags = hmm_range_flags;
 820        range->values = hmm_range_values;
 821        range->pfn_shift = PAGE_SHIFT;
 822        range->start = bo->notifier.interval_tree.start;
 823        range->end = bo->notifier.interval_tree.last + 1;
 824        range->default_flags = hmm_range_flags[HMM_PFN_VALID];
 825        if (!amdgpu_ttm_tt_is_readonly(ttm))
 826                range->default_flags |= range->flags[HMM_PFN_WRITE];
 827
 828        range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
 829                                     GFP_KERNEL);
 830        if (unlikely(!range->pfns)) {
 831                r = -ENOMEM;
 832                goto out_free_ranges;
 833        }
 834
 835        down_read(&mm->mmap_sem);
 836        vma = find_vma(mm, start);
 837        if (unlikely(!vma || start < vma->vm_start)) {
 838                r = -EFAULT;
 839                goto out_unlock;
 840        }
 841        if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
 842                vma->vm_file)) {
 843                r = -EPERM;
 844                goto out_unlock;
 845        }
 846        up_read(&mm->mmap_sem);
 847        timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 848
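        /* Snapshot the notifier sequence and fault in the pages; retry while
         * the range is being invalidated (0/-EBUSY) until the timeout expires.
         */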
 849retry:
 850        range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
 851
 852        down_read(&mm->mmap_sem);
 853        r = hmm_range_fault(range);
 854        up_read(&mm->mmap_sem);
 855        if (unlikely(r <= 0)) {
 856                /*
 857                 * FIXME: This timeout should encompass the retry from
 858                 * mmu_interval_read_retry() as well.
 859                 */
 860                if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
 861                        goto retry;
 862                goto out_free_pfns;
 863        }
 864
 865        for (i = 0; i < ttm->num_pages; i++) {
 866                /* FIXME: The pages cannot be touched outside the notifier_lock */
 867                pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
 868                if (unlikely(!pages[i])) {
 869                        pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
 870                               i, range->pfns[i]);
 871                        r = -ENOMEM;
 872
 873                        goto out_free_pfns;
 874                }
 875        }
 876
 877        gtt->range = range;
 878        mmput(mm);
 879
 880        return 0;
 881
 882out_unlock:
 883        up_read(&mm->mmap_sem);
 884out_free_pfns:
 885        kvfree(range->pfns);
 886out_free_ranges:
 887        kfree(range);
 888out:
 889        mmput(mm);
 890        return r;
 891}
 892
 893/**
 * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table
 * changes and check whether the pages backing this ttm range have been
 * invalidated
 *
 * Returns: true if the pages are still valid
 898 */
 899bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 900{
 901        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 902        bool r = false;
 903
 904        if (!gtt || !gtt->userptr)
 905                return false;
 906
 907        DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
 908                gtt->userptr, ttm->num_pages);
 909
 910        WARN_ONCE(!gtt->range || !gtt->range->pfns,
 911                "No user pages to check\n");
 912
 913        if (gtt->range) {
 914                /*
 915                 * FIXME: Must always hold notifier_lock for this, and must
 916                 * not ignore the return code.
 917                 */
 918                r = mmu_interval_read_retry(gtt->range->notifier,
 919                                         gtt->range->notifier_seq);
 920                kvfree(gtt->range->pfns);
 921                kfree(gtt->range);
 922                gtt->range = NULL;
 923        }
 924
 925        return !r;
 926}
 927#endif
 928
 929/**
 * amdgpu_ttm_tt_set_user_pages - Fill the ttm_tt page array with user pages.
 931 *
 932 * Called by amdgpu_cs_list_validate(). This creates the page list
 933 * that backs user memory and will ultimately be mapped into the device
 934 * address space.
 935 */
 936void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 937{
 938        unsigned long i;
 939
 940        for (i = 0; i < ttm->num_pages; ++i)
 941                ttm->pages[i] = pages ? pages[i] : NULL;
 942}
 943
 944/**
 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
 *
 * Called by amdgpu_ttm_backend_bind()
 */
 949static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 950{
 951        struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 952        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 953        unsigned nents;
 954        int r;
 955
 956        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
 957        enum dma_data_direction direction = write ?
 958                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 959
 960        /* Allocate an SG array and squash pages into it */
 961        r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 962                                      ttm->num_pages << PAGE_SHIFT,
 963                                      GFP_KERNEL);
 964        if (r)
 965                goto release_sg;
 966
 967        /* Map SG to device */
 968        r = -ENOMEM;
 969        nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 970        if (nents == 0)
 971                goto release_sg;
 972
 973        /* convert SG to linear array of pages and dma addresses */
 974        drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 975                                         gtt->ttm.dma_address, ttm->num_pages);
 976
 977        return 0;
 978
 979release_sg:
 980        kfree(ttm->sg);
 981        return r;
 982}
 983
 984/**
 985 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 986 */
 987static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 988{
 989        struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 990        struct amdgpu_ttm_tt *gtt = (void *)ttm;
 991
 992        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
 993        enum dma_data_direction direction = write ?
 994                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 995
 996        /* double check that we don't free the table twice */
 997        if (!ttm->sg->sgl)
 998                return;
 999
1000        /* unmap the pages mapped to the device */
1001        dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
1002
1003        sg_free_table(ttm->sg);
1004
1005#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
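        /* If every page still matches the outstanding hmm_range, then
         * amdgpu_ttm_tt_get_user_pages_done() was never called for it.
         */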
1006        if (gtt->range) {
1007                unsigned long i;
1008
1009                for (i = 0; i < ttm->num_pages; i++) {
1010                        if (ttm->pages[i] !=
1011                                hmm_device_entry_to_page(gtt->range,
1012                                              gtt->range->pfns[i]))
1013                                break;
1014                }
1015
1016                WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
1017        }
1018#endif
1019}
1020
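/**
 * amdgpu_ttm_gart_bind - Bind the pages of a BO into the GART
 *
 * Binds ttm->pages at gtt->offset with the given PTE @flags. For GFX9 MQD
 * BOs (AMDGPU_GEM_CREATE_CP_MQD_GFX9) the first page keeps the default UC
 * memory type while the remaining pages are mapped as NC.
 */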
static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
1022                                struct ttm_buffer_object *tbo,
1023                                uint64_t flags)
1024{
1025        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
1026        struct ttm_tt *ttm = tbo->ttm;
1027        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1028        int r;
1029
1030        if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
1031                uint64_t page_idx = 1;
1032
1033                r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
1034                                ttm->pages, gtt->ttm.dma_address, flags);
1035                if (r)
1036                        goto gart_bind_fail;
1037
1038                /* The memory type of the first page defaults to UC. Now
1039                 * modify the memory type to NC from the second page of
1040                 * the BO onward.
1041                 */
1042                flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
1043                flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
1044
1045                r = amdgpu_gart_bind(adev,
1046                                gtt->offset + (page_idx << PAGE_SHIFT),
1047                                ttm->num_pages - page_idx,
1048                                &ttm->pages[page_idx],
1049                                &(gtt->ttm.dma_address[page_idx]), flags);
1050        } else {
1051                r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
1052                                     ttm->pages, gtt->ttm.dma_address, flags);
1053        }
1054
1055gart_bind_fail:
1056        if (r)
1057                DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
1058                          ttm->num_pages, gtt->offset);
1059
1060        return r;
1061}
1062
1063/**
1064 * amdgpu_ttm_backend_bind - Bind GTT memory
1065 *
1066 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
1067 * This handles binding GTT memory to the device address space.
1068 */
1069static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
1070                                   struct ttm_mem_reg *bo_mem)
1071{
1072        struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1073        struct amdgpu_ttm_tt *gtt = (void*)ttm;
1074        uint64_t flags;
1075        int r = 0;
1076
1077        if (gtt->userptr) {
1078                r = amdgpu_ttm_tt_pin_userptr(ttm);
1079                if (r) {
1080                        DRM_ERROR("failed to pin userptr\n");
1081                        return r;
1082                }
1083        }
1084        if (!ttm->num_pages) {
1085                WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
1086                     ttm->num_pages, bo_mem, ttm);
1087        }
1088
1089        if (bo_mem->mem_type == AMDGPU_PL_GDS ||
1090            bo_mem->mem_type == AMDGPU_PL_GWS ||
1091            bo_mem->mem_type == AMDGPU_PL_OA)
1092                return -EINVAL;
1093
1094        if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
1095                gtt->offset = AMDGPU_BO_INVALID_OFFSET;
1096                return 0;
1097        }
1098
1099        /* compute PTE flags relevant to this BO memory */
1100        flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
1101
1102        /* bind pages into GART page tables */
1103        gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
1104        r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
1105                ttm->pages, gtt->ttm.dma_address, flags);
1106
1107        if (r)
1108                DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
1109                          ttm->num_pages, gtt->offset);
1110        return r;
1111}
1112
1113/**
1114 * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
1115 */
1116int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
1117{
1118        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
1119        struct ttm_operation_ctx ctx = { false, false };
1120        struct amdgpu_ttm_tt *gtt = (void*)bo->ttm;
1121        struct ttm_mem_reg tmp;
1122        struct ttm_placement placement;
1123        struct ttm_place placements;
1124        uint64_t addr, flags;
1125        int r;
1126
1127        if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
1128                return 0;
1129
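        /* Try to map the BO through the AGP aperture first; if that succeeds
         * no GART space has to be allocated for it.
         */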
1130        addr = amdgpu_gmc_agp_addr(bo);
1131        if (addr != AMDGPU_BO_INVALID_OFFSET) {
1132                bo->mem.start = addr >> PAGE_SHIFT;
1133        } else {
1134
1135                /* allocate GART space */
1136                tmp = bo->mem;
1137                tmp.mm_node = NULL;
1138                placement.num_placement = 1;
1139                placement.placement = &placements;
1140                placement.num_busy_placement = 1;
1141                placement.busy_placement = &placements;
1142                placements.fpfn = 0;
1143                placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
1144                placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
1145                        TTM_PL_FLAG_TT;
1146
1147                r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
1148                if (unlikely(r))
1149                        return r;
1150
1151                /* compute PTE flags for this buffer object */
1152                flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
1153
1154                /* Bind pages */
1155                gtt->offset = (u64)tmp.start << PAGE_SHIFT;
1156                r = amdgpu_ttm_gart_bind(adev, bo, flags);
1157                if (unlikely(r)) {
1158                        ttm_bo_mem_put(bo, &tmp);
1159                        return r;
1160                }
1161
1162                ttm_bo_mem_put(bo, &bo->mem);
1163                bo->mem = tmp;
1164        }
1165
1166        bo->offset = (bo->mem.start << PAGE_SHIFT) +
1167                bo->bdev->man[bo->mem.mem_type].gpu_offset;
1168
1169        return 0;
1170}
1171
1172/**
1173 * amdgpu_ttm_recover_gart - Rebind GTT pages
1174 *
1175 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
1176 * rebind GTT pages during a GPU reset.
1177 */
1178int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
1179{
1180        struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1181        uint64_t flags;
1182        int r;
1183
1184        if (!tbo->ttm)
1185                return 0;
1186
1187        flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
1188        r = amdgpu_ttm_gart_bind(adev, tbo, flags);
1189
1190        return r;
1191}
1192
1193/**
1194 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
1195 *
1196 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
1197 * ttm_tt_destroy().
1198 */
1199static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
1200{
1201        struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1202        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1203        int r;
1204
1205        /* if the pages have userptr pinning then clear that first */
1206        if (gtt->userptr)
1207                amdgpu_ttm_tt_unpin_userptr(ttm);
1208
1209        if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1210                return 0;
1211
1212        /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
1213        r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1214        if (r)
1215                DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
1216                          gtt->ttm.ttm.num_pages, gtt->offset);
1217        return r;
1218}
1219
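/**
 * amdgpu_ttm_backend_destroy - Free the backing state of a ttm_tt object
 *
 * Drops the reference on the owning user task (if any) and frees the
 * ttm_dma_tt and the amdgpu_ttm_tt wrapper itself.
 */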
1220static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
1221{
1222        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1223
1224        if (gtt->usertask)
1225                put_task_struct(gtt->usertask);
1226
1227        ttm_dma_tt_fini(&gtt->ttm);
1228        kfree(gtt);
1229}
1230
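/* TTM backend callbacks used to bind, unbind and destroy our GTT pages */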
1231static struct ttm_backend_func amdgpu_backend_func = {
1232        .bind = &amdgpu_ttm_backend_bind,
1233        .unbind = &amdgpu_ttm_backend_unbind,
1234        .destroy = &amdgpu_ttm_backend_destroy,
1235};
1236
1237/**
1238 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
1239 *
1240 * @bo: The buffer object to create a GTT ttm_tt object around
1241 *
1242 * Called by ttm_tt_create().
1243 */
1244static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1245                                           uint32_t page_flags)
1246{
1247        struct amdgpu_ttm_tt *gtt;
1248
1249        gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
1250        if (gtt == NULL) {
1251                return NULL;
1252        }
1253        gtt->ttm.ttm.func = &amdgpu_backend_func;
1254        gtt->gobj = &bo->base;
1255
1256        /* allocate space for the uninitialized page entries */
1257        if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
1258                kfree(gtt);
1259                return NULL;
1260        }
1261        return &gtt->ttm.ttm;
1262}
1263
1264/**
1265 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
1266 *
1267 * Map the pages of a ttm_tt object to an address space visible
1268 * to the underlying device.
1269 */
1270static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1271                        struct ttm_operation_ctx *ctx)
1272{
1273        struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1274        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1275
1276        /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1277        if (gtt && gtt->userptr) {
1278                ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1279                if (!ttm->sg)
1280                        return -ENOMEM;
1281
1282                ttm->page_flags |= TTM_PAGE_FLAG_SG;
1283                ttm->state = tt_unbound;
1284                return 0;
1285        }
1286
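        /* For SG BOs (e.g. dma-buf imports) map the attachment if needed and
         * convert the sg_table into the page and DMA address arrays.
         */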
1287        if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
1288                if (!ttm->sg) {
1289                        struct dma_buf_attachment *attach;
1290                        struct sg_table *sgt;
1291
1292                        attach = gtt->gobj->import_attach;
1293                        sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
1294                        if (IS_ERR(sgt))
1295                                return PTR_ERR(sgt);
1296
1297                        ttm->sg = sgt;
1298                }
1299
1300                drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
1301                                                 gtt->ttm.dma_address,
1302                                                 ttm->num_pages);
1303                ttm->state = tt_unbound;
1304                return 0;
1305        }
1306
1307#ifdef CONFIG_SWIOTLB
1308        if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1309                return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
1310        }
1311#endif
1312
1313        /* fall back to generic helper to populate the page array
1314         * and map them to the device */
1315        return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
1316}
1317
1318/**
1319 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
1320 *
1321 * Unmaps pages of a ttm_tt object from the device address space and
1322 * unpopulates the page array backing it.
1323 */
1324static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1325{
1326        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1327        struct amdgpu_device *adev;
1328
1329        if (gtt && gtt->userptr) {
1330                amdgpu_ttm_tt_set_user_pages(ttm, NULL);
1331                kfree(ttm->sg);
1332                ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
1333                return;
1334        }
1335
1336        if (ttm->sg && gtt->gobj->import_attach) {
1337                struct dma_buf_attachment *attach;
1338
1339                attach = gtt->gobj->import_attach;
1340                dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
1341                ttm->sg = NULL;
1342                return;
1343        }
1344
1345        if (ttm->page_flags & TTM_PAGE_FLAG_SG)
1346                return;
1347
1348        adev = amdgpu_ttm_adev(ttm->bdev);
1349
1350#ifdef CONFIG_SWIOTLB
1351        if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1352                ttm_dma_unpopulate(&gtt->ttm, adev->dev);
1353                return;
1354        }
1355#endif
1356
1357        /* fall back to generic helper to unmap and unpopulate array */
1358        ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
1359}
1360
1361/**
1362 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
1363 * task
1364 *
1365 * @ttm: The ttm_tt object to bind this userptr object to
1366 * @addr:  The address in the current tasks VM space to use
1367 * @flags: Requirements of userptr object.
1368 *
1369 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
1370 * to current task
1371 */
1372int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1373                              uint32_t flags)
1374{
1375        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1376
1377        if (gtt == NULL)
1378                return -EINVAL;
1379
1380        gtt->userptr = addr;
1381        gtt->userflags = flags;
1382
1383        if (gtt->usertask)
1384                put_task_struct(gtt->usertask);
1385        gtt->usertask = current->group_leader;
1386        get_task_struct(gtt->usertask);
1387
1388        return 0;
1389}
1390
1391/**
1392 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
1393 */
1394struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1395{
1396        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1397
1398        if (gtt == NULL)
1399                return NULL;
1400
1401        if (gtt->usertask == NULL)
1402                return NULL;
1403
1404        return gtt->usertask->mm;
1405}
1406
1407/**
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies within an
 * address range of the current task.
1410 *
1411 */
1412bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1413                                  unsigned long end)
1414{
1415        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1416        unsigned long size;
1417
1418        if (gtt == NULL || !gtt->userptr)
1419                return false;
1420
1421        /* Return false if no part of the ttm_tt object lies within
1422         * the range
1423         */
1424        size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
1425        if (gtt->userptr > end || gtt->userptr + size <= start)
1426                return false;
1427
1428        return true;
1429}
1430
1431/**
 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
1433 */
1434bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1435{
1436        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1437
1438        if (gtt == NULL || !gtt->userptr)
1439                return false;
1440
1441        return true;
1442}
1443
1444/**
1445 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
1446 */
1447bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1448{
1449        struct amdgpu_ttm_tt *gtt = (void *)ttm;
1450
1451        if (gtt == NULL)
1452                return false;
1453
1454        return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1455}
1456
1457/**
1458 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1459 *
1460 * @ttm: The ttm_tt object to compute the flags for
1461 * @mem: The memory registry backing this ttm_tt object
1462 *
1463 * Figure out the flags to use for a VM PDE (Page Directory Entry).
1464 */
1465uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
1466{
1467        uint64_t flags = 0;
1468
1469        if (mem && mem->mem_type != TTM_PL_SYSTEM)
1470                flags |= AMDGPU_PTE_VALID;
1471
1472        if (mem && mem->mem_type == TTM_PL_TT) {
1473                flags |= AMDGPU_PTE_SYSTEM;
1474
1475                if (ttm->caching_state == tt_cached)
1476                        flags |= AMDGPU_PTE_SNOOPED;
1477        }
1478
1479        return flags;
1480}
1481
1482/**
1483 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
1484 *
 * @adev: amdgpu device
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
 *
 * Figure out the flags to use for a VM PTE (Page Table Entry).
1489 */
1490uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1491                                 struct ttm_mem_reg *mem)
1492{
1493        uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
1494
1495        flags |= adev->gart.gart_pte_flags;
1496        flags |= AMDGPU_PTE_READABLE;
1497
1498        if (!amdgpu_ttm_tt_is_readonly(ttm))
1499                flags |= AMDGPU_PTE_WRITEABLE;
1500
1501        return flags;
1502}
1503
1504/**
1505 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
1506 * object.
1507 *
     * @bo: the buffer object in question
     * @place: the placement we need to make room for
     *
1508 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
1509 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
1510 * it can find space for a new object and by ttm_bo_force_list_clean() which is
1511 * used to clean out a memory space.
1512 */
1513static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1514                                            const struct ttm_place *place)
1515{
1516        unsigned long num_pages = bo->mem.num_pages;
1517        struct drm_mm_node *node = bo->mem.mm_node;
1518        struct dma_resv_list *flist;
1519        struct dma_fence *f;
1520        int i;
1521
1522        if (bo->type == ttm_bo_type_kernel &&
1523            !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1524                return false;
1525
1526        /* If bo is a KFD BO, check if the bo belongs to the current process.
1527         * If true, then return false as any KFD process needs all its BOs to
1528         * be resident to run successfully
1529         */
1530        flist = dma_resv_get_list(bo->base.resv);
1531        if (flist) {
1532                for (i = 0; i < flist->shared_count; ++i) {
1533                        f = rcu_dereference_protected(flist->shared[i],
1534                                dma_resv_held(bo->base.resv));
1535                        if (amdkfd_fence_check_mm(f, current->mm))
1536                                return false;
1537                }
1538        }
1539
1540        switch (bo->mem.mem_type) {
1541        case TTM_PL_TT:
1542                return true;
1543
1544        case TTM_PL_VRAM:
1545                /* Check each drm MM node individually */
1546                while (num_pages) {
1547                        if (place->fpfn < (node->start + node->size) &&
1548                            !(place->lpfn && place->lpfn <= node->start))
1549                                return true;
1550
1551                        num_pages -= node->size;
1552                        ++node;
1553                }
1554                return false;
1555
1556        default:
1557                break;
1558        }
1559
1560        return ttm_bo_eviction_valuable(bo, place);
1561}
1562
1563/**
1564 * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
1565 *
1566 * @bo:  The buffer object to read/write
1567 * @offset:  Offset into buffer object
1568 * @buf:  Secondary buffer to write/read from
1569 * @len: Length in bytes of access
1570 * @write:  true if writing
1571 *
1572 * This is used to access VRAM that backs a buffer object via MMIO
1573 * access for debugging purposes.
1574 */
1575static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1576                                    unsigned long offset,
1577                                    void *buf, int len, int write)
1578{
1579        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1580        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1581        struct drm_mm_node *nodes;
1582        uint32_t value = 0;
1583        int ret = 0;
1584        uint64_t pos;
1585        unsigned long flags;
1586
1587        if (bo->mem.mem_type != TTM_PL_VRAM)
1588                return -EIO;
1589
1590        nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
1591        pos = (nodes->start << PAGE_SHIFT) + offset;
1592
1593        while (len && pos < adev->gmc.mc_vram_size) {
1594                uint64_t aligned_pos = pos & ~(uint64_t)3;
1595                uint64_t bytes = 4 - (pos & 3);
1596                uint32_t shift = (pos & 3) * 8;
1597                uint32_t mask = 0xffffffff << shift;
1598
1599                if (len < bytes) {
1600                        mask &= 0xffffffff >> (bytes - len) * 8;
1601                        bytes = len;
1602                }
1603
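                    /* A partial dword goes through the MM_INDEX/MM_DATA aperture
                     * with a read-modify-write; full dwords below use the faster
                     * amdgpu_device_vram_access() helper.
                     */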
1604                if (mask != 0xffffffff) {
1605                        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1606                        WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1607                        WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1608                        if (!write || mask != 0xffffffff)
1609                                value = RREG32_NO_KIQ(mmMM_DATA);
1610                        if (write) {
1611                                value &= ~mask;
1612                                value |= (*(uint32_t *)buf << shift) & mask;
1613                                WREG32_NO_KIQ(mmMM_DATA, value);
1614                        }
1615                        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1616                        if (!write) {
1617                                value = (value & mask) >> shift;
1618                                memcpy(buf, &value, bytes);
1619                        }
1620                } else {
1621                        bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
1622                        bytes = min(bytes - pos, (uint64_t)len & ~0x3ull);
1623
1624                        amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
1625                                                  bytes, write);
1626                }
1627
1628                ret += bytes;
1629                buf = (uint8_t *)buf + bytes;
1630                pos += bytes;
1631                len -= bytes;
1632                if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
1633                        ++nodes;
1634                        pos = (nodes->start << PAGE_SHIFT);
1635                }
1636        }
1637
1638        return ret;
1639}
1640
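    /*
     * TTM driver callbacks used for all amdgpu buffer objects.
     */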
1641static struct ttm_bo_driver amdgpu_bo_driver = {
1642        .ttm_tt_create = &amdgpu_ttm_tt_create,
1643        .ttm_tt_populate = &amdgpu_ttm_tt_populate,
1644        .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1645        .init_mem_type = &amdgpu_init_mem_type,
1646        .eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
1647        .evict_flags = &amdgpu_evict_flags,
1648        .move = &amdgpu_bo_move,
1649        .verify_access = &amdgpu_verify_access,
1650        .move_notify = &amdgpu_bo_move_notify,
1651        .release_notify = &amdgpu_bo_release_notify,
1652        .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
1653        .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1654        .io_mem_free = &amdgpu_ttm_io_mem_free,
1655        .io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1656        .access_memory = &amdgpu_ttm_access_memory,
1657        .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
1658};
1659
1660/*
1661 * Firmware Reservation functions
1662 */
1663/**
1664 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
1665 *
1666 * @adev: amdgpu_device pointer
1667 *
1668 * free fw reserved vram if it has been reserved.
1669 */
1670static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1671{
1672        amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
1673                NULL, &adev->fw_vram_usage.va);
1674}
1675
1676/**
1677 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
1678 *
1679 * @adev: amdgpu_device pointer
1680 *
1681 * create bo vram reservation from fw.
1682 */
1683static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1684{
1685        uint64_t vram_size = adev->gmc.visible_vram_size;
1686
1687        adev->fw_vram_usage.va = NULL;
1688        adev->fw_vram_usage.reserved_bo = NULL;
1689
1690        if (adev->fw_vram_usage.size == 0 ||
1691            adev->fw_vram_usage.size > vram_size)
1692                return 0;
1693
1694        return amdgpu_bo_create_kernel_at(adev,
1695                                          adev->fw_vram_usage.start_offset,
1696                                          adev->fw_vram_usage.size,
1697                                          AMDGPU_GEM_DOMAIN_VRAM,
1698                                          &adev->fw_vram_usage.reserved_bo,
1699                                          &adev->fw_vram_usage.va);
1700}
1701
1702/*
1703 * Memory training reservation functions
1704 */
1705
1706/**
1707 * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
1708 *
1709 * @adev: amdgpu_device pointer
1710 *
1711 * free memory training reserved vram if it has been reserved.
1712 */
1713static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
1714{
1715        struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1716
1717        ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
1718        amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1719        ctx->c2p_bo = NULL;
1720
1721        return 0;
1722}
1723
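    /**
     * amdgpu_ttm_training_get_c2p_offset - compute the VRAM offset of the C2P buffer
     *
     * @vram_size: total size of VRAM in bytes
     *
     * Returns @vram_size rounded up to a 1MB boundary, after first backing off
     * by 1MB when the sub-1MB remainder of @vram_size is 4KB or less, so the
     * C2P training buffer ends up at a 1MB aligned offset near the top of VRAM.
     */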
1724static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
1725{
1726        if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1))
1727                vram_size -= SZ_1M;

1729        return ALIGN(vram_size, SZ_1M);
1730}
1731
1732/**
1733 * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
1734 *
1735 * @adev: amdgpu_device pointer
1736 *
1737 * create bo vram reservation from memory training.
1738 */
1739static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
1740{
1741        int ret;
1742        struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1743
1744        memset(ctx, 0, sizeof(*ctx));
1745        if (!adev->fw_vram_usage.mem_train_support) {
1746                DRM_DEBUG("memory training is not supported!\n");
1747                return 0;
1748        }
1749
1750        ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
1751        ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
1752        ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1753
1754        DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
1755                  ctx->train_data_size,
1756                  ctx->p2c_train_data_offset,
1757                  ctx->c2p_train_data_offset);
1758
1759        ret = amdgpu_bo_create_kernel_at(adev,
1760                                         ctx->c2p_train_data_offset,
1761                                         ctx->train_data_size,
1762                                         AMDGPU_GEM_DOMAIN_VRAM,
1763                                         &ctx->c2p_bo,
1764                                         NULL);
1765        if (ret) {
1766                DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1767                amdgpu_ttm_training_reserve_vram_fini(adev);
1768                return ret;
1769        }
1770
1771        ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1772        return 0;
1773}
1774
1775/**
1776 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
1777 * gtt/vram related fields.
1778 *
     * @adev: amdgpu_device pointer
     *
1779 * This initializes all of the memory space pools that the TTM layer
1780 * will need such as the GTT space (system memory mapped to the device),
1781 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
1782 * can be mapped per VMID.
1783 */
1784int amdgpu_ttm_init(struct amdgpu_device *adev)
1785{
1786        uint64_t gtt_size;
1787        int r;
1788        u64 vis_vram_limit;
1789        void *stolen_vga_buf;
1790
1791        mutex_init(&adev->mman.gtt_window_lock);
1792
1793        /* No other users of the address space, so set it to 0 */
1794        r = ttm_bo_device_init(&adev->mman.bdev,
1795                               &amdgpu_bo_driver,
1796                               adev->ddev->anon_inode->i_mapping,
1797                               adev->ddev->vma_offset_manager,
1798                               dma_addressing_limited(adev->dev));
1799        if (r) {
1800                DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1801                return r;
1802        }
1803        adev->mman.initialized = true;
1804
1805        /* We opt to avoid OOM on system page allocations */
1806        adev->mman.bdev.no_retry = true;
1807
1808        /* Initialize VRAM pool with all of VRAM divided into pages */
1809        r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1810                                adev->gmc.real_vram_size >> PAGE_SHIFT);
1811        if (r) {
1812                DRM_ERROR("Failed initializing VRAM heap.\n");
1813                return r;
1814        }
1815
1816        /* Reduce size of CPU-visible VRAM if requested */
1817        vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
1818        if (amdgpu_vis_vram_limit > 0 &&
1819            vis_vram_limit <= adev->gmc.visible_vram_size)
1820                adev->gmc.visible_vram_size = vis_vram_limit;
1821
1822        /* Change the size here instead of the init above so only lpfn is affected */
1823        amdgpu_ttm_set_buffer_funcs_status(adev, false);
1824#ifdef CONFIG_64BIT
1825        adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
1826                                                adev->gmc.visible_vram_size);
1827#endif
1828
1829        /*
1830         * The reserved vram for firmware must be pinned to the specified
1831         * place on the VRAM, so reserve it early.
1832         */
1833        r = amdgpu_ttm_fw_reserve_vram_init(adev);
1834        if (r)
1835                return r;
1837
1838        /*
1839         * The reserved vram for memory training must be pinned to the specified
1840         * place on the VRAM, so reserve it early.
1841         */
1842        if (!amdgpu_sriov_vf(adev)) {
1843                r = amdgpu_ttm_training_reserve_vram_init(adev);
1844                if (r)
1845                        return r;
1846        }
1847
1848        /* allocate memory as required for VGA
1849         * This is used for VGA emulation and pre-OS scanout buffers to
1850         * avoid display artifacts while transitioning between pre-OS
1851         * and driver.  */
1852        r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
1853                                    AMDGPU_GEM_DOMAIN_VRAM,
1854                                    &adev->stolen_vga_memory,
1855                                    NULL, &stolen_vga_buf);
1856        if (r)
1857                return r;
1858
1859        /*
1860         * reserve a 64K TMR (trusted memory region) at the top of VRAM which
1861         * holds the IP Discovery data and is protected by the PSP.
1862         */
1863        r = amdgpu_bo_create_kernel_at(adev,
1864                                       adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE,
1865                                       DISCOVERY_TMR_SIZE,
1866                                       AMDGPU_GEM_DOMAIN_VRAM,
1867                                       &adev->discovery_memory,
1868                                       NULL);
1869        if (r)
1870                return r;
1871
1872        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1873                 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1874
1875        /* Compute GTT size, either based on 3/4 of the RAM size
1876         * or whatever the user passed on module init */
1877        if (amdgpu_gtt_size == -1) {
1878                struct sysinfo si;
1879
1880                si_meminfo(&si);
1881                gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1882                               adev->gmc.mc_vram_size),
1883                               ((uint64_t)si.totalram * si.mem_unit * 3/4));
1884        } else
1886                gtt_size = (uint64_t)amdgpu_gtt_size << 20;
1887
1888        /* Initialize GTT memory pool */
1889        r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
1890        if (r) {
1891                DRM_ERROR("Failed initializing GTT heap.\n");
1892                return r;
1893        }
1894        DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
1895                 (unsigned)(gtt_size / (1024 * 1024)));
1896
1897        /* Initialize various on-chip memory pools */
1898        r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
1899                           adev->gds.gds_size);
1900        if (r) {
1901                DRM_ERROR("Failed initializing GDS heap.\n");
1902                return r;
1903        }
1904
1905        r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
1906                           adev->gds.gws_size);
1907        if (r) {
1908                DRM_ERROR("Failed initializing gws heap.\n");
1909                return r;
1910        }
1911
1912        r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
1913                           adev->gds.oa_size);
1914        if (r) {
1915                DRM_ERROR("Failed initializing oa heap.\n");
1916                return r;
1917        }
1918
1919        return 0;
1920}
1921
1922/**
1923 * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm
     *
     * @adev: amdgpu_device pointer
1924 */
1925void amdgpu_ttm_late_init(struct amdgpu_device *adev)
1926{
1927        void *stolen_vga_buf;
1928        /* return the VGA stolen memory (if any) back to VRAM */
1929        amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
1930}
1931
1932/**
1933 * amdgpu_ttm_fini - De-initialize the TTM memory pools
     *
     * @adev: amdgpu_device pointer
1934 */
1935void amdgpu_ttm_fini(struct amdgpu_device *adev)
1936{
1937        if (!adev->mman.initialized)
1938                return;
1939
1940        amdgpu_ttm_training_reserve_vram_fini(adev);
1941        /* return the IP Discovery TMR memory back to VRAM */
1942        amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
1943        amdgpu_ttm_fw_reserve_vram_fini(adev);
1944
1945        if (adev->mman.aper_base_kaddr)
1946                iounmap(adev->mman.aper_base_kaddr);
1947        adev->mman.aper_base_kaddr = NULL;
1948
1949        ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
1950        ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
1951        ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
1952        ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
1953        ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
1954        ttm_bo_device_release(&adev->mman.bdev);
1955        adev->mman.initialized = false;
1956        DRM_INFO("amdgpu: ttm finalized\n");
1957}
1958
1959/**
1960 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
1961 *
1962 * @adev: amdgpu_device pointer
1963 * @enable: true when we can use buffer functions.
1964 *
1965 * Enable/disable use of buffer functions during suspend/resume. This should
1966 * only be called at bootup or when userspace isn't running.
1967 */
1968void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
1969{
1970        struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
1971        uint64_t size;
1972        int r;
1973
1974        if (!adev->mman.initialized || adev->in_gpu_reset ||
1975            adev->mman.buffer_funcs_enabled == enable)
1976                return;
1977
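            /* Create the scheduler entity used for buffer moves when enabling,
             * tear it down again when disabling.
             */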
1978        if (enable) {
1979                struct amdgpu_ring *ring;
1980                struct drm_gpu_scheduler *sched;
1981
1982                ring = adev->mman.buffer_funcs_ring;
1983                sched = &ring->sched;
1984                r = drm_sched_entity_init(&adev->mman.entity,
1985                                          DRM_SCHED_PRIORITY_KERNEL, &sched,
1986                                          1, NULL);
1987                if (r) {
1988                        DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
1989                                  r);
1990                        return;
1991                }
1992        } else {
1993                drm_sched_entity_destroy(&adev->mman.entity);
1994                dma_fence_put(man->move);
1995                man->move = NULL;
1996        }
1997
1998        /* this just adjusts TTM's idea of the VRAM size so lpfn gets the correct value */
1999        if (enable)
2000                size = adev->gmc.real_vram_size;
2001        else
2002                size = adev->gmc.visible_vram_size;
2003        man->size = size >> PAGE_SHIFT;
2004        adev->mman.buffer_funcs_enabled = enable;
2005}
2006
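    /**
     * amdgpu_mmap - mmap callback for the amdgpu DRM file
     *
     * @filp: DRM file the mapping is requested for
     * @vma: vm area describing the requested mapping
     *
     * Forwards the request to TTM so buffer objects of this device can be
     * mapped into user space.
     */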
2007int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
2008{
2009        struct drm_file *file_priv = filp->private_data;
2010        struct amdgpu_device *adev = file_priv->minor->dev->dev_private;
2011
2012        if (adev == NULL)
2013                return -EINVAL;
2014
2015        return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
2016}
2017
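    /**
     * amdgpu_map_buffer - map part of a BO through a GTT window
     *
     * @bo: buffer object whose pages should be mapped
     * @mem: memory placement backing @bo
     * @num_pages: number of pages to map
     * @offset: byte offset into @bo at which the mapping starts
     * @window: index of the GTT window to use
     * @ring: ring used to submit the GART update
     * @addr: returns the GPU address of the mapped window
     *
     * Writes the PTEs for the requested pages into the selected GTT window and
     * submits the GART update on @ring, so that a following copy can access the
     * pages through the returned address.
     */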
2018static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
2019                             struct ttm_mem_reg *mem, unsigned num_pages,
2020                             uint64_t offset, unsigned window,
2021                             struct amdgpu_ring *ring,
2022                             uint64_t *addr)
2023{
2024        struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
2025        struct amdgpu_device *adev = ring->adev;
2026        struct ttm_tt *ttm = bo->ttm;
2027        struct amdgpu_job *job;
2028        unsigned num_dw, num_bytes;
2029        dma_addr_t *dma_address;
2030        struct dma_fence *fence;
2031        uint64_t src_addr, dst_addr;
2032        uint64_t flags;
2033        int r;
2034
2035        BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
2036               AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
2037
2038        *addr = adev->gmc.gart_start;
2039        *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
2040                AMDGPU_GPU_PAGE_SIZE;
2041
2042        num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
2043        num_bytes = num_pages * 8;
2044
2045        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
2046        if (r)
2047                return r;
2048
2049        src_addr = num_dw * 4;
2050        src_addr += job->ibs[0].gpu_addr;
2051
2052        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
2053        dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
2054        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
2055                                dst_addr, num_bytes);
2056
2057        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2058        WARN_ON(job->ibs[0].length_dw > num_dw);
2059
2060        dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
2061        flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
2062        r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
2063                            &job->ibs[0].ptr[num_dw]);
2064        if (r)
2065                goto error_free;
2066
2067        r = amdgpu_job_submit(job, &adev->mman.entity,
2068                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
2069        if (r)
2070                goto error_free;
2071
2072        dma_fence_put(fence);
2073
2074        return r;
2075
2076error_free:
2077        amdgpu_job_free(job);
2078        return r;
2079}
2080
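    /**
     * amdgpu_copy_buffer - schedule an SDMA copy between two GPU addresses
     *
     * @ring: ring to submit the copy to
     * @src_offset: source GPU address
     * @dst_offset: destination GPU address
     * @byte_count: number of bytes to copy
     * @resv: optional reservation object to synchronize with before copying
     * @fence: returns the fence of the copy job
     * @direct_submit: submit directly to the ring instead of the scheduler
     * @vm_needs_flush: flush the VM before the copy
     *
     * Splits the copy into chunks of at most copy_max_bytes and emits them all
     * in a single job.
     */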
2081int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
2082                       uint64_t dst_offset, uint32_t byte_count,
2083                       struct dma_resv *resv,
2084                       struct dma_fence **fence, bool direct_submit,
2085                       bool vm_needs_flush)
2086{
2087        struct amdgpu_device *adev = ring->adev;
2088        struct amdgpu_job *job;
2089
2090        uint32_t max_bytes;
2091        unsigned num_loops, num_dw;
2092        unsigned i;
2093        int r;
2094
2095        if (direct_submit && !ring->sched.ready) {
2096                DRM_ERROR("Trying to move memory with ring turned off.\n");
2097                return -EINVAL;
2098        }
2099
2100        max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
2101        num_loops = DIV_ROUND_UP(byte_count, max_bytes);
2102        num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
2103
2104        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
2105        if (r)
2106                return r;
2107
2108        if (vm_needs_flush) {
2109                job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
2110                job->vm_needs_flush = true;
2111        }
2112        if (resv) {
2113                r = amdgpu_sync_resv(adev, &job->sync, resv,
2114                                     AMDGPU_SYNC_ALWAYS,
2115                                     AMDGPU_FENCE_OWNER_UNDEFINED);
2116                if (r) {
2117                        DRM_ERROR("sync failed (%d).\n", r);
2118                        goto error_free;
2119                }
2120        }
2121
2122        for (i = 0; i < num_loops; i++) {
2123                uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
2124
2125                amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
2126                                        dst_offset, cur_size_in_bytes);
2127
2128                src_offset += cur_size_in_bytes;
2129                dst_offset += cur_size_in_bytes;
2130                byte_count -= cur_size_in_bytes;
2131        }
2132
2133        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2134        WARN_ON(job->ibs[0].length_dw > num_dw);
2135        if (direct_submit)
2136                r = amdgpu_job_submit_direct(job, ring, fence);
2137        else
2138                r = amdgpu_job_submit(job, &adev->mman.entity,
2139                                      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
2140        if (r)
2141                goto error_free;
2142
2143        return r;
2144
2145error_free:
2146        amdgpu_job_free(job);
2147        DRM_ERROR("Error scheduling IBs (%d)\n", r);
2148        return r;
2149}
2150
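    /**
     * amdgpu_fill_buffer - fill a buffer object with a 32 bit value
     *
     * @bo: the buffer object to fill
     * @src_data: the value written to every dword of the BO
     * @resv: optional reservation object to synchronize with before filling
     * @fence: returns the fence of the fill job
     *
     * Walks the drm_mm nodes backing @bo and emits a fill command for each of
     * them, splitting nodes that exceed fill_max_bytes.
     */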
2151int amdgpu_fill_buffer(struct amdgpu_bo *bo,
2152                       uint32_t src_data,
2153                       struct dma_resv *resv,
2154                       struct dma_fence **fence)
2155{
2156        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2157        uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
2158        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2159
2160        struct drm_mm_node *mm_node;
2161        unsigned long num_pages;
2162        unsigned int num_loops, num_dw;
2163
2164        struct amdgpu_job *job;
2165        int r;
2166
2167        if (!adev->mman.buffer_funcs_enabled) {
2168                DRM_ERROR("Trying to clear memory with ring turned off.\n");
2169                return -EINVAL;
2170        }
2171
2172        if (bo->tbo.mem.mem_type == TTM_PL_TT) {
2173                r = amdgpu_ttm_alloc_gart(&bo->tbo);
2174                if (r)
2175                        return r;
2176        }
2177
2178        num_pages = bo->tbo.num_pages;
2179        mm_node = bo->tbo.mem.mm_node;
2180        num_loops = 0;
2181        while (num_pages) {
2182                uint64_t byte_count = mm_node->size << PAGE_SHIFT;
2183
2184                num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes);
2185                num_pages -= mm_node->size;
2186                ++mm_node;
2187        }
2188        num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
2189
2190        /* for IB padding */
2191        num_dw += 64;
2192
2193        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
2194        if (r)
2195                return r;
2196
2197        if (resv) {
2198                r = amdgpu_sync_resv(adev, &job->sync, resv,
2199                                     AMDGPU_SYNC_ALWAYS,
2200                                     AMDGPU_FENCE_OWNER_UNDEFINED);
2201                if (r) {
2202                        DRM_ERROR("sync failed (%d).\n", r);
2203                        goto error_free;
2204                }
2205        }
2206
2207        num_pages = bo->tbo.num_pages;
2208        mm_node = bo->tbo.mem.mm_node;
2209
2210        while (num_pages) {
2211                uint64_t byte_count = mm_node->size << PAGE_SHIFT;
2212                uint64_t dst_addr;
2213
2214                dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
2215                while (byte_count) {
2216                        uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count,
2217                                                           max_bytes);
2218
2219                        amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
2220                                                dst_addr, cur_size_in_bytes);
2221
2222                        dst_addr += cur_size_in_bytes;
2223                        byte_count -= cur_size_in_bytes;
2224                }
2225
2226                num_pages -= mm_node->size;
2227                ++mm_node;
2228        }
2229
2230        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2231        WARN_ON(job->ibs[0].length_dw > num_dw);
2232        r = amdgpu_job_submit(job, &adev->mman.entity,
2233                              AMDGPU_FENCE_OWNER_UNDEFINED, fence);
2234        if (r)
2235                goto error_free;
2236
2237        return 0;
2238
2239error_free:
2240        amdgpu_job_free(job);
2241        return r;
2242}
2243
2244#if defined(CONFIG_DEBUG_FS)
2245
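    /**
     * amdgpu_mm_dump_table - debugfs dump of a TTM memory manager
     *
     * Prints the state of the memory manager selected by the debugfs entry
     * (VRAM, GTT, GDS, GWS or OA) through the manager's debug callback.
     */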
2246static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
2247{
2248        struct drm_info_node *node = (struct drm_info_node *)m->private;
2249        unsigned ttm_pl = (uintptr_t)node->info_ent->data;
2250        struct drm_device *dev = node->minor->dev;
2251        struct amdgpu_device *adev = dev->dev_private;
2252        struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
2253        struct drm_printer p = drm_seq_file_printer(m);
2254
2255        man->func->debug(man, &p);
2256        return 0;
2257}
2258
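    /* debugfs entries dumping the TTM memory managers and page pools */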
2259static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
2260        {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
2261        {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
2262        {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
2263        {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
2264        {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
2265        {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
2266#ifdef CONFIG_SWIOTLB
2267        {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
2268#endif
2269};
2270
2271/**
2272 * amdgpu_ttm_vram_read - Linear read access to VRAM
2273 *
2274 * Accesses VRAM via MMIO for debugging purposes.
2275 */
2276static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
2277                                    size_t size, loff_t *pos)
2278{
2279        struct amdgpu_device *adev = file_inode(f)->i_private;
2280        ssize_t result = 0;
2281
2282        if (size & 0x3 || *pos & 0x3)
2283                return -EINVAL;
2284
2285        if (*pos >= adev->gmc.mc_vram_size)
2286                return -ENXIO;
2287
2288        size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
2289        while (size) {
2290                size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
2291                uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
2292
2293                amdgpu_device_vram_access(adev, *pos, value, bytes, false);
2294                if (copy_to_user(buf, value, bytes))
2295                        return -EFAULT;
2296
2297                result += bytes;
2298                buf += bytes;
2299                *pos += bytes;
2300                size -= bytes;
2301        }
2302
2303        return result;
2304}
2305
2306/**
2307 * amdgpu_ttm_vram_write - Linear write access to VRAM
2308 *
2309 * Accesses VRAM via MMIO for debugging purposes.
2310 */
2311static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
2312                                    size_t size, loff_t *pos)
2313{
2314        struct amdgpu_device *adev = file_inode(f)->i_private;
2315        ssize_t result = 0;
2316        int r;
2317
2318        if (size & 0x3 || *pos & 0x3)
2319                return -EINVAL;
2320
2321        if (*pos >= adev->gmc.mc_vram_size)
2322                return -ENXIO;
2323
2324        while (size) {
2325                unsigned long flags;
2326                uint32_t value;
2327
2328                if (*pos >= adev->gmc.mc_vram_size)
2329                        return result;
2330
2331                r = get_user(value, (uint32_t *)buf);
2332                if (r)
2333                        return r;
2334
2335                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
2336                WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
2337                WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
2338                WREG32_NO_KIQ(mmMM_DATA, value);
2339                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
2340
2341                result += 4;
2342                buf += 4;
2343                *pos += 4;
2344                size -= 4;
2345        }
2346
2347        return result;
2348}
2349
2350static const struct file_operations amdgpu_ttm_vram_fops = {
2351        .owner = THIS_MODULE,
2352        .read = amdgpu_ttm_vram_read,
2353        .write = amdgpu_ttm_vram_write,
2354        .llseek = default_llseek,
2355};
2356
2357#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
2358
2359/**
2360 * amdgpu_ttm_gtt_read - Linear read access to GTT memory
2361 */
2362static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
2363                                   size_t size, loff_t *pos)
2364{
2365        struct amdgpu_device *adev = file_inode(f)->i_private;
2366        ssize_t result = 0;
2367        int r;
2368
2369        while (size) {
2370                loff_t p = *pos / PAGE_SIZE;
2371                unsigned off = *pos & ~PAGE_MASK;
2372                size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
2373                struct page *page;
2374                void *ptr;
2375
2376                if (p >= adev->gart.num_cpu_pages)
2377                        return result;
2378
2379                page = adev->gart.pages[p];
2380                if (page) {
2381                        ptr = kmap(page);
2382                        ptr += off;
2383
2384                        r = copy_to_user(buf, ptr, cur_size);
2385                        kunmap(adev->gart.pages[p]);
2386                } else
2387                        r = clear_user(buf, cur_size);
2388
2389                if (r)
2390                        return -EFAULT;
2391
2392                result += cur_size;
2393                buf += cur_size;
2394                *pos += cur_size;
2395                size -= cur_size;
2396        }
2397
2398        return result;
2399}
2400
2401static const struct file_operations amdgpu_ttm_gtt_fops = {
2402        .owner = THIS_MODULE,
2403        .read = amdgpu_ttm_gtt_read,
2404        .llseek = default_llseek
2405};
2406
2407#endif
2408
2409/**
2410 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
2411 *
2412 * This function is used to read memory that has been mapped to the
2413 * GPU and the known addresses are not physical addresses but instead
2414 * bus addresses (e.g., what you'd put in an IB or ring buffer).
2415 */
2416static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2417                                 size_t size, loff_t *pos)
2418{
2419        struct amdgpu_device *adev = file_inode(f)->i_private;
2420        struct iommu_domain *dom;
2421        ssize_t result = 0;
2422        int r;
2423
2424        /* retrieve the IOMMU domain if any for this device */
2425        dom = iommu_get_domain_for_dev(adev->dev);
2426
2427        while (size) {
2428                phys_addr_t addr = *pos & PAGE_MASK;
2429                loff_t off = *pos & ~PAGE_MASK;
2430                size_t bytes = PAGE_SIZE - off;
2431                unsigned long pfn;
2432                struct page *p;
2433                void *ptr;
2434
2435                bytes = bytes < size ? bytes : size;
2436
2437                /* Translate the bus address to a physical address.  If
2438                 * the domain is NULL it means there is no IOMMU active
2439                 * and the address translation is the identity
2440                 */
2441                addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2442
2443                pfn = addr >> PAGE_SHIFT;
2444                if (!pfn_valid(pfn))
2445                        return -EPERM;
2446
2447                p = pfn_to_page(pfn);
2448                if (p->mapping != adev->mman.bdev.dev_mapping)
2449                        return -EPERM;
2450
2451                ptr = kmap(p);
2452                r = copy_to_user(buf, ptr + off, bytes);
2453                kunmap(p);
2454                if (r)
2455                        return -EFAULT;
2456
2457                size -= bytes;
2458                *pos += bytes;
2459                result += bytes;
2460        }
2461
2462        return result;
2463}
2464
2465/**
2466 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
2467 *
2468 * This function is used to write memory that has been mapped to the
2469 * GPU and the known addresses are not physical addresses but instead
2470 * bus addresses (e.g., what you'd put in an IB or ring buffer).
2471 */
2472static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
2473                                 size_t size, loff_t *pos)
2474{
2475        struct amdgpu_device *adev = file_inode(f)->i_private;
2476        struct iommu_domain *dom;
2477        ssize_t result = 0;
2478        int r;
2479
2480        dom = iommu_get_domain_for_dev(adev->dev);
2481
2482        while (size) {
2483                phys_addr_t addr = *pos & PAGE_MASK;
2484                loff_t off = *pos & ~PAGE_MASK;
2485                size_t bytes = PAGE_SIZE - off;
2486                unsigned long pfn;
2487                struct page *p;
2488                void *ptr;
2489
2490                bytes = bytes < size ? bytes : size;
2491
2492                addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2493
2494                pfn = addr >> PAGE_SHIFT;
2495                if (!pfn_valid(pfn))
2496                        return -EPERM;
2497
2498                p = pfn_to_page(pfn);
2499                if (p->mapping != adev->mman.bdev.dev_mapping)
2500                        return -EPERM;
2501
2502                ptr = kmap(p);
2503                r = copy_from_user(ptr + off, buf, bytes);
2504                kunmap(p);
2505                if (r)
2506                        return -EFAULT;
2507
2508                size -= bytes;
2509                *pos += bytes;
2510                result += bytes;
2511        }
2512
2513        return result;
2514}
2515
2516static const struct file_operations amdgpu_ttm_iomem_fops = {
2517        .owner = THIS_MODULE,
2518        .read = amdgpu_iomem_read,
2519        .write = amdgpu_iomem_write,
2520        .llseek = default_llseek
2521};
2522
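    /* debugfs files providing raw access to VRAM, GART pages and IO memory */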
2523static const struct {
2524        char *name;
2525        const struct file_operations *fops;
2526        int domain;
2527} ttm_debugfs_entries[] = {
2528        { "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
2529#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
2530        { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
2531#endif
2532        { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
2533};
2534
2535#endif
2536
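    /**
     * amdgpu_ttm_debugfs_init - register the TTM debugfs entries
     *
     * @adev: amdgpu_device pointer
     *
     * Creates the raw VRAM/GTT/iomem debugfs files and registers the memory
     * manager dump tables on the primary DRM minor.
     */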
2537int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
2538{
2539#if defined(CONFIG_DEBUG_FS)
2540        unsigned count;
2541
2542        struct drm_minor *minor = adev->ddev->primary;
2543        struct dentry *ent, *root = minor->debugfs_root;
2544
2545        for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
2546                ent = debugfs_create_file(
2547                                ttm_debugfs_entries[count].name,
2548                                S_IFREG | S_IRUGO, root,
2549                                adev,
2550                                ttm_debugfs_entries[count].fops);
2551                if (IS_ERR(ent))
2552                        return PTR_ERR(ent);
2553                if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
2554                        i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
2555                else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
2556                        i_size_write(ent->d_inode, adev->gmc.gart_size);
2557                adev->mman.debugfs_entries[count] = ent;
2558        }
2559
2560        count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
2561
2562#ifdef CONFIG_SWIOTLB
2563        if (!(adev->need_swiotlb && swiotlb_nr_tbl()))
2564                --count;
2565#endif
2566
2567        return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
2568#else
2569        return 0;
2570#endif
2571}
2572