linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/dma-fence-array.h>
  29#include <linux/interval_tree_generic.h>
  30#include <linux/idr.h>
  31#include <drm/drmP.h>
  32#include <drm/amdgpu_drm.h>
  33#include "amdgpu.h"
  34#include "amdgpu_trace.h"
  35#include "amdgpu_amdkfd.h"
  36
  37/*
  38 * GPUVM
  39 * GPUVM is similar to the legacy gart on older asics, however
  40 * rather than there being a single global gart table
  41 * for the entire GPU, there are multiple VM page tables active
   42 * at any given time.  The VM page tables can contain a mix of
   43 * vram pages and system memory pages, and system memory pages
   44 * can be mapped as snooped (cached system pages) or unsnooped
   45 * (uncached system pages).
   46 * Each VM has an ID associated with it and there is a page table
   47 * associated with each VMID.  When executing a command buffer,
   48 * the kernel tells the ring what VMID to use for that command
   49 * buffer.  VMIDs are allocated dynamically as commands are submitted.
   50 * The userspace drivers maintain their own address space and the kernel
   51 * sets up their page tables accordingly when they submit their
   52 * command buffers and a VMID is assigned.
  53 * Cayman/Trinity support up to 8 active VMs at any given time;
  54 * SI supports 16.
  55 */
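     /*
      * Overview sketch (for illustration): each amdgpu_vm owns a tree of page
      * table BOs rooted at vm->root (a struct amdgpu_vm_pt).  Interior nodes
      * keep a kvmalloc'ed entries[] array of child amdgpu_vm_pt structures,
      * and the tree is walked by shifting the pfn with the per-level shift
      * from amdgpu_vm_level_shift() (see amdgpu_vm_get_entry() further down).
      */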
  56
  57#define START(node) ((node)->start)
  58#define LAST(node) ((node)->last)
  59
  60INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
  61                     START, LAST, static, amdgpu_vm_it)
  62
  63#undef START
  64#undef LAST
  65
  66/* Local structure. Encapsulate some VM table update parameters to reduce
  67 * the number of function parameters
  68 */
  69struct amdgpu_pte_update_params {
  70        /* amdgpu device we do this update for */
  71        struct amdgpu_device *adev;
  72        /* optional amdgpu_vm we do this update for */
  73        struct amdgpu_vm *vm;
  74        /* address where to copy page table entries from */
  75        uint64_t src;
  76        /* indirect buffer to fill with commands */
  77        struct amdgpu_ib *ib;
  78        /* Function which actually does the update */
  79        void (*func)(struct amdgpu_pte_update_params *params,
  80                     struct amdgpu_bo *bo, uint64_t pe,
  81                     uint64_t addr, unsigned count, uint32_t incr,
  82                     uint64_t flags);
   83        /* The next two are used during VM update by CPU:
   84         *  pages_addr - DMA addresses to use for mapping
   85         *  kptr - kernel pointer of the PD/PT BO that needs to be updated
   86         */
  87        dma_addr_t *pages_addr;
  88        void *kptr;
  89};
  90
  91/* Helper to disable partial resident texture feature from a fence callback */
  92struct amdgpu_prt_cb {
  93        struct amdgpu_device *adev;
  94        struct dma_fence_cb cb;
  95};
  96
  97static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
  98                                   struct amdgpu_vm *vm,
  99                                   struct amdgpu_bo *bo)
 100{
 101        base->vm = vm;
 102        base->bo = bo;
 103        INIT_LIST_HEAD(&base->bo_list);
 104        INIT_LIST_HEAD(&base->vm_status);
 105
 106        if (!bo)
 107                return;
 108        list_add_tail(&base->bo_list, &bo->va);
 109
 110        if (bo->tbo.type == ttm_bo_type_kernel)
 111                list_move(&base->vm_status, &vm->relocated);
 112
 113        if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
 114                return;
 115
 116        if (bo->preferred_domains &
 117            amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
 118                return;
 119
 120        /*
 121         * we checked all the prerequisites, but it looks like this per vm bo
 122         * is currently evicted. add the bo to the evicted list to make sure it
 123         * is validated on next vm use to avoid fault.
  124         */
 125        list_move_tail(&base->vm_status, &vm->evicted);
 126}
 127
 128/**
 129 * amdgpu_vm_level_shift - return the addr shift for each level
 130 *
 131 * @adev: amdgpu_device pointer
 132 *
 133 * Returns the number of bits the pfn needs to be right shifted for a level.
 134 */
 135static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
 136                                      unsigned level)
 137{
 138        unsigned shift = 0xff;
 139
 140        switch (level) {
 141        case AMDGPU_VM_PDB2:
 142        case AMDGPU_VM_PDB1:
 143        case AMDGPU_VM_PDB0:
 144                shift = 9 * (AMDGPU_VM_PDB0 - level) +
 145                        adev->vm_manager.block_size;
 146                break;
 147        case AMDGPU_VM_PTB:
 148                shift = 0;
 149                break;
 150        default:
 151                dev_err(adev->dev, "the level%d isn't supported.\n", level);
 152        }
 153
 154        return shift;
 155}
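     /*
      * Worked example (illustrative, assuming the common 9 bit block_size):
      * the shifts returned above are 27 for AMDGPU_VM_PDB2, 18 for
      * AMDGPU_VM_PDB1, 9 for AMDGPU_VM_PDB0 and 0 for AMDGPU_VM_PTB, i.e.
      * each directory level consumes 9 bits of the pfn and the page table
      * block covers the low block_size bits.
      */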
 156
 157/**
 158 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 159 *
 160 * @adev: amdgpu_device pointer
 161 *
 162 * Calculate the number of entries in a page directory or page table.
 163 */
 164static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 165                                      unsigned level)
 166{
 167        unsigned shift = amdgpu_vm_level_shift(adev,
 168                                               adev->vm_manager.root_level);
 169
 170        if (level == adev->vm_manager.root_level)
 171                /* For the root directory */
 172                return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
 173        else if (level != AMDGPU_VM_PTB)
 174                /* Everything in between */
 175                return 512;
 176        else
 177                /* For the page tables on the leaves */
 178                return AMDGPU_VM_PTE_COUNT(adev);
 179}
 180
 181/**
 182 * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 183 *
 184 * @adev: amdgpu_device pointer
 185 *
 186 * Calculate the size of the BO for a page directory or page table in bytes.
 187 */
 188static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
 189{
 190        return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 191}
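     /*
      * For illustration: an interior directory has 512 entries of 8 bytes
      * each, so its BO is 4096 bytes; a leaf PTB is AMDGPU_VM_PTE_COUNT * 8
      * bytes (again 4 KiB with a 9 bit block_size) and the root is sized by
      * max_pfn rounded up to the root shift.  All sizes come out GPU page
      * aligned through the helper above.
      */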
 192
 193/**
 194 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 195 *
 196 * @vm: vm providing the BOs
 197 * @validated: head of validation list
 198 * @entry: entry to add
 199 *
 200 * Add the page directory to the list of BOs to
 201 * validate for command submission.
 202 */
 203void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 204                         struct list_head *validated,
 205                         struct amdgpu_bo_list_entry *entry)
 206{
 207        entry->robj = vm->root.base.bo;
 208        entry->priority = 0;
 209        entry->tv.bo = &entry->robj->tbo;
 210        entry->tv.shared = true;
 211        entry->user_pages = NULL;
 212        list_add(&entry->tv.head, validated);
 213}
 214
 215/**
 216 * amdgpu_vm_validate_pt_bos - validate the page table BOs
 217 *
 218 * @adev: amdgpu device pointer
 219 * @vm: vm providing the BOs
 220 * @validate: callback to do the validation
 221 * @param: parameter for the validation callback
 222 *
  223 * Validate the page table BOs on command submission if necessary.
 224 */
 225int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 226                              int (*validate)(void *p, struct amdgpu_bo *bo),
 227                              void *param)
 228{
 229        struct ttm_bo_global *glob = adev->mman.bdev.glob;
 230        struct amdgpu_vm_bo_base *bo_base, *tmp;
 231        int r = 0;
 232
 233        list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
 234                struct amdgpu_bo *bo = bo_base->bo;
 235
 236                if (bo->parent) {
 237                        r = validate(param, bo);
 238                        if (r)
 239                                break;
 240
 241                        spin_lock(&glob->lru_lock);
 242                        ttm_bo_move_to_lru_tail(&bo->tbo);
 243                        if (bo->shadow)
 244                                ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
 245                        spin_unlock(&glob->lru_lock);
 246                }
 247
 248                if (bo->tbo.type != ttm_bo_type_kernel) {
 249                        spin_lock(&vm->moved_lock);
 250                        list_move(&bo_base->vm_status, &vm->moved);
 251                        spin_unlock(&vm->moved_lock);
 252                } else {
 253                        list_move(&bo_base->vm_status, &vm->relocated);
 254                }
 255        }
 256
 257        spin_lock(&glob->lru_lock);
 258        list_for_each_entry(bo_base, &vm->idle, vm_status) {
 259                struct amdgpu_bo *bo = bo_base->bo;
 260
 261                if (!bo->parent)
 262                        continue;
 263
 264                ttm_bo_move_to_lru_tail(&bo->tbo);
 265                if (bo->shadow)
 266                        ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
 267        }
 268        spin_unlock(&glob->lru_lock);
 269
 270        return r;
 271}
 272
 273/**
 274 * amdgpu_vm_ready - check VM is ready for updates
 275 *
 276 * @vm: VM to check
 277 *
 278 * Check if all VM PDs/PTs are ready for updates
 279 */
 280bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 281{
 282        return list_empty(&vm->evicted);
 283}
 284
 285/**
 286 * amdgpu_vm_clear_bo - initially clear the PDs/PTs
 287 *
 288 * @adev: amdgpu_device pointer
 289 * @bo: BO to clear
 290 * @level: level this BO is at
 291 *
 292 * Root PD needs to be reserved when calling this.
 293 */
 294static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 295                              struct amdgpu_vm *vm, struct amdgpu_bo *bo,
 296                              unsigned level, bool pte_support_ats)
 297{
 298        struct ttm_operation_ctx ctx = { true, false };
 299        struct dma_fence *fence = NULL;
 300        unsigned entries, ats_entries;
 301        struct amdgpu_ring *ring;
 302        struct amdgpu_job *job;
 303        uint64_t addr;
 304        int r;
 305
 306        addr = amdgpu_bo_gpu_offset(bo);
 307        entries = amdgpu_bo_size(bo) / 8;
 308
 309        if (pte_support_ats) {
 310                if (level == adev->vm_manager.root_level) {
 311                        ats_entries = amdgpu_vm_level_shift(adev, level);
 312                        ats_entries += AMDGPU_GPU_PAGE_SHIFT;
 313                        ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
 314                        ats_entries = min(ats_entries, entries);
 315                        entries -= ats_entries;
 316                } else {
 317                        ats_entries = entries;
 318                        entries = 0;
 319                }
 320        } else {
 321                ats_entries = 0;
 322        }
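             /* Illustration: each root level entry spans
              * 1 << (level_shift + AMDGPU_GPU_PAGE_SHIFT) bytes of VA, so the
              * computation above is simply the number of root entries below
              * AMDGPU_VA_HOLE_START; those get the default ATC mapping while
              * the remaining entries are cleared to 0 further down.
              */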
 323
 324        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 325
 326        r = reservation_object_reserve_shared(bo->tbo.resv);
 327        if (r)
 328                return r;
 329
 330        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 331        if (r)
 332                goto error;
 333
 334        r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 335        if (r)
 336                goto error;
 337
 338        if (ats_entries) {
 339                uint64_t ats_value;
 340
 341                ats_value = AMDGPU_PTE_DEFAULT_ATC;
 342                if (level != AMDGPU_VM_PTB)
 343                        ats_value |= AMDGPU_PDE_PTE;
 344
 345                amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
 346                                      ats_entries, 0, ats_value);
 347                addr += ats_entries * 8;
 348        }
 349
 350        if (entries)
 351                amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
 352                                      entries, 0, 0);
 353
 354        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 355
 356        WARN_ON(job->ibs[0].length_dw > 64);
 357        r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
 358                             AMDGPU_FENCE_OWNER_UNDEFINED, false);
 359        if (r)
 360                goto error_free;
 361
 362        r = amdgpu_job_submit(job, ring, &vm->entity,
 363                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
 364        if (r)
 365                goto error_free;
 366
 367        amdgpu_bo_fence(bo, fence, true);
 368        dma_fence_put(fence);
 369
 370        if (bo->shadow)
 371                return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
 372                                          level, pte_support_ats);
 373
 374        return 0;
 375
 376error_free:
 377        amdgpu_job_free(job);
 378
 379error:
 380        return r;
 381}
 382
 383/**
 384 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
 385 *
 386 * @adev: amdgpu_device pointer
 387 * @vm: requested vm
 388 * @saddr: start of the address range
 389 * @eaddr: end of the address range
 390 *
 391 * Make sure the page directories and page tables are allocated
 392 */
 393static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 394                                  struct amdgpu_vm *vm,
 395                                  struct amdgpu_vm_pt *parent,
 396                                  uint64_t saddr, uint64_t eaddr,
 397                                  unsigned level, bool ats)
 398{
 399        unsigned shift = amdgpu_vm_level_shift(adev, level);
 400        unsigned pt_idx, from, to;
 401        u64 flags;
 402        int r;
 403
 404        if (!parent->entries) {
 405                unsigned num_entries = amdgpu_vm_num_entries(adev, level);
 406
 407                parent->entries = kvmalloc_array(num_entries,
 408                                                   sizeof(struct amdgpu_vm_pt),
 409                                                   GFP_KERNEL | __GFP_ZERO);
 410                if (!parent->entries)
 411                        return -ENOMEM;
 413        }
 414
 415        from = saddr >> shift;
 416        to = eaddr >> shift;
 417        if (from >= amdgpu_vm_num_entries(adev, level) ||
 418            to >= amdgpu_vm_num_entries(adev, level))
 419                return -EINVAL;
 420
 421        ++level;
 422        saddr = saddr & ((1 << shift) - 1);
 423        eaddr = eaddr & ((1 << shift) - 1);
 424
 425        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 426        if (vm->use_cpu_for_update)
 427                flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 428        else
 429                flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 430                                AMDGPU_GEM_CREATE_SHADOW);
 431
 432        /* walk over the address space and allocate the page tables */
 433        for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 434                struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 435                struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 436                struct amdgpu_bo *pt;
 437
 438                if (!entry->base.bo) {
 439                        struct amdgpu_bo_param bp;
 440
 441                        memset(&bp, 0, sizeof(bp));
 442                        bp.size = amdgpu_vm_bo_size(adev, level);
 443                        bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
 444                        bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
 445                        bp.flags = flags;
 446                        bp.type = ttm_bo_type_kernel;
 447                        bp.resv = resv;
 448                        r = amdgpu_bo_create(adev, &bp, &pt);
 449                        if (r)
 450                                return r;
 451
 452                        r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
 453                        if (r) {
 454                                amdgpu_bo_unref(&pt->shadow);
 455                                amdgpu_bo_unref(&pt);
 456                                return r;
 457                        }
 458
 459                        if (vm->use_cpu_for_update) {
 460                                r = amdgpu_bo_kmap(pt, NULL);
 461                                if (r) {
 462                                        amdgpu_bo_unref(&pt->shadow);
 463                                        amdgpu_bo_unref(&pt);
 464                                        return r;
 465                                }
 466                        }
 467
 468                        /* Keep a reference to the root directory to avoid
  469                         * freeing them up in the wrong order.
  470                         */
 471                        pt->parent = amdgpu_bo_ref(parent->base.bo);
 472
 473                        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 474                }
 475
 476                if (level < AMDGPU_VM_PTB) {
 477                        uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
 478                        uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 479                                ((1 << shift) - 1);
 480                        r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
 481                                                   sub_eaddr, level, ats);
 482                        if (r)
 483                                return r;
 484                }
 485        }
 486
 487        return 0;
 488}
 489
 490/**
 491 * amdgpu_vm_alloc_pts - Allocate page tables.
 492 *
 493 * @adev: amdgpu_device pointer
 494 * @vm: VM to allocate page tables for
 495 * @saddr: Start address which needs to be allocated
 496 * @size: Size from start address we need.
 497 *
 498 * Make sure the page tables are allocated.
 499 */
 500int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 501                        struct amdgpu_vm *vm,
 502                        uint64_t saddr, uint64_t size)
 503{
 504        uint64_t eaddr;
 505        bool ats = false;
 506
 507        /* validate the parameters */
 508        if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
 509                return -EINVAL;
 510
 511        eaddr = saddr + size - 1;
 512
 513        if (vm->pte_support_ats)
 514                ats = saddr < AMDGPU_VA_HOLE_START;
 515
 516        saddr /= AMDGPU_GPU_PAGE_SIZE;
 517        eaddr /= AMDGPU_GPU_PAGE_SIZE;
 518
 519        if (eaddr >= adev->vm_manager.max_pfn) {
 520                dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
 521                        eaddr, adev->vm_manager.max_pfn);
 522                return -EINVAL;
 523        }
 524
 525        return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
 526                                      adev->vm_manager.root_level, ats);
 527}
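     /*
      * Usage sketch (hypothetical numbers): mapping 1 MiB at VA 0x200000
      * means calling amdgpu_vm_alloc_pts(adev, vm, 0x200000, 0x100000); both
      * values are GPU page aligned so the check above passes, the range
      * becomes pfns 0x200..0x2ff and amdgpu_vm_alloc_levels() allocates any
      * missing directories and page tables covering it.
      */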
 528
 529/**
 530 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 531 *
 532 * @adev: amdgpu_device pointer
 533 */
 534void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
 535{
 536        const struct amdgpu_ip_block *ip_block;
 537        bool has_compute_vm_bug;
 538        struct amdgpu_ring *ring;
 539        int i;
 540
 541        has_compute_vm_bug = false;
 542
 543        ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 544        if (ip_block) {
  545                /* Compute has a VM bug for GFX version < 7.
  546                 * Compute has a VM bug for GFX 8 MEC firmware version < 673. */
 547                if (ip_block->version->major <= 7)
 548                        has_compute_vm_bug = true;
 549                else if (ip_block->version->major == 8)
 550                        if (adev->gfx.mec_fw_version < 673)
 551                                has_compute_vm_bug = true;
 552        }
 553
 554        for (i = 0; i < adev->num_rings; i++) {
 555                ring = adev->rings[i];
 556                if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
 557                        /* only compute rings */
 558                        ring->has_compute_vm_bug = has_compute_vm_bug;
 559                else
 560                        ring->has_compute_vm_bug = false;
 561        }
 562}
 563
 564bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 565                                  struct amdgpu_job *job)
 566{
 567        struct amdgpu_device *adev = ring->adev;
 568        unsigned vmhub = ring->funcs->vmhub;
 569        struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 570        struct amdgpu_vmid *id;
 571        bool gds_switch_needed;
 572        bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 573
 574        if (job->vmid == 0)
 575                return false;
 576        id = &id_mgr->ids[job->vmid];
 577        gds_switch_needed = ring->funcs->emit_gds_switch && (
 578                id->gds_base != job->gds_base ||
 579                id->gds_size != job->gds_size ||
 580                id->gws_base != job->gws_base ||
 581                id->gws_size != job->gws_size ||
 582                id->oa_base != job->oa_base ||
 583                id->oa_size != job->oa_size);
 584
 585        if (amdgpu_vmid_had_gpu_reset(adev, id))
 586                return true;
 587
 588        return vm_flush_needed || gds_switch_needed;
 589}
 590
 591static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 592{
 593        return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 594}
 595
 596/**
 597 * amdgpu_vm_flush - hardware flush the vm
 598 *
  599 * @ring: ring to use for flush
  600 * @job: related job
  601 * @need_pipe_sync: is pipe sync needed
 602 *
 603 * Emit a VM flush when it is necessary.
 604 */
 605int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
 606{
 607        struct amdgpu_device *adev = ring->adev;
 608        unsigned vmhub = ring->funcs->vmhub;
 609        struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 610        struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
 611        bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 612                id->gds_base != job->gds_base ||
 613                id->gds_size != job->gds_size ||
 614                id->gws_base != job->gws_base ||
 615                id->gws_size != job->gws_size ||
 616                id->oa_base != job->oa_base ||
 617                id->oa_size != job->oa_size);
 618        bool vm_flush_needed = job->vm_needs_flush;
 619        bool pasid_mapping_needed = id->pasid != job->pasid ||
 620                !id->pasid_mapping ||
 621                !dma_fence_is_signaled(id->pasid_mapping);
 622        struct dma_fence *fence = NULL;
 623        unsigned patch_offset = 0;
 624        int r;
 625
 626        if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 627                gds_switch_needed = true;
 628                vm_flush_needed = true;
 629                pasid_mapping_needed = true;
 630        }
 631
 632        gds_switch_needed &= !!ring->funcs->emit_gds_switch;
 633        vm_flush_needed &= !!ring->funcs->emit_vm_flush;
 634        pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
 635                ring->funcs->emit_wreg;
 636
 637        if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 638                return 0;
 639
 640        if (ring->funcs->init_cond_exec)
 641                patch_offset = amdgpu_ring_init_cond_exec(ring);
 642
 643        if (need_pipe_sync)
 644                amdgpu_ring_emit_pipeline_sync(ring);
 645
 646        if (vm_flush_needed) {
 647                trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 648                amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
 649        }
 650
 651        if (pasid_mapping_needed)
 652                amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 653
 654        if (vm_flush_needed || pasid_mapping_needed) {
 655                r = amdgpu_fence_emit(ring, &fence, 0);
 656                if (r)
 657                        return r;
 658        }
 659
 660        if (vm_flush_needed) {
 661                mutex_lock(&id_mgr->lock);
 662                dma_fence_put(id->last_flush);
 663                id->last_flush = dma_fence_get(fence);
 664                id->current_gpu_reset_count =
 665                        atomic_read(&adev->gpu_reset_counter);
 666                mutex_unlock(&id_mgr->lock);
 667        }
 668
 669        if (pasid_mapping_needed) {
 670                id->pasid = job->pasid;
 671                dma_fence_put(id->pasid_mapping);
 672                id->pasid_mapping = dma_fence_get(fence);
 673        }
 674        dma_fence_put(fence);
 675
 676        if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 677                id->gds_base = job->gds_base;
 678                id->gds_size = job->gds_size;
 679                id->gws_base = job->gws_base;
 680                id->gws_size = job->gws_size;
 681                id->oa_base = job->oa_base;
 682                id->oa_size = job->oa_size;
 683                amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
 684                                            job->gds_size, job->gws_base,
 685                                            job->gws_size, job->oa_base,
 686                                            job->oa_size);
 687        }
 688
 689        if (ring->funcs->patch_cond_exec)
 690                amdgpu_ring_patch_cond_exec(ring, patch_offset);
 691
 692        /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
 693        if (ring->funcs->emit_switch_buffer) {
 694                amdgpu_ring_emit_switch_buffer(ring);
 695                amdgpu_ring_emit_switch_buffer(ring);
 696        }
 697        return 0;
 698}
 699
 700/**
 701 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 702 *
 703 * @vm: requested vm
 704 * @bo: requested buffer object
 705 *
 706 * Find @bo inside the requested vm.
 707 * Search inside the @bos vm list for the requested vm
 708 * Returns the found bo_va or NULL if none is found
 709 *
 710 * Object has to be reserved!
 711 */
 712struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 713                                       struct amdgpu_bo *bo)
 714{
 715        struct amdgpu_bo_va *bo_va;
 716
 717        list_for_each_entry(bo_va, &bo->va, base.bo_list) {
 718                if (bo_va->base.vm == vm) {
 719                        return bo_va;
 720                }
 721        }
 722        return NULL;
 723}
 724
 725/**
 726 * amdgpu_vm_do_set_ptes - helper to call the right asic function
 727 *
 728 * @params: see amdgpu_pte_update_params definition
 729 * @bo: PD/PT to update
 730 * @pe: addr of the page entry
 731 * @addr: dst addr to write into pe
 732 * @count: number of page entries to update
 733 * @incr: increase next addr by incr bytes
 734 * @flags: hw access flags
 735 *
 736 * Traces the parameters and calls the right asic functions
 737 * to setup the page table using the DMA.
 738 */
 739static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
 740                                  struct amdgpu_bo *bo,
 741                                  uint64_t pe, uint64_t addr,
 742                                  unsigned count, uint32_t incr,
 743                                  uint64_t flags)
 744{
 745        pe += amdgpu_bo_gpu_offset(bo);
 746        trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 747
 748        if (count < 3) {
 749                amdgpu_vm_write_pte(params->adev, params->ib, pe,
 750                                    addr | flags, count, incr);
 751
 752        } else {
 753                amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
 754                                      count, incr, flags);
 755        }
 756}
 757
 758/**
 759 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 760 *
 761 * @params: see amdgpu_pte_update_params definition
 762 * @bo: PD/PT to update
 763 * @pe: addr of the page entry
 764 * @addr: dst addr to write into pe
 765 * @count: number of page entries to update
 766 * @incr: increase next addr by incr bytes
 767 * @flags: hw access flags
 768 *
 769 * Traces the parameters and calls the DMA function to copy the PTEs.
 770 */
 771static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
 772                                   struct amdgpu_bo *bo,
 773                                   uint64_t pe, uint64_t addr,
 774                                   unsigned count, uint32_t incr,
 775                                   uint64_t flags)
 776{
 777        uint64_t src = (params->src + (addr >> 12) * 8);
 778
 779        pe += amdgpu_bo_gpu_offset(bo);
 780        trace_amdgpu_vm_copy_ptes(pe, src, count);
 781
 782        amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
 783}
 784
 785/**
 786 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 787 *
 788 * @pages_addr: optional DMA address to use for lookup
 789 * @addr: the unmapped addr
 790 *
 791 * Look up the physical address of the page that the pte resolves
 792 * to and return the pointer for the page table entry.
 793 */
 794static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 795{
 796        uint64_t result;
 797
 798        /* page table offset */
 799        result = pages_addr[addr >> PAGE_SHIFT];
 800
  801        /* in case cpu page size != gpu page size */
 802        result |= addr & (~PAGE_MASK);
 803
 804        result &= 0xFFFFFFFFFFFFF000ULL;
 805
 806        return result;
 807}
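     /*
      * Worked example (illustrative): with 64 KiB CPU pages and addr 0x2b000,
      * pages_addr[addr >> PAGE_SHIFT] returns the DMA address of CPU page 2,
      * the OR keeps offset 0xb000 and the final mask drops the low 12 bits,
      * so the result points at the right 4 KiB GPU page inside the larger
      * CPU page.
      */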
 808
 809/**
 810 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 811 *
 812 * @params: see amdgpu_pte_update_params definition
 813 * @bo: PD/PT to update
 814 * @pe: kmap addr of the page entry
 815 * @addr: dst addr to write into pe
 816 * @count: number of page entries to update
 817 * @incr: increase next addr by incr bytes
 818 * @flags: hw access flags
 819 *
 820 * Write count number of PT/PD entries directly.
 821 */
 822static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 823                                   struct amdgpu_bo *bo,
 824                                   uint64_t pe, uint64_t addr,
 825                                   unsigned count, uint32_t incr,
 826                                   uint64_t flags)
 827{
 828        unsigned int i;
 829        uint64_t value;
 830
 831        pe += (unsigned long)amdgpu_bo_kptr(bo);
 832
 833        trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 834
 835        for (i = 0; i < count; i++) {
 836                value = params->pages_addr ?
 837                        amdgpu_vm_map_gart(params->pages_addr, addr) :
 838                        addr;
 839                amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
 840                                       i, value, flags);
 841                addr += incr;
 842        }
 843}
 844
 845static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 846                             void *owner)
 847{
 848        struct amdgpu_sync sync;
 849        int r;
 850
 851        amdgpu_sync_create(&sync);
 852        amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
 853        r = amdgpu_sync_wait(&sync, true);
 854        amdgpu_sync_free(&sync);
 855
 856        return r;
 857}
 858
 859/*
 860 * amdgpu_vm_update_pde - update a single level in the hierarchy
 861 *
 862 * @param: parameters for the update
 863 * @vm: requested vm
 864 * @parent: parent directory
 865 * @entry: entry to update
 866 *
 867 * Makes sure the requested entry in parent is up to date.
 868 */
 869static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 870                                 struct amdgpu_vm *vm,
 871                                 struct amdgpu_vm_pt *parent,
 872                                 struct amdgpu_vm_pt *entry)
 873{
 874        struct amdgpu_bo *bo = parent->base.bo, *pbo;
 875        uint64_t pde, pt, flags;
 876        unsigned level;
 877
 878        /* Don't update huge pages here */
 879        if (entry->huge)
 880                return;
 881
 882        for (level = 0, pbo = bo->parent; pbo; ++level)
 883                pbo = pbo->parent;
 884
 885        level += params->adev->vm_manager.root_level;
 886        pt = amdgpu_bo_gpu_offset(entry->base.bo);
 887        flags = AMDGPU_PTE_VALID;
 888        amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
 889        pde = (entry - parent->entries) * 8;
 890        if (bo->shadow)
 891                params->func(params, bo->shadow, pde, pt, 1, 0, flags);
 892        params->func(params, bo, pde, pt, 1, 0, flags);
 893}
 894
 895/*
 896 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 897 *
 898 * @parent: parent PD
 899 *
  900 * Mark all PD levels as invalid after an error.
 901 */
 902static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 903                                       struct amdgpu_vm *vm,
 904                                       struct amdgpu_vm_pt *parent,
 905                                       unsigned level)
 906{
 907        unsigned pt_idx, num_entries;
 908
 909        /*
 910         * Recurse into the subdirectories. This recursion is harmless because
 911         * we only have a maximum of 5 layers.
 912         */
 913        num_entries = amdgpu_vm_num_entries(adev, level);
 914        for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
 915                struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 916
 917                if (!entry->base.bo)
 918                        continue;
 919
 920                if (!entry->base.moved)
 921                        list_move(&entry->base.vm_status, &vm->relocated);
 922                amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 923        }
 924}
 925
 926/*
 927 * amdgpu_vm_update_directories - make sure that all directories are valid
 928 *
 929 * @adev: amdgpu_device pointer
 930 * @vm: requested vm
 931 *
 932 * Makes sure all directories are up to date.
 933 * Returns 0 for success, error for failure.
 934 */
 935int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 936                                 struct amdgpu_vm *vm)
 937{
 938        struct amdgpu_pte_update_params params;
 939        struct amdgpu_job *job;
 940        unsigned ndw = 0;
 941        int r = 0;
 942
 943        if (list_empty(&vm->relocated))
 944                return 0;
 945
 946restart:
 947        memset(&params, 0, sizeof(params));
 948        params.adev = adev;
 949
 950        if (vm->use_cpu_for_update) {
 951                struct amdgpu_vm_bo_base *bo_base;
 952
 953                list_for_each_entry(bo_base, &vm->relocated, vm_status) {
 954                        r = amdgpu_bo_kmap(bo_base->bo, NULL);
 955                        if (unlikely(r))
 956                                return r;
 957                }
 958
 959                r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 960                if (unlikely(r))
 961                        return r;
 962
 963                params.func = amdgpu_vm_cpu_set_ptes;
 964        } else {
 965                ndw = 512 * 8;
 966                r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 967                if (r)
 968                        return r;
 969
 970                params.ib = &job->ibs[0];
 971                params.func = amdgpu_vm_do_set_ptes;
 972        }
 973
 974        while (!list_empty(&vm->relocated)) {
 975                struct amdgpu_vm_bo_base *bo_base, *parent;
 976                struct amdgpu_vm_pt *pt, *entry;
 977                struct amdgpu_bo *bo;
 978
 979                bo_base = list_first_entry(&vm->relocated,
 980                                           struct amdgpu_vm_bo_base,
 981                                           vm_status);
 982                bo_base->moved = false;
 983                list_move(&bo_base->vm_status, &vm->idle);
 984
 985                bo = bo_base->bo->parent;
 986                if (!bo)
 987                        continue;
 988
 989                parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
 990                                          bo_list);
 991                pt = container_of(parent, struct amdgpu_vm_pt, base);
 992                entry = container_of(bo_base, struct amdgpu_vm_pt, base);
 993
 994                amdgpu_vm_update_pde(&params, vm, pt, entry);
 995
 996                if (!vm->use_cpu_for_update &&
 997                    (ndw - params.ib->length_dw) < 32)
 998                        break;
 999        }
1000
1001        if (vm->use_cpu_for_update) {
1002                /* Flush HDP */
1003                mb();
1004                amdgpu_asic_flush_hdp(adev, NULL);
1005        } else if (params.ib->length_dw == 0) {
1006                amdgpu_job_free(job);
1007        } else {
1008                struct amdgpu_bo *root = vm->root.base.bo;
1009                struct amdgpu_ring *ring;
1010                struct dma_fence *fence;
1011
1012                ring = container_of(vm->entity.sched, struct amdgpu_ring,
1013                                    sched);
1014
1015                amdgpu_ring_pad_ib(ring, params.ib);
1016                amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
1017                                 AMDGPU_FENCE_OWNER_VM, false);
1018                WARN_ON(params.ib->length_dw > ndw);
1019                r = amdgpu_job_submit(job, ring, &vm->entity,
1020                                      AMDGPU_FENCE_OWNER_VM, &fence);
1021                if (r)
1022                        goto error;
1023
1024                amdgpu_bo_fence(root, fence, true);
1025                dma_fence_put(vm->last_update);
1026                vm->last_update = fence;
1027        }
1028
1029        if (!list_empty(&vm->relocated))
1030                goto restart;
1031
1032        return 0;
1033
1034error:
1035        amdgpu_vm_invalidate_level(adev, vm, &vm->root,
1036                                   adev->vm_manager.root_level);
1037        amdgpu_job_free(job);
1038        return r;
1039}
1040
1041/**
 1042 * amdgpu_vm_get_entry - find the entry for an address
 1043 *
 1044 * @p: see amdgpu_pte_update_params definition
 1045 * @addr: virtual address in question
 1046 * @entry: resulting entry or NULL
 1047 * @parent: parent entry
 1048 *
 1049 * Find the vm_pt entry and its parent for the given address.
1050 */
1051void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1052                         struct amdgpu_vm_pt **entry,
1053                         struct amdgpu_vm_pt **parent)
1054{
1055        unsigned level = p->adev->vm_manager.root_level;
1056
1057        *parent = NULL;
1058        *entry = &p->vm->root;
1059        while ((*entry)->entries) {
1060                unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
1061
1062                *parent = *entry;
1063                *entry = &(*entry)->entries[addr >> shift];
1064                addr &= (1ULL << shift) - 1;
1065        }
1066
1067        if (level != AMDGPU_VM_PTB)
1068                *entry = NULL;
1069}
1070
1071/**
1072 * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1073 *
1074 * @p: see amdgpu_pte_update_params definition
1075 * @entry: vm_pt entry to check
1076 * @parent: parent entry
1077 * @nptes: number of PTEs updated with this operation
1078 * @dst: destination address where the PTEs should point to
 1079 * @flags: access flags for the PTEs
1080 *
1081 * Check if we can update the PD with a huge page.
1082 */
1083static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1084                                        struct amdgpu_vm_pt *entry,
1085                                        struct amdgpu_vm_pt *parent,
1086                                        unsigned nptes, uint64_t dst,
1087                                        uint64_t flags)
1088{
1089        uint64_t pde;
1090
 1091        /* In the case of a mixed PT the PDE must point to it */
1092        if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
1093            nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
1094                /* Set the huge page flag to stop scanning at this PDE */
1095                flags |= AMDGPU_PDE_PTE;
1096        }
1097
1098        if (!(flags & AMDGPU_PDE_PTE)) {
1099                if (entry->huge) {
1100                        /* Add the entry to the relocated list to update it. */
1101                        entry->huge = false;
1102                        list_move(&entry->base.vm_status, &p->vm->relocated);
1103                }
1104                return;
1105        }
1106
1107        entry->huge = true;
1108        amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
1109
1110        pde = (entry - parent->entries) * 8;
1111        if (parent->base.bo->shadow)
1112                p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
1113        p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
1114}
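     /*
      * For illustration: on Vega10 and later, when an update covers exactly
      * AMDGPU_VM_PTE_COUNT pages (512 pages, i.e. a 2 MiB aligned range with
      * a 9 bit block_size) and is not a GART copy, the PDB0 entry itself is
      * written with AMDGPU_PDE_PTE set, so the walker treats it as a huge
      * page and amdgpu_vm_update_ptes() skips the individual PTEs below it.
      */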
1115
1116/**
1117 * amdgpu_vm_update_ptes - make sure that page tables are valid
1118 *
 1119 * @params: see amdgpu_pte_update_params definition
 1120 * @start: start of GPU address range
 1121 * @end: end of GPU address range
 1122 * @dst: destination address to map to, the next dst inside the function
 1123 * @flags: mapping flags
 1124 *
 1125 * Update the page tables in the range @start - @end.
 1126 *
 1127 * Returns 0 for success, -ENOENT for failure.
1128 */
1129static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1130                                  uint64_t start, uint64_t end,
1131                                  uint64_t dst, uint64_t flags)
1132{
1133        struct amdgpu_device *adev = params->adev;
1134        const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1135
1136        uint64_t addr, pe_start;
1137        struct amdgpu_bo *pt;
1138        unsigned nptes;
1139
1140        /* walk over the address space and update the page tables */
1141        for (addr = start; addr < end; addr += nptes,
1142             dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1143                struct amdgpu_vm_pt *entry, *parent;
1144
1145                amdgpu_vm_get_entry(params, addr, &entry, &parent);
1146                if (!entry)
1147                        return -ENOENT;
1148
1149                if ((addr & ~mask) == (end & ~mask))
1150                        nptes = end - addr;
1151                else
1152                        nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1153
1154                amdgpu_vm_handle_huge_pages(params, entry, parent,
1155                                            nptes, dst, flags);
1156                /* We don't need to update PTEs for huge pages */
1157                if (entry->huge)
1158                        continue;
1159
1160                pt = entry->base.bo;
1161                pe_start = (addr & mask) * 8;
1162                if (pt->shadow)
1163                        params->func(params, pt->shadow, pe_start, dst, nptes,
1164                                     AMDGPU_GPU_PAGE_SIZE, flags);
1165                params->func(params, pt, pe_start, dst, nptes,
1166                             AMDGPU_GPU_PAGE_SIZE, flags);
1167        }
1168
1169        return 0;
1170}
1171
1172/*
1173 * amdgpu_vm_frag_ptes - add fragment information to PTEs
1174 *
 1175 * @params: see amdgpu_pte_update_params definition
 1176 * @start: first PTE to handle
 1177 * @end: last PTE to handle
 1178 * @dst: addr those PTEs should point to
 1179 * @flags: hw mapping flags
 1180 *
 1181 * Returns 0 for success, -ENOENT for failure.
1182 */
1183static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params  *params,
1184                                uint64_t start, uint64_t end,
1185                                uint64_t dst, uint64_t flags)
1186{
1187        /**
1188         * The MC L1 TLB supports variable sized pages, based on a fragment
1189         * field in the PTE. When this field is set to a non-zero value, page
1190         * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
1191         * flags are considered valid for all PTEs within the fragment range
1192         * and corresponding mappings are assumed to be physically contiguous.
1193         *
1194         * The L1 TLB can store a single PTE for the whole fragment,
1195         * significantly increasing the space available for translation
1196         * caching. This leads to large improvements in throughput when the
1197         * TLB is under pressure.
1198         *
1199         * The L2 TLB distributes small and large fragments into two
1200         * asymmetric partitions. The large fragment cache is significantly
1201         * larger. Thus, we try to use large fragments wherever possible.
1202         * Userspace can support this by aligning virtual base address and
1203         * allocation size to the fragment size.
1204         */
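             /* Worked example (illustrative): for start = 0x200 and
              * end = 0x300 (256 pages), ffs(start) - 1 = 9 and
              * fls64(end - start) - 1 = 8, so frag = 8; assuming a
              * fragment_size of at least 8 the whole range is written in one
              * pass with AMDGPU_PTE_FRAG(8), i.e. as a 1 MiB fragment.
              */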
1205        unsigned max_frag = params->adev->vm_manager.fragment_size;
1206        int r;
1207
 1208        /* system pages are not physically contiguous */
1209        if (params->src || !(flags & AMDGPU_PTE_VALID))
1210                return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1211
1212        while (start != end) {
1213                uint64_t frag_flags, frag_end;
1214                unsigned frag;
1215
1216                /* This intentionally wraps around if no bit is set */
1217                frag = min((unsigned)ffs(start) - 1,
1218                           (unsigned)fls64(end - start) - 1);
1219                if (frag >= max_frag) {
1220                        frag_flags = AMDGPU_PTE_FRAG(max_frag);
1221                        frag_end = end & ~((1ULL << max_frag) - 1);
1222                } else {
1223                        frag_flags = AMDGPU_PTE_FRAG(frag);
1224                        frag_end = start + (1 << frag);
1225                }
1226
1227                r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1228                                          flags | frag_flags);
1229                if (r)
1230                        return r;
1231
1232                dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1233                start = frag_end;
1234        }
1235
1236        return 0;
1237}
1238
1239/**
1240 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1241 *
1242 * @adev: amdgpu_device pointer
1243 * @exclusive: fence we need to sync to
1244 * @pages_addr: DMA addresses to use for mapping
1245 * @vm: requested vm
1246 * @start: start of mapped range
1247 * @last: last mapped entry
1248 * @flags: flags for the entries
1249 * @addr: addr to set the area to
1250 * @fence: optional resulting fence
1251 *
1252 * Fill in the page table entries between @start and @last.
1253 * Returns 0 for success, -EINVAL for failure.
1254 */
1255static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1256                                       struct dma_fence *exclusive,
1257                                       dma_addr_t *pages_addr,
1258                                       struct amdgpu_vm *vm,
1259                                       uint64_t start, uint64_t last,
1260                                       uint64_t flags, uint64_t addr,
1261                                       struct dma_fence **fence)
1262{
1263        struct amdgpu_ring *ring;
1264        void *owner = AMDGPU_FENCE_OWNER_VM;
1265        unsigned nptes, ncmds, ndw;
1266        struct amdgpu_job *job;
1267        struct amdgpu_pte_update_params params;
1268        struct dma_fence *f = NULL;
1269        int r;
1270
1271        memset(&params, 0, sizeof(params));
1272        params.adev = adev;
1273        params.vm = vm;
1274
1275        /* sync to everything on unmapping */
1276        if (!(flags & AMDGPU_PTE_VALID))
1277                owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1278
1279        if (vm->use_cpu_for_update) {
 1280                /* params.src is used as a flag to indicate system memory */
1281                if (pages_addr)
1282                        params.src = ~0;
1283
1284                /* Wait for PT BOs to be free. PTs share the same resv. object
1285                 * as the root PD BO
1286                 */
1287                r = amdgpu_vm_wait_pd(adev, vm, owner);
1288                if (unlikely(r))
1289                        return r;
1290
1291                params.func = amdgpu_vm_cpu_set_ptes;
1292                params.pages_addr = pages_addr;
1293                return amdgpu_vm_frag_ptes(&params, start, last + 1,
1294                                           addr, flags);
1295        }
1296
1297        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
1298
1299        nptes = last - start + 1;
1300
1301        /*
1302         * reserve space for two commands every (1 << BLOCK_SIZE)
1303         *  entries or 2k dwords (whatever is smaller)
1304         *
1305         * The second command is for the shadow pagetables.
1306         */
1307        if (vm->root.base.bo->shadow)
1308                ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1309        else
1310                ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
1311
1312        /* padding, etc. */
1313        ndw = 64;
1314
1315        if (pages_addr) {
1316                /* copy commands needed */
1317                ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
1318
1319                /* and also PTEs */
1320                ndw += nptes * 2;
1321
1322                params.func = amdgpu_vm_do_copy_ptes;
1323
1324        } else {
1325                /* set page commands needed */
1326                ndw += ncmds * 10;
1327
1328                /* extra commands for begin/end fragments */
1329                ndw += 2 * 10 * adev->vm_manager.fragment_size;
1330
1331                params.func = amdgpu_vm_do_set_ptes;
1332        }
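             /* Sizing illustration (hypothetical numbers): for nptes = 2048
              * with a 9 bit block_size and no shadow, ncmds = (2048 >> 9) + 1
              * = 5; the GART copy path then reserves
              * 64 + 5 * copy_pte_num_dw + 2048 * 2 dwords, which
              * amdgpu_job_alloc_with_ib() below converts to bytes (ndw * 4).
              */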
1333
1334        r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1335        if (r)
1336                return r;
1337
1338        params.ib = &job->ibs[0];
1339
1340        if (pages_addr) {
1341                uint64_t *pte;
1342                unsigned i;
1343
1344                /* Put the PTEs at the end of the IB. */
1345                i = ndw - nptes * 2;
 1346                pte = (uint64_t *)&(job->ibs->ptr[i]);
1347                params.src = job->ibs->gpu_addr + i * 4;
1348
1349                for (i = 0; i < nptes; ++i) {
1350                        pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1351                                                    AMDGPU_GPU_PAGE_SIZE);
1352                        pte[i] |= flags;
1353                }
1354                addr = 0;
1355        }
1356
1357        r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
1358        if (r)
1359                goto error_free;
1360
1361        r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
1362                             owner, false);
1363        if (r)
1364                goto error_free;
1365
1366        r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
1367        if (r)
1368                goto error_free;
1369
1370        r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1371        if (r)
1372                goto error_free;
1373
1374        amdgpu_ring_pad_ib(ring, params.ib);
1375        WARN_ON(params.ib->length_dw > ndw);
1376        r = amdgpu_job_submit(job, ring, &vm->entity,
1377                              AMDGPU_FENCE_OWNER_VM, &f);
1378        if (r)
1379                goto error_free;
1380
1381        amdgpu_bo_fence(vm->root.base.bo, f, true);
1382        dma_fence_put(*fence);
1383        *fence = f;
1384        return 0;
1385
1386error_free:
1387        amdgpu_job_free(job);
1388        return r;
1389}
1390
1391/**
1392 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1393 *
1394 * @adev: amdgpu_device pointer
1395 * @exclusive: fence we need to sync to
1396 * @pages_addr: DMA addresses to use for mapping
1397 * @vm: requested vm
1398 * @mapping: mapped range and flags to use for the update
1399 * @flags: HW flags for the mapping
1400 * @nodes: array of drm_mm_nodes with the MC addresses
1401 * @fence: optional resulting fence
1402 *
1403 * Split the mapping into smaller chunks so that each update fits
1404 * into a SDMA IB.
1405 * Returns 0 for success, -EINVAL for failure.
1406 */
1407static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1408                                      struct dma_fence *exclusive,
1409                                      dma_addr_t *pages_addr,
1410                                      struct amdgpu_vm *vm,
1411                                      struct amdgpu_bo_va_mapping *mapping,
1412                                      uint64_t flags,
1413                                      struct drm_mm_node *nodes,
1414                                      struct dma_fence **fence)
1415{
1416        unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
1417        uint64_t pfn, start = mapping->start;
1418        int r;
1419
 1420        /* Normally bo_va->flags should only contain the READABLE and
 1421         * WRITEABLE bits here, but filter the flags again just in case.
 1422         */
1423        if (!(mapping->flags & AMDGPU_PTE_READABLE))
1424                flags &= ~AMDGPU_PTE_READABLE;
1425        if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1426                flags &= ~AMDGPU_PTE_WRITEABLE;
1427
1428        flags &= ~AMDGPU_PTE_EXECUTABLE;
1429        flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1430
1431        flags &= ~AMDGPU_PTE_MTYPE_MASK;
1432        flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1433
1434        if ((mapping->flags & AMDGPU_PTE_PRT) &&
1435            (adev->asic_type >= CHIP_VEGA10)) {
1436                flags |= AMDGPU_PTE_PRT;
1437                flags &= ~AMDGPU_PTE_VALID;
1438        }
1439
1440        trace_amdgpu_vm_bo_update(mapping);
1441
1442        pfn = mapping->offset >> PAGE_SHIFT;
1443        if (nodes) {
1444                while (pfn >= nodes->size) {
1445                        pfn -= nodes->size;
1446                        ++nodes;
1447                }
1448        }
1449
1450        do {
1451                dma_addr_t *dma_addr = NULL;
1452                uint64_t max_entries;
1453                uint64_t addr, last;
1454
1455                if (nodes) {
1456                        addr = nodes->start << PAGE_SHIFT;
1457                        max_entries = (nodes->size - pfn) *
1458                                (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1459                } else {
1460                        addr = 0;
1461                        max_entries = S64_MAX;
1462                }
1463
1464                if (pages_addr) {
1465                        uint64_t count;
1466
1467                        max_entries = min(max_entries, 16ull * 1024ull);
1468                        for (count = 1;
1469                             count < max_entries / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1470                             ++count) {
1471                                uint64_t idx = pfn + count;
1472
1473                                if (pages_addr[idx] !=
1474                                    (pages_addr[idx - 1] + PAGE_SIZE))
1475                                        break;
1476                        }
1477
1478                        if (count < min_linear_pages) {
1479                                addr = pfn << PAGE_SHIFT;
1480                                dma_addr = pages_addr;
1481                        } else {
1482                                addr = pages_addr[pfn];
1483                                max_entries = count * (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1484                        }
1485
1486                } else if (flags & AMDGPU_PTE_VALID) {
1487                        addr += adev->vm_manager.vram_base_offset;
1488                        addr += pfn << PAGE_SHIFT;
1489                }
1490
1491                last = min((uint64_t)mapping->last, start + max_entries - 1);
1492                r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
1493                                                start, last, flags, addr,
1494                                                fence);
1495                if (r)
1496                        return r;
1497
1498                pfn += (last - start + 1) / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1499                if (nodes && nodes->size == pfn) {
1500                        pfn = 0;
1501                        ++nodes;
1502                }
1503                start = last + 1;
1504
1505        } while (unlikely(start != mapping->last + 1));
1506
1507        return 0;
1508}
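
/*
 * Worked example (illustrative, assumed values): with 4K system pages, 4K
 * GPU pages and a fragment_size of 9, min_linear_pages is 512.  A large
 * system-memory mapping is then processed in chunks of at most 16K page
 * table entries (64MB) per amdgpu_vm_bo_update_mapping() call; inside a
 * chunk, a run of at least 512 physically contiguous pages is mapped
 * linearly starting from pages_addr[pfn], while shorter runs fall back to
 * the per-page addresses in the dma_addr array.
 */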
1509
1510/**
1511 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1512 *
1513 * @adev: amdgpu_device pointer
1514 * @bo_va: requested BO and VM object
1515 * @clear: if true clear the entries
1516 *
1517 * Fill in the page table entries for @bo_va.
1518 * Returns 0 for success, negative error code for failure.
1519 */
1520int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1521                        struct amdgpu_bo_va *bo_va,
1522                        bool clear)
1523{
1524        struct amdgpu_bo *bo = bo_va->base.bo;
1525        struct amdgpu_vm *vm = bo_va->base.vm;
1526        struct amdgpu_bo_va_mapping *mapping;
1527        dma_addr_t *pages_addr = NULL;
1528        struct ttm_mem_reg *mem;
1529        struct drm_mm_node *nodes;
1530        struct dma_fence *exclusive, **last_update;
1531        uint64_t flags;
1532        int r;
1533
1534        if (clear || !bo_va->base.bo) {
1535                mem = NULL;
1536                nodes = NULL;
1537                exclusive = NULL;
1538        } else {
1539                struct ttm_dma_tt *ttm;
1540
1541                mem = &bo_va->base.bo->tbo.mem;
1542                nodes = mem->mm_node;
1543                if (mem->mem_type == TTM_PL_TT) {
1544                        ttm = container_of(bo_va->base.bo->tbo.ttm,
1545                                           struct ttm_dma_tt, ttm);
1546                        pages_addr = ttm->dma_address;
1547                }
1548                exclusive = reservation_object_get_excl(bo->tbo.resv);
1549        }
1550
1551        if (bo)
1552                flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1553        else
1554                flags = 0x0;
1555
1556        if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1557                last_update = &vm->last_update;
1558        else
1559                last_update = &bo_va->last_pt_update;
1560
1561        if (!clear && bo_va->base.moved) {
1562                bo_va->base.moved = false;
1563                list_splice_init(&bo_va->valids, &bo_va->invalids);
1564
1565        } else if (bo_va->cleared != clear) {
1566                list_splice_init(&bo_va->valids, &bo_va->invalids);
1567        }
1568
1569        list_for_each_entry(mapping, &bo_va->invalids, list) {
1570                r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
1571                                               mapping, flags, nodes,
1572                                               last_update);
1573                if (r)
1574                        return r;
1575        }
1576
1577        if (vm->use_cpu_for_update) {
1578                /* Flush HDP */
1579                mb();
1580                amdgpu_asic_flush_hdp(adev, NULL);
1581        }
1582
1583        spin_lock(&vm->moved_lock);
1584        list_del_init(&bo_va->base.vm_status);
1585        spin_unlock(&vm->moved_lock);
1586
1587        /* If the BO is not in its preferred location add it back to
1588         * the evicted list so that it gets validated again on the
1589         * next command submission.
1590         */
1591        if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1592                uint32_t mem_type = bo->tbo.mem.mem_type;
1593
1594                if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
1595                        list_add_tail(&bo_va->base.vm_status, &vm->evicted);
1596                else
1597                        list_add(&bo_va->base.vm_status, &vm->idle);
1598        }
1599
1600        list_splice_init(&bo_va->invalids, &bo_va->valids);
1601        bo_va->cleared = clear;
1602
1603        if (trace_amdgpu_vm_bo_mapping_enabled()) {
1604                list_for_each_entry(mapping, &bo_va->valids, list)
1605                        trace_amdgpu_vm_bo_mapping(mapping);
1606        }
1607
1608        return 0;
1609}
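
/*
 * Usage sketch (illustrative, not a verbatim in-tree caller): with the BO
 * and the VM page tables reserved, e.g. during command submission, the
 * mappings of a bo_va are brought up to date with:
 *
 *        r = amdgpu_vm_bo_update(adev, bo_va, false);
 *        if (r)
 *                return r;
 *
 * Passing clear == true instead invalidates the entries of all mappings,
 * which is what amdgpu_vm_handle_moved() below falls back to when it cannot
 * reserve the BO's reservation object.
 */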
1610
1611/**
1612 * amdgpu_vm_update_prt_state - update the global PRT state
1613 */
1614static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1615{
1616        unsigned long flags;
1617        bool enable;
1618
1619        spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1620        enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1621        adev->gmc.gmc_funcs->set_prt(adev, enable);
1622        spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1623}
1624
1625/**
1626 * amdgpu_vm_prt_get - add a PRT user
1627 */
1628static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1629{
1630        if (!adev->gmc.gmc_funcs->set_prt)
1631                return;
1632
1633        if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1634                amdgpu_vm_update_prt_state(adev);
1635}
1636
1637/**
1638 * amdgpu_vm_prt_put - drop a PRT user
1639 */
1640static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1641{
1642        if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1643                amdgpu_vm_update_prt_state(adev);
1644}
1645
1646/**
1647 * amdgpu_vm_prt_cb - callback for updating the PRT status
1648 */
1649static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1650{
1651        struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1652
1653        amdgpu_vm_prt_put(cb->adev);
1654        kfree(cb);
1655}
1656
1657/**
1658 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1659 */
1660static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1661                                 struct dma_fence *fence)
1662{
1663        struct amdgpu_prt_cb *cb;
1664
1665        if (!adev->gmc.gmc_funcs->set_prt)
1666                return;
1667
1668        cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1669        if (!cb) {
1670                /* Last resort when we are OOM */
1671                if (fence)
1672                        dma_fence_wait(fence, false);
1673
1674                amdgpu_vm_prt_put(adev);
1675        } else {
1676                cb->adev = adev;
1677                if (!fence || dma_fence_add_callback(fence, &cb->cb,
1678                                                     amdgpu_vm_prt_cb))
1679                        amdgpu_vm_prt_cb(fence, &cb->cb);
1680        }
1681}
1682
1683/**
1684 * amdgpu_vm_free_mapping - free a mapping
1685 *
1686 * @adev: amdgpu_device pointer
1687 * @vm: requested vm
1688 * @mapping: mapping to be freed
1689 * @fence: fence of the unmap operation
1690 *
1691 * Free a mapping and make sure we decrease the PRT usage count if applicable.
1692 */
1693static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1694                                   struct amdgpu_vm *vm,
1695                                   struct amdgpu_bo_va_mapping *mapping,
1696                                   struct dma_fence *fence)
1697{
1698        if (mapping->flags & AMDGPU_PTE_PRT)
1699                amdgpu_vm_add_prt_cb(adev, fence);
1700        kfree(mapping);
1701}
1702
1703/**
1704 * amdgpu_vm_prt_fini - finish all prt mappings
1705 *
1706 * @adev: amdgpu_device pointer
1707 * @vm: requested vm
1708 *
1709 * Register a cleanup callback to disable PRT support after VM dies.
1710 */
1711static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1712{
1713        struct reservation_object *resv = vm->root.base.bo->tbo.resv;
1714        struct dma_fence *excl, **shared;
1715        unsigned i, shared_count;
1716        int r;
1717
1718        r = reservation_object_get_fences_rcu(resv, &excl,
1719                                              &shared_count, &shared);
1720        if (r) {
1721                /* Not enough memory to grab the fence list, as last resort
1722                 * block for all the fences to complete.
1723                 */
1724                reservation_object_wait_timeout_rcu(resv, true, false,
1725                                                    MAX_SCHEDULE_TIMEOUT);
1726                return;
1727        }
1728
1729        /* Add a callback for each fence in the reservation object */
1730        amdgpu_vm_prt_get(adev);
1731        amdgpu_vm_add_prt_cb(adev, excl);
1732
1733        for (i = 0; i < shared_count; ++i) {
1734                amdgpu_vm_prt_get(adev);
1735                amdgpu_vm_add_prt_cb(adev, shared[i]);
1736        }
1737
1738        kfree(shared);
1739}
1740
1741/**
1742 * amdgpu_vm_clear_freed - clear freed BOs in the PT
1743 *
1744 * @adev: amdgpu_device pointer
1745 * @vm: requested vm
1746 * @fence: optional resulting fence (unchanged if no work needed to be done
1747 * or if an error occurred)
1748 *
1749 * Make sure all freed BOs are cleared in the PT.
1750 * Returns 0 for success.
1751 *
1752 * PTs have to be reserved and mutex must be locked!
1753 */
1754int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1755                          struct amdgpu_vm *vm,
1756                          struct dma_fence **fence)
1757{
1758        struct amdgpu_bo_va_mapping *mapping;
1759        uint64_t init_pte_value = 0;
1760        struct dma_fence *f = NULL;
1761        int r;
1762
1763        while (!list_empty(&vm->freed)) {
1764                mapping = list_first_entry(&vm->freed,
1765                        struct amdgpu_bo_va_mapping, list);
1766                list_del(&mapping->list);
1767
1768                if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
1769                        init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
1770
1771                r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
1772                                                mapping->start, mapping->last,
1773                                                init_pte_value, 0, &f);
1774                amdgpu_vm_free_mapping(adev, vm, mapping, f);
1775                if (r) {
1776                        dma_fence_put(f);
1777                        return r;
1778                }
1779        }
1780
1781        if (fence && f) {
1782                dma_fence_put(*fence);
1783                *fence = f;
1784        } else {
1785                dma_fence_put(f);
1786        }
1787
1788        return 0;
1789
1790}
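
/*
 * Usage sketch (illustrative): callers typically run this with the VM
 * reserved, before updating the remaining mappings, and chain the resulting
 * fence into their submission:
 *
 *        struct dma_fence *fence = NULL;
 *
 *        r = amdgpu_vm_clear_freed(adev, vm, &fence);
 *        if (r)
 *                return r;
 *
 * The caller then adds 'fence' as a dependency of its job and drops the
 * reference with dma_fence_put(); passing a NULL fence pointer is also fine
 * when nothing needs to wait for the clears.
 */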
1791
1792/**
1793 * amdgpu_vm_handle_moved - handle moved BOs in the PT
1794 *
1795 * @adev: amdgpu_device pointer
1796 * @vm: requested vm
1798 *
1799 * Make sure all BOs which are moved are updated in the PTs.
1800 * Returns 0 for success.
1801 *
1802 * PTs have to be reserved!
1803 */
1804int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1805                           struct amdgpu_vm *vm)
1806{
1807        struct amdgpu_bo_va *bo_va, *tmp;
1808        struct list_head moved;
1809        bool clear;
1810        int r;
1811
1812        INIT_LIST_HEAD(&moved);
1813        spin_lock(&vm->moved_lock);
1814        list_splice_init(&vm->moved, &moved);
1815        spin_unlock(&vm->moved_lock);
1816
1817        list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
1818                struct reservation_object *resv = bo_va->base.bo->tbo.resv;
1819
1820                /* Per VM BOs never need to be cleared in the page tables */
1821                if (resv == vm->root.base.bo->tbo.resv)
1822                        clear = false;
1823                /* Try to reserve the BO to avoid clearing its ptes */
1824                else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
1825                        clear = false;
1826                /* Somebody else is using the BO right now */
1827                else
1828                        clear = true;
1829
1830                r = amdgpu_vm_bo_update(adev, bo_va, clear);
1831                if (r) {
1832                        spin_lock(&vm->moved_lock);
1833                        list_splice(&moved, &vm->moved);
1834                        spin_unlock(&vm->moved_lock);
1835                        return r;
1836                }
1837
1838                if (!clear && resv != vm->root.base.bo->tbo.resv)
1839                        reservation_object_unlock(resv);
1840
1841        }
1842
1843        return 0;
1844}
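
/*
 * Usage sketch (illustrative): during command submission the helpers above
 * are typically run in this order while the VM is reserved:
 *
 *        r = amdgpu_vm_clear_freed(adev, vm, NULL);
 *        if (!r)
 *                r = amdgpu_vm_handle_moved(adev, vm);
 *
 * so that entries of unmapped ranges are invalidated before the mappings of
 * moved BOs are written again.
 */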
1845
1846/**
1847 * amdgpu_vm_bo_add - add a bo to a specific vm
1848 *
1849 * @adev: amdgpu_device pointer
1850 * @vm: requested vm
1851 * @bo: amdgpu buffer object
1852 *
1853 * Add @bo into the requested vm.
1854 * Add @bo to the list of bos associated with the vm
1855 * Returns newly added bo_va or NULL for failure
1856 *
1857 * Object has to be reserved!
1858 */
1859struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1860                                      struct amdgpu_vm *vm,
1861                                      struct amdgpu_bo *bo)
1862{
1863        struct amdgpu_bo_va *bo_va;
1864
1865        bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1866        if (bo_va == NULL) {
1867                return NULL;
1868        }
1869        amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1870
1871        bo_va->ref_count = 1;
1872        INIT_LIST_HEAD(&bo_va->valids);
1873        INIT_LIST_HEAD(&bo_va->invalids);
1874
1875        return bo_va;
1876}
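
/*
 * Usage sketch (illustrative): with the BO reserved, e.g. in the GEM VA
 * ioctl path, the per-VM tracking structure is created with:
 *
 *        bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *        if (!bo_va)
 *                return -ENOMEM;
 *
 * The returned bo_va is later passed to amdgpu_vm_bo_map() and friends and
 * released again with amdgpu_vm_bo_rmv().
 */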
1877
1878
1879/**
1880 * amdgpu_vm_bo_insert_map - insert a new mapping
1881 *
1882 * @adev: amdgpu_device pointer
1883 * @bo_va: bo_va to store the address
1884 * @mapping: the mapping to insert
1885 *
1886 * Insert a new mapping into all structures.
1887 */
1888static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1889                                    struct amdgpu_bo_va *bo_va,
1890                                    struct amdgpu_bo_va_mapping *mapping)
1891{
1892        struct amdgpu_vm *vm = bo_va->base.vm;
1893        struct amdgpu_bo *bo = bo_va->base.bo;
1894
1895        mapping->bo_va = bo_va;
1896        list_add(&mapping->list, &bo_va->invalids);
1897        amdgpu_vm_it_insert(mapping, &vm->va);
1898
1899        if (mapping->flags & AMDGPU_PTE_PRT)
1900                amdgpu_vm_prt_get(adev);
1901
1902        if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
1903            !bo_va->base.moved) {
1904                spin_lock(&vm->moved_lock);
1905                list_move(&bo_va->base.vm_status, &vm->moved);
1906                spin_unlock(&vm->moved_lock);
1907        }
1908        trace_amdgpu_vm_bo_map(bo_va, mapping);
1909}
1910
1911/**
1912 * amdgpu_vm_bo_map - map bo inside a vm
1913 *
1914 * @adev: amdgpu_device pointer
1915 * @bo_va: bo_va to store the address
1916 * @saddr: where to map the BO
1917 * @offset: requested offset in the BO
1918 * @flags: attributes of pages (read/write/valid/etc.)
1919 *
1920 * Add a mapping of the BO at the specified addr into the VM.
1921 * Returns 0 for success, error for failure.
1922 *
1923 * Object has to be reserved and unreserved outside!
1924 */
1925int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1926                     struct amdgpu_bo_va *bo_va,
1927                     uint64_t saddr, uint64_t offset,
1928                     uint64_t size, uint64_t flags)
1929{
1930        struct amdgpu_bo_va_mapping *mapping, *tmp;
1931        struct amdgpu_bo *bo = bo_va->base.bo;
1932        struct amdgpu_vm *vm = bo_va->base.vm;
1933        uint64_t eaddr;
1934
1935        /* validate the parameters */
1936        if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1937            size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1938                return -EINVAL;
1939
1940        /* make sure object fit at this offset */
1941        eaddr = saddr + size - 1;
1942        if (saddr >= eaddr ||
1943            (bo && offset + size > amdgpu_bo_size(bo)))
1944                return -EINVAL;
1945
1946        saddr /= AMDGPU_GPU_PAGE_SIZE;
1947        eaddr /= AMDGPU_GPU_PAGE_SIZE;
1948
1949        tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1950        if (tmp) {
1951                /* bo and tmp overlap, invalid addr */
1952                dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1953                        "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1954                        tmp->start, tmp->last + 1);
1955                return -EINVAL;
1956        }
1957
1958        mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1959        if (!mapping)
1960                return -ENOMEM;
1961
1962        mapping->start = saddr;
1963        mapping->last = eaddr;
1964        mapping->offset = offset;
1965        mapping->flags = flags;
1966
1967        amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1968
1969        return 0;
1970}
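
/*
 * Usage sketch (illustrative, hypothetical addresses): saddr, offset and
 * size must be multiples of AMDGPU_GPU_PAGE_SIZE and the range must not
 * overlap an existing mapping:
 *
 *        r = amdgpu_vm_bo_map(adev, bo_va, 0x400000, 0, amdgpu_bo_size(bo),
 *                             AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
 *
 * On overlap this returns -EINVAL; amdgpu_vm_bo_replace_map() below clears
 * the conflicting range first instead.
 */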
1971
1972/**
1973 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1974 *
1975 * @adev: amdgpu_device pointer
1976 * @bo_va: bo_va to store the address
1977 * @saddr: where to map the BO
1978 * @offset: requested offset in the BO
1979 * @flags: attributes of pages (read/write/valid/etc.)
1980 *
1981 * Add a mapping of the BO at the specified addr into the VM. Replace existing
1982 * mappings as we do so.
1983 * Returns 0 for success, error for failure.
1984 *
1985 * Object has to be reserved and unreserved outside!
1986 */
1987int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1988                             struct amdgpu_bo_va *bo_va,
1989                             uint64_t saddr, uint64_t offset,
1990                             uint64_t size, uint64_t flags)
1991{
1992        struct amdgpu_bo_va_mapping *mapping;
1993        struct amdgpu_bo *bo = bo_va->base.bo;
1994        uint64_t eaddr;
1995        int r;
1996
1997        /* validate the parameters */
1998        if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1999            size == 0 || size & AMDGPU_GPU_PAGE_MASK)
2000                return -EINVAL;
2001
2002        /* make sure object fit at this offset */
2003        eaddr = saddr + size - 1;
2004        if (saddr >= eaddr ||
2005            (bo && offset + size > amdgpu_bo_size(bo)))
2006                return -EINVAL;
2007
2008        /* Allocate all the needed memory */
2009        mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
2010        if (!mapping)
2011                return -ENOMEM;
2012
2013        r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
2014        if (r) {
2015                kfree(mapping);
2016                return r;
2017        }
2018
2019        saddr /= AMDGPU_GPU_PAGE_SIZE;
2020        eaddr /= AMDGPU_GPU_PAGE_SIZE;
2021
2022        mapping->start = saddr;
2023        mapping->last = eaddr;
2024        mapping->offset = offset;
2025        mapping->flags = flags;
2026
2027        amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2028
2029        return 0;
2030}
2031
2032/**
2033 * amdgpu_vm_bo_unmap - remove bo mapping from vm
2034 *
2035 * @adev: amdgpu_device pointer
2036 * @bo_va: bo_va to remove the address from
2037 * @saddr: where the BO is mapped
2038 *
2039 * Remove a mapping of the BO at the specified addr from the VM.
2040 * Returns 0 for success, error for failure.
2041 *
2042 * Object has to be reserved and unreserved outside!
2043 */
2044int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2045                       struct amdgpu_bo_va *bo_va,
2046                       uint64_t saddr)
2047{
2048        struct amdgpu_bo_va_mapping *mapping;
2049        struct amdgpu_vm *vm = bo_va->base.vm;
2050        bool valid = true;
2051
2052        saddr /= AMDGPU_GPU_PAGE_SIZE;
2053
2054        list_for_each_entry(mapping, &bo_va->valids, list) {
2055                if (mapping->start == saddr)
2056                        break;
2057        }
2058
2059        if (&mapping->list == &bo_va->valids) {
2060                valid = false;
2061
2062                list_for_each_entry(mapping, &bo_va->invalids, list) {
2063                        if (mapping->start == saddr)
2064                                break;
2065                }
2066
2067                if (&mapping->list == &bo_va->invalids)
2068                        return -ENOENT;
2069        }
2070
2071        list_del(&mapping->list);
2072        amdgpu_vm_it_remove(mapping, &vm->va);
2073        mapping->bo_va = NULL;
2074        trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2075
2076        if (valid)
2077                list_add(&mapping->list, &vm->freed);
2078        else
2079                amdgpu_vm_free_mapping(adev, vm, mapping,
2080                                       bo_va->last_pt_update);
2081
2082        return 0;
2083}
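
/*
 * Usage sketch (illustrative): the address is the same byte-granular GPU VA
 * that was passed to amdgpu_vm_bo_map():
 *
 *        r = amdgpu_vm_bo_unmap(adev, bo_va, 0x400000);
 *
 * -ENOENT is returned when no mapping of this bo_va starts at that address.
 */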
2084
2085/**
2086 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2087 *
2088 * @adev: amdgpu_device pointer
2089 * @vm: VM structure to use
2090 * @saddr: start of the range
2091 * @size: size of the range
2092 *
2093 * Remove all mappings in a range, split them as appropriate.
2094 * Returns 0 for success, error for failure.
2095 */
2096int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2097                                struct amdgpu_vm *vm,
2098                                uint64_t saddr, uint64_t size)
2099{
2100        struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2101        LIST_HEAD(removed);
2102        uint64_t eaddr;
2103
2104        eaddr = saddr + size - 1;
2105        saddr /= AMDGPU_GPU_PAGE_SIZE;
2106        eaddr /= AMDGPU_GPU_PAGE_SIZE;
2107
2108        /* Allocate all the needed memory */
2109        before = kzalloc(sizeof(*before), GFP_KERNEL);
2110        if (!before)
2111                return -ENOMEM;
2112        INIT_LIST_HEAD(&before->list);
2113
2114        after = kzalloc(sizeof(*after), GFP_KERNEL);
2115        if (!after) {
2116                kfree(before);
2117                return -ENOMEM;
2118        }
2119        INIT_LIST_HEAD(&after->list);
2120
2121        /* Now gather all removed mappings */
2122        tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2123        while (tmp) {
2124                /* Remember mapping split at the start */
2125                if (tmp->start < saddr) {
2126                        before->start = tmp->start;
2127                        before->last = saddr - 1;
2128                        before->offset = tmp->offset;
2129                        before->flags = tmp->flags;
2130                        before->bo_va = tmp->bo_va;
2131                        list_add(&before->list, &tmp->bo_va->invalids);
2132                }
2133
2134                /* Remember mapping split at the end */
2135                if (tmp->last > eaddr) {
2136                        after->start = eaddr + 1;
2137                        after->last = tmp->last;
2138                        after->offset = tmp->offset;
2139                        after->offset += (after->start - tmp->start) << PAGE_SHIFT;
2140                        after->flags = tmp->flags;
2141                        after->bo_va = tmp->bo_va;
2142                        list_add(&after->list, &tmp->bo_va->invalids);
2143                }
2144
2145                list_del(&tmp->list);
2146                list_add(&tmp->list, &removed);
2147
2148                tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2149        }
2150
2151        /* And free them up */
2152        list_for_each_entry_safe(tmp, next, &removed, list) {
2153                amdgpu_vm_it_remove(tmp, &vm->va);
2154                list_del(&tmp->list);
2155
2156                if (tmp->start < saddr)
2157                        tmp->start = saddr;
2158                if (tmp->last > eaddr)
2159                        tmp->last = eaddr;
2160
2161                tmp->bo_va = NULL;
2162                list_add(&tmp->list, &vm->freed);
2163                trace_amdgpu_vm_bo_unmap(NULL, tmp);
2164        }
2165
2166        /* Insert partial mapping before the range */
2167        if (!list_empty(&before->list)) {
2168                amdgpu_vm_it_insert(before, &vm->va);
2169                if (before->flags & AMDGPU_PTE_PRT)
2170                        amdgpu_vm_prt_get(adev);
2171        } else {
2172                kfree(before);
2173        }
2174
2175        /* Insert partial mapping after the range */
2176        if (!list_empty(&after->list)) {
2177                amdgpu_vm_it_insert(after, &vm->va);
2178                if (after->flags & AMDGPU_PTE_PRT)
2179                        amdgpu_vm_prt_get(adev);
2180        } else {
2181                kfree(after);
2182        }
2183
2184        return 0;
2185}
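
/*
 * Worked example (illustrative, hypothetical addresses): assume a single
 * mapping covering GPU VA 0x100000-0x4fffff and the call
 *
 *        r = amdgpu_vm_bo_clear_mappings(adev, vm, 0x200000, 0x100000);
 *
 * The range 0x200000-0x2fffff is moved to vm->freed for invalidation, while
 * two new mappings are inserted for the surviving pieces 0x100000-0x1fffff
 * and 0x300000-0x4fffff, with the offset of the trailing piece advanced by
 * its distance from the original start of the mapping so it still points at
 * the right part of the BO.
 */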
2186
2187/**
2188 * amdgpu_vm_bo_lookup_mapping - find mapping by address
2189 *
2190 * @vm: the requested VM
2191 *
2192 * Find a mapping by its address.
2193 */
2194struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2195                                                         uint64_t addr)
2196{
2197        return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2198}
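
/*
 * Usage sketch (illustrative): the interval tree is indexed in GPU page
 * units, so byte addresses have to be converted first:
 *
 *        mapping = amdgpu_vm_bo_lookup_mapping(vm, addr / AMDGPU_GPU_PAGE_SIZE);
 *        if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
 *                return -EINVAL;
 */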
2199
2200/**
2201 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
2202 *
2203 * @adev: amdgpu_device pointer
2204 * @bo_va: requested bo_va
2205 *
2206 * Remove @bo_va->bo from the requested vm.
2207 *
2208 * Object has to be reserved!
2209 */
2210void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2211                      struct amdgpu_bo_va *bo_va)
2212{
2213        struct amdgpu_bo_va_mapping *mapping, *next;
2214        struct amdgpu_vm *vm = bo_va->base.vm;
2215
2216        list_del(&bo_va->base.bo_list);
2217
2218        spin_lock(&vm->moved_lock);
2219        list_del(&bo_va->base.vm_status);
2220        spin_unlock(&vm->moved_lock);
2221
2222        list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2223                list_del(&mapping->list);
2224                amdgpu_vm_it_remove(mapping, &vm->va);
2225                mapping->bo_va = NULL;
2226                trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2227                list_add(&mapping->list, &vm->freed);
2228        }
2229        list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2230                list_del(&mapping->list);
2231                amdgpu_vm_it_remove(mapping, &vm->va);
2232                amdgpu_vm_free_mapping(adev, vm, mapping,
2233                                       bo_va->last_pt_update);
2234        }
2235
2236        dma_fence_put(bo_va->last_pt_update);
2237        kfree(bo_va);
2238}
2239
2240/**
2241 * amdgpu_vm_bo_invalidate - mark the bo as invalid
2242 *
2243 * @adev: amdgpu_device pointer
2244 * @bo: amdgpu buffer object
2245 * @evicted: is the BO evicted (moved out of its preferred placement)
2246 *
2247 * Mark @bo as invalid.
2248 */
2249void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2250                             struct amdgpu_bo *bo, bool evicted)
2251{
2252        struct amdgpu_vm_bo_base *bo_base;
2253
2254        /* shadow bo doesn't have bo base, its validation needs its parent */
2255        if (bo->parent && bo->parent->shadow == bo)
2256                bo = bo->parent;
2257
2258        list_for_each_entry(bo_base, &bo->va, bo_list) {
2259                struct amdgpu_vm *vm = bo_base->vm;
2260                bool was_moved = bo_base->moved;
2261
2262                bo_base->moved = true;
2263                if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2264                        if (bo->tbo.type == ttm_bo_type_kernel)
2265                                list_move(&bo_base->vm_status, &vm->evicted);
2266                        else
2267                                list_move_tail(&bo_base->vm_status,
2268                                               &vm->evicted);
2269                        continue;
2270                }
2271
2272                if (was_moved)
2273                        continue;
2274
2275                if (bo->tbo.type == ttm_bo_type_kernel) {
2276                        list_move(&bo_base->vm_status, &vm->relocated);
2277                } else {
2278                        spin_lock(&bo_base->vm->moved_lock);
2279                        list_move(&bo_base->vm_status, &vm->moved);
2280                        spin_unlock(&bo_base->vm->moved_lock);
2281                }
2282        }
2283}
2284
2285static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2286{
2287        /* Total bits covered by PD + PTs */
2288        unsigned bits = ilog2(vm_size) + 18;
2289
2290        /* Make sure the PD is 4K in size up to 8GB address space.
2291         * Above that split equally between PD and PTs. */
2292        if (vm_size <= 8)
2293                return (bits - 9);
2294        else
2295                return ((bits + 3) / 2);
2296}
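
/*
 * Worked example: for vm_size == 64 (GB) the address space spans
 * ilog2(64) + 18 = 24 bits worth of GPU pages; since 64 > 8 the result is
 * (24 + 3) / 2 = 13, so each page table covers 2^13 pages and the page
 * directory keeps the remaining 11 bits.  For vm_size == 8 the result is
 * 21 - 9 = 12, which keeps the page directory at 512 entries (4KB).
 */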
2297
2298/**
2299 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2300 *
2301 * @adev: amdgpu_device pointer
2302 * @vm_size: the default vm size if it's set to auto
2303 */
2304void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
2305                           uint32_t fragment_size_default, unsigned max_level,
2306                           unsigned max_bits)
2307{
2308        uint64_t tmp;
2309
2310        /* adjust vm size first */
2311        if (amdgpu_vm_size != -1) {
2312                unsigned max_size = 1 << (max_bits - 30);
2313
2314                vm_size = amdgpu_vm_size;
2315                if (vm_size > max_size) {
2316                        dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2317                                 amdgpu_vm_size, max_size);
2318                        vm_size = max_size;
2319                }
2320        }
2321
2322        adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2323
2324        tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2325        if (amdgpu_vm_block_size != -1)
2326                tmp >>= amdgpu_vm_block_size - 9;
2327        tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2328        adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
2329        switch (adev->vm_manager.num_level) {
2330        case 3:
2331                adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2332                break;
2333        case 2:
2334                adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2335                break;
2336        case 1:
2337                adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2338                break;
2339        default:
2340                dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2341        }
2342        /* block size depends on vm size and hw setup */
2343        if (amdgpu_vm_block_size != -1)
2344                adev->vm_manager.block_size =
2345                        min((unsigned)amdgpu_vm_block_size, max_bits
2346                            - AMDGPU_GPU_PAGE_SHIFT
2347                            - 9 * adev->vm_manager.num_level);
2348        else if (adev->vm_manager.num_level > 1)
2349                adev->vm_manager.block_size = 9;
2350        else
2351                adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2352
2353        if (amdgpu_vm_fragment_size == -1)
2354                adev->vm_manager.fragment_size = fragment_size_default;
2355        else
2356                adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2357
2358        DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2359                 vm_size, adev->vm_manager.num_level + 1,
2360                 adev->vm_manager.block_size,
2361                 adev->vm_manager.fragment_size);
2362}
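
/*
 * Worked example (illustrative, assumed parameters): with vm_size == 256
 * (GB), max_level == 3, max_bits == 48, fragment_size_default == 9 and no
 * module parameter overrides, max_pfn becomes 2^26, the level calculation
 * yields DIV_ROUND_UP(26, 9) - 1 == 2, so num_level is 2 with a root level
 * of AMDGPU_VM_PDB1, block_size defaults to 9 and fragment_size to 9; the
 * DRM_INFO line above then reports "256 GB, 3 levels, block size is 9-bit,
 * fragment size is 9-bit".
 */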
2363
2364/**
2365 * amdgpu_vm_init - initialize a vm instance
2366 *
2367 * @adev: amdgpu_device pointer
2368 * @vm: requested vm
2369 * @vm_context: Indicates if it is a GFX or Compute context
2370 *
2371 * Init @vm fields.
2372 */
2373int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2374                   int vm_context, unsigned int pasid)
2375{
2376        struct amdgpu_bo_param bp;
2377        struct amdgpu_bo *root;
2378        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2379                AMDGPU_VM_PTE_COUNT(adev) * 8);
2380        unsigned ring_instance;
2381        struct amdgpu_ring *ring;
2382        struct drm_sched_rq *rq;
2383        unsigned long size;
2384        uint64_t flags;
2385        int r, i;
2386
2387        vm->va = RB_ROOT_CACHED;
2388        for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2389                vm->reserved_vmid[i] = NULL;
2390        INIT_LIST_HEAD(&vm->evicted);
2391        INIT_LIST_HEAD(&vm->relocated);
2392        spin_lock_init(&vm->moved_lock);
2393        INIT_LIST_HEAD(&vm->moved);
2394        INIT_LIST_HEAD(&vm->idle);
2395        INIT_LIST_HEAD(&vm->freed);
2396
2397        /* create scheduler entity for page table updates */
2398
2399        ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2400        ring_instance %= adev->vm_manager.vm_pte_num_rings;
2401        ring = adev->vm_manager.vm_pte_rings[ring_instance];
2402        rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2403        r = drm_sched_entity_init(&ring->sched, &vm->entity,
2404                                  rq, NULL);
2405        if (r)
2406                return r;
2407
2408        vm->pte_support_ats = false;
2409
2410        if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
2411                vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2412                                                AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2413
2414                if (adev->asic_type == CHIP_RAVEN)
2415                        vm->pte_support_ats = true;
2416        } else {
2417                vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2418                                                AMDGPU_VM_USE_CPU_FOR_GFX);
2419        }
2420        DRM_DEBUG_DRIVER("VM update mode is %s\n",
2421                         vm->use_cpu_for_update ? "CPU" : "SDMA");
2422        WARN_ONCE((vm->use_cpu_for_update && !amdgpu_vm_is_large_bar(adev)),
2423                  "CPU update of VM recommended only for large BAR system\n");
2424        vm->last_update = NULL;
2425
2426        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
2427        if (vm->use_cpu_for_update)
2428                flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2429        else
2430                flags |= AMDGPU_GEM_CREATE_SHADOW;
2431
2432        size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2433        memset(&bp, 0, sizeof(bp));
2434        bp.size = size;
2435        bp.byte_align = align;
2436        bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
2437        bp.flags = flags;
2438        bp.type = ttm_bo_type_kernel;
2439        bp.resv = NULL;
2440        r = amdgpu_bo_create(adev, &bp, &root);
2441        if (r)
2442                goto error_free_sched_entity;
2443
2444        r = amdgpu_bo_reserve(root, true);
2445        if (r)
2446                goto error_free_root;
2447
2448        r = amdgpu_vm_clear_bo(adev, vm, root,
2449                               adev->vm_manager.root_level,
2450                               vm->pte_support_ats);
2451        if (r)
2452                goto error_unreserve;
2453
2454        amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
2455        amdgpu_bo_unreserve(vm->root.base.bo);
2456
2457        if (pasid) {
2458                unsigned long flags;
2459
2460                spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2461                r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
2462                              GFP_ATOMIC);
2463                spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2464                if (r < 0)
2465                        goto error_free_root;
2466
2467                vm->pasid = pasid;
2468        }
2469
2470        INIT_KFIFO(vm->faults);
2471        vm->fault_credit = 16;
2472
2473        return 0;
2474
2475error_unreserve:
2476        amdgpu_bo_unreserve(vm->root.base.bo);
2477
2478error_free_root:
2479        amdgpu_bo_unref(&vm->root.base.bo->shadow);
2480        amdgpu_bo_unref(&vm->root.base.bo);
2481        vm->root.base.bo = NULL;
2482
2483error_free_sched_entity:
2484        drm_sched_entity_fini(&ring->sched, &vm->entity);
2485
2486        return r;
2487}
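
/*
 * Usage sketch (illustrative, 'fpriv' is the caller's own bookkeeping): a
 * graphics VM for a new file descriptor is typically created in the driver
 * open path with
 *
 *        r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
 *        if (r)
 *                return r;
 *
 * and torn down again with amdgpu_vm_fini() when the file is closed.
 */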
2488
2489/**
2490 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2491 *
2492 * This only works on GFX VMs that don't have any BOs added and no
2493 * page tables allocated yet.
2494 *
2495 * Changes the following VM parameters:
2496 * - use_cpu_for_update
2497 * - pte_support_ats
2498 * - pasid (old PASID is released, because compute manages its own PASIDs)
2499 *
2500 * Reinitializes the page directory to reflect the changed ATS
2501 * setting. May leave behind an unused shadow BO for the page
2502 * directory when switching from SDMA updates to CPU updates.
2503 *
2504 * Returns 0 for success, -errno for errors.
2505 */
2506int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2507{
2508        bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2509        int r;
2510
2511        r = amdgpu_bo_reserve(vm->root.base.bo, true);
2512        if (r)
2513                return r;
2514
2515        /* Sanity checks */
2516        if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2517                r = -EINVAL;
2518                goto error;
2519        }
2520
2521        /* Check if PD needs to be reinitialized and do it before
2522         * changing any other state, in case it fails.
2523         */
2524        if (pte_support_ats != vm->pte_support_ats) {
2525                r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2526                               adev->vm_manager.root_level,
2527                               pte_support_ats);
2528                if (r)
2529                        goto error;
2530        }
2531
2532        /* Update VM state */
2533        vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2534                                    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2535        vm->pte_support_ats = pte_support_ats;
2536        DRM_DEBUG_DRIVER("VM update mode is %s\n",
2537                         vm->use_cpu_for_update ? "CPU" : "SDMA");
2538        WARN_ONCE((vm->use_cpu_for_update && !amdgpu_vm_is_large_bar(adev)),
2539                  "CPU update of VM recommended only for large BAR system\n");
2540
2541        if (vm->pasid) {
2542                unsigned long flags;
2543
2544                spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2545                idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2546                spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2547
2548                vm->pasid = 0;
2549        }
2550
2551error:
2552        amdgpu_bo_unreserve(vm->root.base.bo);
2553        return r;
2554}
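
/*
 * Usage sketch (illustrative): the KFD acquires an existing, still empty
 * GFX VM for compute use roughly like this; the root PD is reserved by the
 * function itself:
 *
 *        r = amdgpu_vm_make_compute(adev, vm);
 *        if (r)
 *                return r;
 */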
2555
2556/**
2557 * amdgpu_vm_free_levels - free PD/PT levels
2558 *
2559 * @adev: amdgpu device structure
2560 * @parent: PD/PT starting level to free
2561 * @level: level of parent structure
2562 *
2563 * Free the page directory or page table level and all sub levels.
2564 */
2565static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2566                                  struct amdgpu_vm_pt *parent,
2567                                  unsigned level)
2568{
2569        unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
2570
2571        if (parent->base.bo) {
2572                list_del(&parent->base.bo_list);
2573                list_del(&parent->base.vm_status);
2574                amdgpu_bo_unref(&parent->base.bo->shadow);
2575                amdgpu_bo_unref(&parent->base.bo);
2576        }
2577
2578        if (parent->entries)
2579                for (i = 0; i < num_entries; i++)
2580                        amdgpu_vm_free_levels(adev, &parent->entries[i],
2581                                              level + 1);
2582
2583        kvfree(parent->entries);
2584}
2585
2586/**
2587 * amdgpu_vm_fini - tear down a vm instance
2588 *
2589 * @adev: amdgpu_device pointer
2590 * @vm: requested vm
2591 *
2592 * Tear down @vm.
2593 * Unbind the VM and remove all bos from the vm bo list
2594 */
2595void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2596{
2597        struct amdgpu_bo_va_mapping *mapping, *tmp;
2598        bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2599        struct amdgpu_bo *root;
2600        u64 fault;
2601        int i, r;
2602
2603        amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2604
2605        /* Clear pending page faults from IH when the VM is destroyed */
2606        while (kfifo_get(&vm->faults, &fault))
2607                amdgpu_ih_clear_fault(adev, fault);
2608
2609        if (vm->pasid) {
2610                unsigned long flags;
2611
2612                spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2613                idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2614                spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2615        }
2616
2617        drm_sched_entity_fini(vm->entity.sched, &vm->entity);
2618
2619        if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2620                dev_err(adev->dev, "still active bo inside vm\n");
2621        }
2622        rbtree_postorder_for_each_entry_safe(mapping, tmp,
2623                                             &vm->va.rb_root, rb) {
2624                list_del(&mapping->list);
2625                amdgpu_vm_it_remove(mapping, &vm->va);
2626                kfree(mapping);
2627        }
2628        list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2629                if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
2630                        amdgpu_vm_prt_fini(adev, vm);
2631                        prt_fini_needed = false;
2632                }
2633
2634                list_del(&mapping->list);
2635                amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2636        }
2637
2638        root = amdgpu_bo_ref(vm->root.base.bo);
2639        r = amdgpu_bo_reserve(root, true);
2640        if (r) {
2641                dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
2642        } else {
2643                amdgpu_vm_free_levels(adev, &vm->root,
2644                                      adev->vm_manager.root_level);
2645                amdgpu_bo_unreserve(root);
2646        }
2647        amdgpu_bo_unref(&root);
2648        dma_fence_put(vm->last_update);
2649        for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2650                amdgpu_vmid_free_reserved(adev, vm, i);
2651}
2652
2653/**
2654 * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2655 *
2656 * @adev: amdgpu_device pointer
2657 * @pasid: PASID to identify the VM
2658 *
2659 * This function is expected to be called in interrupt context. Returns
2660 * true if there was fault credit, false otherwise
2661 */
2662bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2663                                  unsigned int pasid)
2664{
2665        struct amdgpu_vm *vm;
2666
2667        spin_lock(&adev->vm_manager.pasid_lock);
2668        vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
2669        if (!vm) {
2670                /* VM not found, can't track fault credit */
2671                spin_unlock(&adev->vm_manager.pasid_lock);
2672                return true;
2673        }
2674
2675        /* No lock needed. Only accessed by IRQ handler */
2676        if (!vm->fault_credit) {
2677                /* Too many faults in this VM */
2678                spin_unlock(&adev->vm_manager.pasid_lock);
2679                return false;
2680        }
2681
2682        vm->fault_credit--;
2683        spin_unlock(&adev->vm_manager.pasid_lock);
2684        return true;
2685}
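
/*
 * Usage sketch (illustrative): the GMC interrupt handlers use the credit to
 * throttle retry faults from a misbehaving VM, roughly as in:
 *
 *        if (!amdgpu_vm_pasid_fault_credit(adev, pasid))
 *                return 1;
 *
 * where returning 1 tells the IH code that the fault was handled, so a
 * flood of faults from one process does not drown the interrupt ring.
 */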
2686
2687/**
2688 * amdgpu_vm_manager_init - init the VM manager
2689 *
2690 * @adev: amdgpu_device pointer
2691 *
2692 * Initialize the VM manager structures
2693 */
2694void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2695{
2696        unsigned i;
2697
2698        amdgpu_vmid_mgr_init(adev);
2699
2700        adev->vm_manager.fence_context =
2701                dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2702        for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2703                adev->vm_manager.seqno[i] = 0;
2704
2705        atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
2706        spin_lock_init(&adev->vm_manager.prt_lock);
2707        atomic_set(&adev->vm_manager.num_prt_users, 0);
2708
2709        /* Unless overridden by the user, compute VM page tables are by
2710         * default only updated by the CPU on large BAR systems.
2711         */
2712#ifdef CONFIG_X86_64
2713        if (amdgpu_vm_update_mode == -1) {
2714                if (amdgpu_vm_is_large_bar(adev))
2715                        adev->vm_manager.vm_update_mode =
2716                                AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2717                else
2718                        adev->vm_manager.vm_update_mode = 0;
2719        } else
2720                adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2721#else
2722        adev->vm_manager.vm_update_mode = 0;
2723#endif
2724
2725        idr_init(&adev->vm_manager.pasid_idr);
2726        spin_lock_init(&adev->vm_manager.pasid_lock);
2727}
2728
2729/**
2730 * amdgpu_vm_manager_fini - cleanup VM manager
2731 *
2732 * @adev: amdgpu_device pointer
2733 *
2734 * Cleanup the VM manager and free resources.
2735 */
2736void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2737{
2738        WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2739        idr_destroy(&adev->vm_manager.pasid_idr);
2740
2741        amdgpu_vmid_mgr_fini(adev);
2742}
2743
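/**
 * amdgpu_vm_ioctl - Manage VMID reservations for the VM of a DRM file
 *
 * @dev: drm device pointer
 * @data: drm_amdgpu_vm ioctl argument
 * @filp: drm file pointer
 *
 * Handles AMDGPU_VM_OP_RESERVE_VMID and AMDGPU_VM_OP_UNRESERVE_VMID for the
 * gfxhub.  Returns 0 for success, negative error code for failure.
 */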
2744int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2745{
2746        union drm_amdgpu_vm *args = data;
2747        struct amdgpu_device *adev = dev->dev_private;
2748        struct amdgpu_fpriv *fpriv = filp->driver_priv;
2749        int r;
2750
2751        switch (args->in.op) {
2752        case AMDGPU_VM_OP_RESERVE_VMID:
2753                /* currently we only have the requirement to reserve vmid from gfxhub */
2754                r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2755                if (r)
2756                        return r;
2757                break;
2758        case AMDGPU_VM_OP_UNRESERVE_VMID:
2759                amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2760                break;
2761        default:
2762                return -EINVAL;
2763        }
2764
2765        return 0;
2766}
2767