#include "amdgpu_vm.h"
#include "amdgpu_job.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"

#define AMDGPU_VM_SDMA_MIN_NUM_DW	256u
#define AMDGPU_VM_SDMA_MAX_NUM_DW	(16u * 1024u)

/**
 * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped
 *
 * @table: newly allocated or validated PD/PT
 */
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
	int r;

	r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
	if (r)
		return r;

	if (table->shadow)
		r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);

	return r;
}

/**
 * amdgpu_vm_sdma_prepare - prepare SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @resv: reservation object with embedded fence
 * @sync_mode: synchronization mode
 *
 * Returns:
 * Negative errno, 0 for success.
 */
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
				  struct dma_resv *resv,
				  enum amdgpu_sync_mode sync_mode)
{
	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
						     : AMDGPU_IB_POOL_DELAYED;
	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
	int r;

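	/* the IB size is given in bytes, hence four bytes per dword */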
	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
	if (r)
		return r;

	p->num_dw_left = ndw;

	if (!resv)
		return 0;

	return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm);
}

/**
 * amdgpu_vm_sdma_commit - commit SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @fence: resulting fence
 *
 * Returns:
 * Negative errno, 0 for success.
 */
static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
				 struct dma_fence **fence)
{
	struct amdgpu_ib *ib = p->job->ibs;
	struct drm_sched_entity *entity;
	struct amdgpu_ring *ring;
	struct dma_fence *f;
	int r;

	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
	ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);

	WARN_ON(ib->length_dw == 0);
	amdgpu_ring_pad_ib(ring, ib);
	WARN_ON(ib->length_dw > p->num_dw_left);
	r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
	if (r)
		goto error;

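	/*
	 * Remember the fence: unlocked updates only track it in
	 * vm->last_unlocked, while locked updates also add it to the root PD
	 * reservation object. Immediate submissions never hand a fence back
	 * to the caller.
	 */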
	if (p->unlocked) {
		struct dma_fence *tmp = dma_fence_get(f);

		swap(p->vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	} else {
		amdgpu_bo_fence(p->vm->root.bo, f, true);
	}

	if (fence && !p->immediate)
		swap(*fence, f);
	dma_fence_put(f);
	return 0;

error:
	amdgpu_job_free(p->job);
	return r;
}

/**
 * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @count: number of page entries to copy
 *
 * Traces the parameters and calls the DMA function to copy the PTEs.
 */
static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
				     struct amdgpu_bo *bo, uint64_t pe,
				     unsigned count)
{
	struct amdgpu_ib *ib = p->job->ibs;
	uint64_t src = ib->gpu_addr;

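	/* the PTE payload was stored at the tail of the IB by the caller */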
	src += p->num_dw_left * 4;

	pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
	trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);

	amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
}

/**
 * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * for setting up the page entries.
 */
static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
				    struct amdgpu_bo *bo, uint64_t pe,
				    uint64_t addr, unsigned count,
				    uint32_t incr, uint64_t flags)
{
	struct amdgpu_ib *ib = p->job->ibs;

	pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
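	/*
	 * For very short runs, writing one or two PTE values directly into
	 * the IB is cheaper than emitting a full PTE/PDE generation packet,
	 * which only pays off for larger runs.
	 */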
	if (count < 3) {
		amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
				    count, incr);
	} else {
		amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr,
				      count, incr, flags);
	}
}

/**
 * amdgpu_vm_sdma_update - execute VM update
 *
 * @p: see amdgpu_vm_update_params definition
 * @vmbo: PD/PT to update
 * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Reserve space in the IB, setup mapping buffer on demand and write commands
 * to the IB.
 */
static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
				 struct amdgpu_bo_vm *vmbo, uint64_t pe,
				 uint64_t addr, unsigned count, uint32_t incr,
				 uint64_t flags)
{
	struct amdgpu_bo *bo = &vmbo->bo;
	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
						     : AMDGPU_IB_POOL_DELAYED;
	struct dma_resv_iter cursor;
	unsigned int i, ndw, nptes;
	struct dma_fence *fence;
	uint64_t *pte;
	int r;

	/* Wait for PD/PT moves to be completed */
	dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
				DMA_RESV_USAGE_KERNEL, fence) {
		r = amdgpu_sync_fence(&p->job->sync, fence);
		if (r)
			return r;
	}

	do {
		ndw = p->num_dw_left;
		ndw -= p->job->ibs->length_dw;

		if (ndw < 32) {
			r = amdgpu_vm_sdma_commit(p, NULL);
			if (r)
				return r;

			/* estimate how many dw we need */
			ndw = 32;
			if (p->pages_addr)
				ndw += count * 2;
			ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
			ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);

			r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
						     &p->job);
			if (r)
				return r;

			p->num_dw_left = ndw;
		}

		if (!p->pages_addr) {
			/* set page commands needed */
			if (vmbo->shadow)
				amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
							count, incr, flags);
			amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
						incr, flags);
			return 0;
		}

		/* copy commands needed */
		ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
			(vmbo->shadow ? 2 : 1);

		/* for padding */
		ndw -= 7;

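		/* each 64bit PTE takes two dwords, limiting us to ndw / 2 */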
		nptes = min(count, ndw / 2);

		/* Put the PTEs at the end of the IB. */
		p->num_dw_left -= nptes * 2;
		pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]);
		for (i = 0; i < nptes; ++i, addr += incr) {
			pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
			pte[i] |= flags;
		}

		if (vmbo->shadow)
			amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
		amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);

		pe += nptes * 8;
		count -= nptes;
	} while (count);

	return 0;
}

const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
	.map_table = amdgpu_vm_sdma_map_table,
	.prepare = amdgpu_vm_sdma_prepare,
	.update = amdgpu_vm_sdma_update,
	.commit = amdgpu_vm_sdma_commit
};
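
/*
 * A sketch of how these callbacks get wired up, assuming the usual selection
 * done in amdgpu_vm_init() (illustrative, not part of this file):
 *
 *	if (vm->use_cpu_for_update)
 *		vm->update_funcs = &amdgpu_vm_cpu_funcs;
 *	else
 *		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 *
 * Callers then drive page table updates through vm->update_funcs->prepare(),
 * ->update() and ->commit() without caring whether the CPU or the SDMA
 * engine performs the writes.
 */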