linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
#include "athub/athub_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"

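/* Dequeue request types written to CP_HQD_DEQUEUE_REQUEST by kgd_hqd_destroy() */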
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

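/*
 * lock_srbm()/unlock_srbm() bracket accesses to registers that depend on the
 * GRBM/SRBM select: they take adev->srbm_mutex and program nv_grbm_select()
 * with the desired MEC, pipe, queue and VMID, then restore the select to 0
 * and drop the mutex. acquire_queue()/release_queue() are thin wrappers that
 * translate a KFD pipe_id into the (MEC, pipe) pair expected by the hardware.
 */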
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

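/*
 * Return the bit for this queue in the CP_PQ_WPTR_POLL_CNTL1 queue mask.
 * Queues are numbered linearly across pipes; for example, with 8 queues per
 * pipe, pipe 1 queue 2 corresponds to bit 10.
 */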
static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
	WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	/* APE1 no longer exists on GFX9 and later, so the ape1 parameters are ignored */

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);

	pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

#if 0
	/* TODO: uncomment this code when the hardware support is ready. */
	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	pr_debug("ATHUB mapping update finished\n");
	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);
#endif

	/* Mapping vmid to pasid also for IH block */
	pr_debug("update mapping for IH block and mmhub");
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	return 0;
}

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

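/*
 * Enable the timestamp and opcode-error interrupt sources in CPC_INT_CNTL
 * for the given MEC pipe, with the SRBM select pointing at that pipe.
 */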
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

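/*
 * Return a base register offset for the given SDMA engine and queue such
 * that adding an mmSDMA0_RLC0_* register constant yields the absolute
 * offset of that register in the selected queue's RLC block. For example,
 * engine 0 queue 1 resolves to the SDMA0 instance base plus one RLC stride
 * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL).
 */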
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
		/* On gfx10, mmSDMA1_xxx registers are defined NOT based
		 * on SDMA1 base address (dw 0x1860) but based on SDMA0
		 * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL
		 * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc
		 * below
		 */
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
	};

	uint32_t retval = sdma_engine_reg_base[engine_id]
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, retval);

	return retval;
}

#if 0
static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
{
	uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
			mmTCP_WATCH0_ADDR_H;

	pr_debug("kfd: reg watch base address: 0x%x\n", retval);

	return retval;
}
#endif

static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}

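/*
 * Load a compute HQD directly over MMIO from the MQD image: the HQD
 * registers from CP_MQD_BASE_ADDR up to CP_HQD_PQ_WPTR_HI are written from
 * the corresponding MQD fields, the doorbell is enabled, an optional WPTR
 * poll address is armed, and finally CP_HQD_ACTIVE is set.
 */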
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);

	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
		       lower_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
		       upper_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		       lower_32_bits((uint64_t)wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR,
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);

	release_queue(kgd);

	return 0;
}

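/*
 * Map the HIQ by submitting a MAP_QUEUES packet (engine_sel = HIQ) on the
 * KIQ ring rather than writing the HQD registers directly.
 */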
static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v10_compute_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}

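/*
 * Dump the HQD register range programmed by kgd_hqd_load() (CP_MQD_BASE_ADDR
 * through CP_HQD_PQ_WPTR_HI) as (offset, value) pairs into a buffer
 * allocated for the caller.
 */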
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr);	\
	} while (0)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

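/*
 * Load an SDMA queue from its MQD: disable the ring buffer, wait up to two
 * seconds for the RLC context to go idle, restore the doorbell, read/write
 * pointers and ring base from the MQD (taking the current wptr from user
 * space when it is readable), then re-enable the ring buffer.
 */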
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

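/*
 * A compute queue counts as occupied when CP_HQD_ACTIVE is set and the
 * HQD's programmed PQ base matches queue_address >> 8 (the PQ base
 * registers hold a 256-byte-aligned address).
 */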
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
		   high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

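/*
 * Preempt or reset a compute queue: translate the KFD preempt type into a
 * dequeue request, write it to CP_HQD_DEQUEUE_REQUEST, and poll
 * CP_HQD_ACTIVE until the queue goes inactive or utimeout (in ms) expires.
 */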
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v10_compute_mqd *m = get_mqd(mqd);

	if (amdgpu_in_reset(adev))
		return -EIO;

#if 0
	unsigned long flags;
	int retry;
#endif

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
		type = SAVE_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

#if 0 /* Is this still needed? */
	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
	 * cleared before continuing. Also, ensure wait times are set to at
	 * least 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();
#endif

	WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

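/*
 * Read back the ATC VMID-to-PASID mapping register for a VMID, returning
 * the PASID through *p_pasid and whether the mapping's VALID bit is set.
 */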
static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

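/*
 * Issue an SQ_CMD (for example a wave halt or kill) to the shader engines
 * selected by gfx_index_val, then restore GRBM_GFX_INDEX to broadcast mode
 * so later register writes reach all SEs/SAs/instances again.
 */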
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SA_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	/* SDMA is on gfxhub as well for Navi1* series */
	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

static void program_trap_handler_settings(struct kgd_dev *kgd,
		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	/*
	 * Program TBA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
			lower_32_bits(tba_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
			upper_32_bits(tba_addr >> 8) |
			(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));

	/*
	 * Program TMA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
			lower_32_bits(tma_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
			upper_32_bits(tma_addr >> 8));

	unlock_srbm(kgd);
}

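/*
 * Entry points exported to the KFD through the kfd2kgd interface for
 * GFX v10 (Navi1x) devices.
 */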
const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hiq_mqd_load = kgd_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.program_trap_handler_settings = program_trap_handler_settings,
};