linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
#include "cik_sdma.h"
#include "amdgpu_ucode.h"
#include "gfx_v7_0.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"

enum hqd_dequeue_request_type {
        NO_ACTION = 0,
        DRAIN_PIPE,
        RESET_WAVES
};

enum {
        MAX_TRAPID = 8,         /* 3 bits in the bitfield. */
        MAX_WATCH_ADDRESSES = 4
};

enum {
        ADDRESS_WATCH_REG_ADDR_HI = 0,
        ADDRESS_WATCH_REG_ADDR_LO,
        ADDRESS_WATCH_REG_CNTL,
        ADDRESS_WATCH_REG_MAX
};

/* not defined in the CI/KV reg file */
enum {
        ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
        ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
        ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
        /* extend the mask to 26 bits to match the low address field */
        ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
        ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};

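/*
 * Flat [watch point][register] table; indexed as
 * watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset
 * (see kgd_address_watch_get_offset()).
 */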
static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
        mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
        mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
        mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
        mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
};

union TCP_WATCH_CNTL_BITS {
        struct {
                uint32_t mask:24;
                uint32_t vmid:4;
                uint32_t atc:1;
                uint32_t mode:2;
                uint32_t valid:1;
        } bitfields, bits;
        uint32_t u32All;
        signed int i32All;
        float f32All;
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
                uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id);

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                        uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid);

/* Because REG_GET_FIELD() is used, this function lives in the
 * ASIC-specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
                struct tile_config *config)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

        config->gb_addr_config = adev->gfx.config.gb_addr_config;
        config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFBANK);
        config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFRANKS);

        config->tile_config_ptr = adev->gfx.config.tile_mode_array;
        config->num_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.tile_mode_array);
        config->macro_tile_config_ptr =
                        adev->gfx.config.macrotile_mode_array;
        config->num_macro_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

        return 0;
}

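/*
 * Function table handed to the KFD core; KFD calls back through these
 * entry points for everything that touches GFXv7 (CIK) hardware state.
 */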
static const struct kfd2kgd_calls kfd2kgd = {
        .init_gtt_mem_allocation = alloc_gtt_mem,
        .free_gtt_mem = free_gtt_mem,
        .get_local_mem_info = get_local_mem_info,
        .get_gpu_clock_counter = get_gpu_clock_counter,
        .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
        .alloc_pasid = amdgpu_pasid_alloc,
        .free_pasid = amdgpu_pasid_free,
        .program_sh_mem_settings = kgd_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
        .init_pipeline = kgd_init_pipeline,
        .init_interrupts = kgd_init_interrupts,
        .hqd_load = kgd_hqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_dump = kgd_hqd_dump,
        .hqd_sdma_dump = kgd_hqd_sdma_dump,
        .hqd_is_occupied = kgd_hqd_is_occupied,
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
        .address_watch_disable = kgd_address_watch_disable,
        .address_watch_execute = kgd_address_watch_execute,
        .wave_control_execute = kgd_wave_control_execute,
        .address_watch_get_offset = kgd_address_watch_get_offset,
        .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
        .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
        .write_vmid_invalidate_request = write_vmid_invalidate_request,
        .get_fw_version = get_fw_version,
        .set_scratch_backing_va = set_scratch_backing_va,
        .get_tile_config = get_tile_config,
        .get_cu_info = get_cu_info,
        .get_vram_usage = amdgpu_amdkfd_get_vram_usage
};

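/*
 * Entry point through which amdgpu_amdkfd hands the GFXv7 table to the
 * KFD core. A minimal usage sketch (the real call site lives in the
 * shared amdgpu_amdkfd code, not here):
 *
 *	struct kfd2kgd_calls *f = amdgpu_amdkfd_gfx_7_get_functions();
 *	f->get_fw_version(kgd, KGD_ENGINE_MEC1);
 */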
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
{
        return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
        return (struct amdgpu_device *)kgd;
}

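/*
 * SRBM banking: writing SRBM_GFX_CNTL selects which MEC/pipe/queue/VMID
 * instance subsequent banked register accesses (CP_HQD_*, SH_MEM_*, ...)
 * refer to. lock_srbm() takes srbm_mutex and programs the selector;
 * unlock_srbm() resets it and drops the mutex. Callers must always pair
 * the two around their banked accesses.
 */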
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
                        uint32_t queue, uint32_t vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

        mutex_lock(&adev->srbm_mutex);
        WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        WREG32(mmSRBM_GFX_CNTL, 0);
        mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
        unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                                        uint32_t sh_mem_config,
                                        uint32_t sh_mem_ape1_base,
                                        uint32_t sh_mem_ape1_limit,
                                        uint32_t sh_mem_bases)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        lock_srbm(kgd, 0, 0, 0, vmid);

        WREG32(mmSH_MEM_CONFIG, sh_mem_config);
        WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
        WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
        WREG32(mmSH_MEM_BASES, sh_mem_bases);

        unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        /*
         * We have to assume that there is no outstanding mapping.
         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
         * a mapping is in progress or because a mapping finished and the
         * SW cleared it. So the protocol is to always wait & clear.
         */
        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
                        ATC_VMID0_PASID_MAPPING__VALID_MASK;

        WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

        while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
                cpu_relax();
        WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

        /* Mapping vmid to pasid also for IH block */
        WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

        return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
        /* amdgpu owns the per-pipe state */
        return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t mec;
        uint32_t pipe;

        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, 0, 0);

        WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
                        CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

        unlock_srbm(kgd);

        return 0;
}

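/*
 * Each SDMA engine's RLC queue registers sit at a fixed stride: engine 1
 * starts SDMA1_REGISTER_OFFSET dwords after engine 0, and each RLC queue
 * within an engine is KFD_CIK_SDMA_QUEUE_OFFSET dwords further. The
 * returned base is added to the mmSDMA0_RLC0_* register offsets.
 */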
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
        uint32_t retval;

        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
                        m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

        pr_debug("kfd: sdma base address: 0x%x\n", retval);

        return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
        return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
        return (struct cik_sdma_rlc_registers *)mqd;
}

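/*
 * Loading an HQD copies the MQD fields into the per-queue CP registers
 * in order; this relies on struct cik_mqd laying out
 * cp_mqd_base_addr_lo..cp_mqd_control in the same order as the
 * mmCP_MQD_BASE_ADDR..mmCP_MQD_CONTROL register block.
 */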
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_mqd *m;
        uint32_t *mqd_hqd;
        uint32_t reg, wptr_val, data;
        bool valid_wptr = false;

        m = get_mqd(mqd);

        acquire_queue(kgd, pipe_id, queue_id);

        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
        mqd_hqd = &m->cp_mqd_base_addr_lo;

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

        /* Copy the userspace write pointer value to the register and
         * activate doorbell logic to monitor subsequent changes.
         */
        data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
                             CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

        /* read_user_wptr() may take mm->mmap_sem.
         * Release srbm_mutex to avoid a circular dependency:
         * srbm_mutex -> mmap_sem -> reservation_ww_class_mutex -> srbm_mutex.
         */
        release_queue(kgd);
        valid_wptr = read_user_wptr(mm, wptr, wptr_val);
        acquire_queue(kgd, pipe_id, queue_id);
        if (valid_wptr)
                WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

        data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
        WREG32(mmCP_HQD_ACTIVE, data);

        release_queue(kgd);

        return 0;
}

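/*
 * Dumps the HQD state as {byte offset, value} pairs into a kmalloc'ed
 * array that the caller must free; DUMP_REG() records register offsets
 * in bytes (dword index << 2).
 */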
static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
#define DUMP_REG(addr) do {                             \
                if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
                        break;                          \
                (*dump)[i][0] = (addr) << 2;            \
                (*dump)[i++][1] = RREG32(addr);         \
        } while (0)

        *dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        acquire_queue(kgd, pipe_id, queue_id);

        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
                DUMP_REG(reg);

        release_queue(kgd);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}

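/*
 * An SDMA RLC queue must be idle before it can be (re)programmed: clear
 * RB_ENABLE, poll CONTEXT_STATUS for the IDLE bit (2 s timeout), clear
 * RESUME_CTX on the owning engine, then restore the doorbell, ring
 * buffer and read/write pointers from the MQD and re-enable the ring.
 */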
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        unsigned long end_jiffies;
        uint32_t sdma_base_addr;
        uint32_t data;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

        end_jiffies = msecs_to_jiffies(2000) + jiffies;
        while (true) {
                data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }
        if (m->sdma_engine_id) {
                data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
        } else {
                data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
        }

        data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
                             ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);

        if (read_user_wptr(mm, wptr, data))
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
        else
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
                       m->sdma_rlc_rb_rptr);

        WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
                                m->sdma_rlc_virtual_addr);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
                        m->sdma_rlc_rb_base_hi);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
                        m->sdma_rlc_rb_rptr_addr_lo);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
                        m->sdma_rlc_rb_rptr_addr_hi);

        data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
                             RB_ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

        return 0;
}

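/*
 * Reuses the DUMP_REG() helper defined in kgd_hqd_dump() above; only
 * HQD_N_REGS is redefined for the SDMA register count.
 */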
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
                queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
        uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)

        *dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
             reg++)
                DUMP_REG(sdma_offset + reg);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}

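/*
 * A slot is occupied if the HQD is active and its ring-buffer base
 * matches the given queue address; CP_HQD_PQ_BASE holds the GPU address
 * in 256-byte units, hence the >> 8.
 */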
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t act;
        bool retval = false;
        uint32_t low, high;

        acquire_queue(kgd, pipe_id, queue_id);
        act = RREG32(mmCP_HQD_ACTIVE);
        if (act) {
                low = lower_32_bits(queue_address >> 8);
                high = upper_32_bits(queue_address >> 8);

                if (low == RREG32(mmCP_HQD_PQ_BASE) &&
                                high == RREG32(mmCP_HQD_PQ_BASE_HI))
                        retval = true;
        }
        release_queue(kgd);
        return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t sdma_rlc_rb_cntl;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

        if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
                return true;

        return false;
}

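/*
 * Dequeues an HQD: disable the doorbell, issue a dequeue request of the
 * requested type (drain or wave reset), then poll CP_HQD_ACTIVE until
 * the queue goes inactive or utimeout (in ms) expires.
 */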
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t temp;
        enum hqd_dequeue_request_type type;
        unsigned long flags, end_jiffies;
        int retry;

        acquire_queue(kgd, pipe_id, queue_id);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

        switch (reset_type) {
        case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
                type = DRAIN_PIPE;
                break;
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
        default:
                type = DRAIN_PIPE;
                break;
        }

        /* Workaround: If the IQ timer is active and the wait time is close to
         * or equal to 0, dequeueing is not safe. Wait until either the wait
         * time is larger or the timer is cleared, ensure that IQ_REQ_PEND is
         * cleared before continuing, and ensure wait times are set to at
         * least 0x3.
         */
        local_irq_save(flags);
        preempt_disable();
        retry = 5000; /* wait for 500 usecs at maximum */
        while (true) {
                temp = RREG32(mmCP_HQD_IQ_TIMER);
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
                        pr_debug("HW is processing IQ\n");
                        goto loop;
                }
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
                                        == 3) /* SEM-rearm is safe */
                                break;
                        /* Wait time 3 is safe for CP, but our MMIO read/write
                         * time is close to 1 microsecond, so check for 10 to
                         * leave more buffer room
                         */
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
                                        >= 10)
                                break;
                        pr_debug("IQ timer is active\n");
                } else
                        break;
loop:
                if (!retry) {
                        pr_err("CP HQD IQ timer status time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        retry = 1000;
        while (true) {
                temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
                if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
                        break;
                pr_debug("Dequeue request is pending\n");

                if (!retry) {
                        pr_err("CP HQD dequeue request time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        local_irq_restore(flags);
        preempt_enable();

        WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

        end_jiffies = (utimeout * HZ / 1000) + jiffies;
        while (true) {
                temp = RREG32(mmCP_HQD_ACTIVE);
                if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
                        break;
                if (time_after(jiffies, end_jiffies)) {
                        pr_err("cp queue preemption time out\n");
                        release_queue(kgd);
                        return -ETIME;
                }
                usleep_range(500, 1000);
        }

        release_queue(kgd);
        return 0;
}

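/*
 * Tear-down counterpart of kgd_hqd_sdma_load(): disable the ring buffer,
 * wait (up to utimeout ms) for the context to go idle, clear the
 * doorbell, and save the final read pointer back into the MQD so a
 * later restore resumes where the queue stopped.
 */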
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t temp;
        unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
        temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

        while (true) {
                temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }

        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
                SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

        m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

        return 0;
}

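/*
 * TCP address watch points are always programmed with the valid bit
 * cleared first; they are only marked valid once the address and control
 * registers have been fully written (see kgd_address_watch_execute()).
 */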
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        union TCP_WATCH_CNTL_BITS cntl;
        unsigned int i;

        cntl.u32All = 0;

        cntl.bitfields.valid = 0;
        cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
        cntl.bitfields.atc = 1;

        /* Turning off this address until we set all the registers */
        for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
                WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
                        ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        union TCP_WATCH_CNTL_BITS cntl;

        cntl.u32All = cntl_val;

        /* Turning off this watch point until we set all the registers */
        cntl.bitfields.valid = 0;
        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_ADDR_HI], addr_hi);

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_ADDR_LO], addr_lo);

        /* Enable the watch point */
        cntl.bitfields.valid = 1;

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t data;

        mutex_lock(&adev->grbm_idx_mutex);

        WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
        WREG32(mmSQ_CMD, sq_cmd);

        /* Restore the GRBM_GFX_INDEX register */

        data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
                GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
                GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

        WREG32(mmGRBM_GFX_INDEX, data);

        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset)
{
        return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
                                                        uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                                uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        lock_srbm(kgd, 0, 0, 0, vmid);
        WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
        unlock_srbm(kgd);
}

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
        const union amdgpu_firmware_header *hdr;

        BUG_ON(kgd == NULL);

        switch (type) {
        case KGD_ENGINE_PFP:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.pfp_fw->data;
                break;

        case KGD_ENGINE_ME:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.me_fw->data;
                break;

        case KGD_ENGINE_CE:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.ce_fw->data;
                break;

        case KGD_ENGINE_MEC1:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.mec_fw->data;
                break;

        case KGD_ENGINE_MEC2:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.mec2_fw->data;
                break;

        case KGD_ENGINE_RLC:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.rlc_fw->data;
                break;

        case KGD_ENGINE_SDMA1:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->sdma.instance[0].fw->data;
                break;

        case KGD_ENGINE_SDMA2:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->sdma.instance[1].fw->data;
                break;

        default:
                return 0;
        }

        if (hdr == NULL)
                return 0;

        /* Only 12 bits in use */
        return hdr->common.ucode_version;
}