linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
#include "cik_sdma.h"
#include "amdgpu_ucode.h"
#include "gfx_v7_0.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"

enum hqd_dequeue_request_type {
        NO_ACTION = 0,
        DRAIN_PIPE,
        RESET_WAVES
};

enum {
        MAX_TRAPID = 8,         /* 3 bits in the bitfield. */
        MAX_WATCH_ADDRESSES = 4
};

enum {
        ADDRESS_WATCH_REG_ADDR_HI = 0,
        ADDRESS_WATCH_REG_ADDR_LO,
        ADDRESS_WATCH_REG_CNTL,
        ADDRESS_WATCH_REG_MAX
};

/*  not defined in the CI/KV reg file  */
enum {
        ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
        ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
        ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
        /* extend the mask to 26 bits to match the low address field */
        ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
        ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};

static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
        mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
        mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
        mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
        mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
};

union TCP_WATCH_CNTL_BITS {
        struct {
                uint32_t mask:24;
                uint32_t vmid:4;
                uint32_t atc:1;
                uint32_t mode:2;
                uint32_t valid:1;
        } bitfields, bits;
        uint32_t u32All;
        signed int i32All;
        float f32All;
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
                uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id);

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                        uint8_t vmid);

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
                struct tile_config *config)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

        config->gb_addr_config = adev->gfx.config.gb_addr_config;
        config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFBANK);
        config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFRANKS);

        config->tile_config_ptr = adev->gfx.config.tile_mode_array;
        config->num_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.tile_mode_array);
        config->macro_tile_config_ptr =
                        adev->gfx.config.macrotile_mode_array;
        config->num_macro_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

        return 0;
}

static const struct kfd2kgd_calls kfd2kgd = {
        .init_gtt_mem_allocation = alloc_gtt_mem,
        .free_gtt_mem = free_gtt_mem,
        .get_local_mem_info = get_local_mem_info,
        .get_gpu_clock_counter = get_gpu_clock_counter,
        .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
        .alloc_pasid = amdgpu_pasid_alloc,
        .free_pasid = amdgpu_pasid_free,
        .program_sh_mem_settings = kgd_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
        .init_pipeline = kgd_init_pipeline,
        .init_interrupts = kgd_init_interrupts,
        .hqd_load = kgd_hqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_dump = kgd_hqd_dump,
        .hqd_sdma_dump = kgd_hqd_sdma_dump,
        .hqd_is_occupied = kgd_hqd_is_occupied,
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
        .address_watch_disable = kgd_address_watch_disable,
        .address_watch_execute = kgd_address_watch_execute,
        .wave_control_execute = kgd_wave_control_execute,
        .address_watch_get_offset = kgd_address_watch_get_offset,
        .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
        .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
        .get_fw_version = get_fw_version,
        .set_scratch_backing_va = set_scratch_backing_va,
        .get_tile_config = get_tile_config,
        .get_cu_info = get_cu_info,
        .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
        .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
        .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
        .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
        .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
        .set_vm_context_page_table_base = set_vm_context_page_table_base,
        .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
        .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
        .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
        .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
        .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
        .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
        .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
        .submit_ib = amdgpu_amdkfd_submit_ib,
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
{
        return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
        return (struct amdgpu_device *)kgd;
}

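/* Select which MEC/pipe/queue/VMID subsequent banked register accesses refer
 * to, holding srbm_mutex until unlock_srbm() clears the selection again.
 */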
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
                        uint32_t queue, uint32_t vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

        mutex_lock(&adev->srbm_mutex);
        WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        WREG32(mmSRBM_GFX_CNTL, 0);
        mutex_unlock(&adev->srbm_mutex);
}

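/* Translate a flat KFD pipe_id into the hardware (MEC, pipe) pair and select
 * that queue via SRBM. The "+ 1" accounts for the MEID encoding, where ME 0
 * is the graphics micro engine and the compute MECs start at ME 1.
 */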
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
        unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                                        uint32_t sh_mem_config,
                                        uint32_t sh_mem_ape1_base,
                                        uint32_t sh_mem_ape1_limit,
                                        uint32_t sh_mem_bases)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        lock_srbm(kgd, 0, 0, 0, vmid);

        WREG32(mmSH_MEM_CONFIG, sh_mem_config);
        WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
        WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
        WREG32(mmSH_MEM_BASES, sh_mem_bases);

        unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        /*
         * We have to assume that there is no outstanding mapping.
         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
         * a mapping is in progress or because a mapping finished and the
         * SW cleared it. So the protocol is to always wait & clear.
         */
        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
                        ATC_VMID0_PASID_MAPPING__VALID_MASK;

        WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

        while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
                cpu_relax();
        WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

        /* Mapping vmid to pasid also for IH block */
        WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

        return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
        /* amdgpu owns the per-pipe state */
        return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t mec;
        uint32_t pipe;

        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, 0, 0);

        WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
                        CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

        unlock_srbm(kgd);

        return 0;
}

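/* Compute the MMIO offset of an SDMA RLC queue's register block from the
 * engine and queue IDs saved in the MQD; all per-queue SDMA registers below
 * are addressed relative to this base.
 */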
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
        uint32_t retval;

        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
                        m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

        pr_debug("kfd: sdma base address: 0x%x\n", retval);

        return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
        return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
        return (struct cik_sdma_rlc_registers *)mqd;
}

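/* Load a compute HQD from its MQD and activate it: copy the MQD fields that
 * mirror CP_MQD_BASE_ADDR..CP_MQD_CONTROL into the HQD registers, enable the
 * doorbell, restore the user-space write pointer if it can be read, and
 * finally set CP_HQD_ACTIVE.
 */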
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_mqd *m;
        uint32_t *mqd_hqd;
        uint32_t reg, wptr_val, data;
        bool valid_wptr = false;

        m = get_mqd(mqd);

        acquire_queue(kgd, pipe_id, queue_id);

        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
        mqd_hqd = &m->cp_mqd_base_addr_lo;

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

        /* Copy userspace write pointer value to register.
         * Activate doorbell logic to monitor subsequent changes.
         */
        data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
                             CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

        /* read_user_ptr may take the mm->mmap_sem.
         * release srbm_mutex to avoid circular dependency between
         * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
         */
        release_queue(kgd);
        valid_wptr = read_user_wptr(mm, wptr, wptr_val);
        acquire_queue(kgd, pipe_id, queue_id);
        if (valid_wptr)
                WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

        data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
        WREG32(mmCP_HQD_ACTIVE, data);

        release_queue(kgd);

        return 0;
}

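/* Dump the static thread-management and HQD registers of one compute queue
 * into a kmalloc'ed array of (register offset, value) pairs; the caller owns
 * and frees the returned buffer.
 */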
static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
#define DUMP_REG(addr) do {                             \
                if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
                        break;                          \
                (*dump)[i][0] = (addr) << 2;            \
                (*dump)[i++][1] = RREG32(addr);         \
        } while (0)

        *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        acquire_queue(kgd, pipe_id, queue_id);

        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
                DUMP_REG(reg);

        release_queue(kgd);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}

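/* Restore an SDMA RLC queue from its MQD: disable the ring buffer, wait up to
 * two seconds for the context to go idle, clear RESUME_CTX on the owning SDMA
 * engine, then reprogram the doorbell, read/write pointers and ring base
 * before re-enabling the ring buffer.
 */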
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        unsigned long end_jiffies;
        uint32_t sdma_base_addr;
        uint32_t data;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

        end_jiffies = msecs_to_jiffies(2000) + jiffies;
        while (true) {
                data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }
        if (m->sdma_engine_id) {
                data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
        } else {
                data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
        }

        data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
                             ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);

        if (read_user_wptr(mm, wptr, data))
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
        else
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
                       m->sdma_rlc_rb_rptr);

        WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
                                m->sdma_rlc_virtual_addr);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
                        m->sdma_rlc_rb_base_hi);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
                        m->sdma_rlc_rb_rptr_addr_lo);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
                        m->sdma_rlc_rb_rptr_addr_hi);

        data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
                             RB_ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

        return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
                queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
        uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)

        *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
             reg++)
                DUMP_REG(sdma_offset + reg);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t act;
        bool retval = false;
        uint32_t low, high;

        acquire_queue(kgd, pipe_id, queue_id);
        act = RREG32(mmCP_HQD_ACTIVE);
        if (act) {
                low = lower_32_bits(queue_address >> 8);
                high = upper_32_bits(queue_address >> 8);

                if (low == RREG32(mmCP_HQD_PQ_BASE) &&
                                high == RREG32(mmCP_HQD_PQ_BASE_HI))
                        retval = true;
        }
        release_queue(kgd);
        return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t sdma_rlc_rb_cntl;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

        if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
                return true;

        return false;
}

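/* Preempt or reset a compute queue: disable its doorbell, work around the IQ
 * timer hazard described below, issue the requested dequeue type and poll
 * CP_HQD_ACTIVE until the queue is gone or utimeout (in ms) expires.
 */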
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t temp;
        enum hqd_dequeue_request_type type;
        unsigned long flags, end_jiffies;
        int retry;

        acquire_queue(kgd, pipe_id, queue_id);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

        switch (reset_type) {
        case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
                type = DRAIN_PIPE;
                break;
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
        default:
                type = DRAIN_PIPE;
                break;
        }

        /* Workaround: If IQ timer is active and the wait time is close to or
         * equal to 0, dequeueing is not safe. Wait until either the wait time
         * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
         * cleared before continuing. Also, ensure wait times are set to at
         * least 0x3.
         */
        local_irq_save(flags);
        preempt_disable();
        retry = 5000; /* wait for 500 usecs at maximum */
        while (true) {
                temp = RREG32(mmCP_HQD_IQ_TIMER);
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
                        pr_debug("HW is processing IQ\n");
                        goto loop;
                }
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
                                        == 3) /* SEM-rearm is safe */
                                break;
                        /* Wait time 3 is safe for CP, but our MMIO read/write
                         * time is close to 1 microsecond, so check for 10 to
                         * leave more buffer room
                         */
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
                                        >= 10)
                                break;
                        pr_debug("IQ timer is active\n");
                } else
                        break;
loop:
                if (!retry) {
                        pr_err("CP HQD IQ timer status time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        retry = 1000;
        while (true) {
                temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
                if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
                        break;
                pr_debug("Dequeue request is pending\n");

                if (!retry) {
                        pr_err("CP HQD dequeue request time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        local_irq_restore(flags);
        preempt_enable();

        WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

        end_jiffies = (utimeout * HZ / 1000) + jiffies;
        while (true) {
                temp = RREG32(mmCP_HQD_ACTIVE);
                if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
                        break;
                if (time_after(jiffies, end_jiffies)) {
                        pr_err("cp queue preemption time out\n");
                        release_queue(kgd);
                        return -ETIME;
                }
                usleep_range(500, 1000);
        }

        release_queue(kgd);
        return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t temp;
        unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
        temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

        while (true) {
                temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }

        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
                SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

        m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

        return 0;
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        union TCP_WATCH_CNTL_BITS cntl;
        unsigned int i;

        cntl.u32All = 0;

        cntl.bitfields.valid = 0;
        cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
        cntl.bitfields.atc = 1;

        /* Turning off this address until we set all the registers */
        for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
                WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
                        ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        return 0;
}

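/* Program one TCP address-watch point: clear the CNTL valid bit while the
 * high/low address registers are updated, then set it again to arm the
 * watch point.
 */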
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        union TCP_WATCH_CNTL_BITS cntl;

        cntl.u32All = cntl_val;

        /* Turning off this watch point until we set all the registers */
        cntl.bitfields.valid = 0;
        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_ADDR_HI], addr_hi);

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_ADDR_LO], addr_lo);

        /* Enable the watch point */
        cntl.bitfields.valid = 1;

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t data;

        mutex_lock(&adev->grbm_idx_mutex);

        WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
        WREG32(mmSQ_CMD, sq_cmd);

        /*  Restore the GRBM_GFX_INDEX register  */

        data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
                GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
                GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

        WREG32(mmGRBM_GFX_INDEX, data);

        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset)
{
        return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
                                                        uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                                uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        lock_srbm(kgd, 0, 0, 0, vmid);
        WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
        unlock_srbm(kgd);
}

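/* Report the ucode version of the requested engine, taken from the common
 * header of the firmware image that was loaded for it.
 */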
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
        const union amdgpu_firmware_header *hdr;

        switch (type) {
        case KGD_ENGINE_PFP:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.pfp_fw->data;
                break;

        case KGD_ENGINE_ME:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.me_fw->data;
                break;

        case KGD_ENGINE_CE:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.ce_fw->data;
                break;

        case KGD_ENGINE_MEC1:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.mec_fw->data;
                break;

        case KGD_ENGINE_MEC2:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.mec2_fw->data;
                break;

        case KGD_ENGINE_RLC:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.rlc_fw->data;
                break;

        case KGD_ENGINE_SDMA1:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->sdma.instance[0].fw->data;
                break;

        case KGD_ENGINE_SDMA2:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->sdma.instance[1].fw->data;
                break;

        default:
                return 0;
        }

        if (hdr == NULL)
                return 0;

        /* Only 12 bits in use */
        return hdr->common.ucode_version;
}

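/* Program the page-table base register for a KFD-owned VMID. KFD VMIDs live
 * in the second set of VM contexts, so the offset is taken from
 * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR with "vmid - 8".
 */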
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
                        uint32_t page_table_base)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
                pr_err("trying to set page table base for wrong VMID\n");
                return;
        }
        WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
}

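/* Flush the GPU TLB for the VMID currently mapped to the given PASID by
 * scanning the ATC VMID-PASID mapping registers of the KFD-owned VMIDs.
 */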
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
        int vmid;
        unsigned int tmp;

        for (vmid = 0; vmid < 16; vmid++) {
                if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
                        continue;

                tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
                if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
                        (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
                        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
                        RREG32(mmVM_INVALIDATE_RESPONSE);
                        break;
                }
        }

        return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
                pr_err("non kfd vmid\n");
                return 0;
        }

        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
        RREG32(mmVM_INVALIDATE_RESPONSE);
        return 0;
}