linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
   1/*
   2 * Copyright 2014-2018 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#define pr_fmt(fmt) "kfd2kgd: " fmt
  24
  25#include <linux/module.h>
  26#include <linux/fdtable.h>
  27#include <linux/uaccess.h>
  28#include <linux/firmware.h>
  29#include <drm/drmP.h>
  30#include "amdgpu.h"
  31#include "amdgpu_amdkfd.h"
  32#include "amdgpu_ucode.h"
  33#include "soc15_hw_ip.h"
  34#include "gc/gc_9_0_offset.h"
  35#include "gc/gc_9_0_sh_mask.h"
  36#include "vega10_enum.h"
  37#include "sdma0/sdma0_4_0_offset.h"
  38#include "sdma0/sdma0_4_0_sh_mask.h"
  39#include "sdma1/sdma1_4_0_offset.h"
  40#include "sdma1/sdma1_4_0_sh_mask.h"
  41#include "athub/athub_1_0_offset.h"
  42#include "athub/athub_1_0_sh_mask.h"
  43#include "oss/osssys_4_0_offset.h"
  44#include "oss/osssys_4_0_sh_mask.h"
  45#include "soc15_common.h"
  46#include "v9_structs.h"
  47#include "soc15.h"
  48#include "soc15d.h"
  49
   50/* HACK: MMHUB and GC both have VM-related registers with the same
  51 * names but different offsets. Define the MMHUB register we need here
  52 * with a prefix. A proper solution would be to move the functions
  53 * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
  54 * respectively.
  55 */
  56#define mmMMHUB_VM_INVALIDATE_ENG16_REQ                         0x06f3
  57#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX                0
  58
  59#define mmMMHUB_VM_INVALIDATE_ENG16_ACK                         0x0705
  60#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX                0
  61
  62#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32           0x072b
  63#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX  0
  64#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32           0x072c
  65#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX  0
  66
  67#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32          0x074b
  68#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
  69#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32          0x074c
  70#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
  71
  72#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32            0x076b
  73#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX   0
  74#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32            0x076c
  75#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX   0
  76
  77#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32             0x0727
  78#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX    0
  79#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32             0x0728
  80#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX    0
  81
  82#define V9_PIPE_PER_MEC         (4)
  83#define V9_QUEUES_PER_PIPE_MEC  (8)
  84
  85enum hqd_dequeue_request_type {
  86        NO_ACTION = 0,
  87        DRAIN_PIPE,
  88        RESET_WAVES
  89};
  90
  91/*
  92 * Register access functions
  93 */
  94
  95static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
  96                uint32_t sh_mem_config,
  97                uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
  98                uint32_t sh_mem_bases);
  99static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 100                unsigned int vmid);
 101static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 102static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 103                        uint32_t queue_id, uint32_t __user *wptr,
 104                        uint32_t wptr_shift, uint32_t wptr_mask,
 105                        struct mm_struct *mm);
 106static int kgd_hqd_dump(struct kgd_dev *kgd,
 107                        uint32_t pipe_id, uint32_t queue_id,
 108                        uint32_t (**dump)[2], uint32_t *n_regs);
 109static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 110                             uint32_t __user *wptr, struct mm_struct *mm);
 111static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
 112                             uint32_t engine_id, uint32_t queue_id,
 113                             uint32_t (**dump)[2], uint32_t *n_regs);
 114static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 115                uint32_t pipe_id, uint32_t queue_id);
 116static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
 117static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 118                                enum kfd_preempt_type reset_type,
 119                                unsigned int utimeout, uint32_t pipe_id,
 120                                uint32_t queue_id);
 121static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 122                                unsigned int utimeout);
 123static int kgd_address_watch_disable(struct kgd_dev *kgd);
 124static int kgd_address_watch_execute(struct kgd_dev *kgd,
 125                                        unsigned int watch_point_id,
 126                                        uint32_t cntl_val,
 127                                        uint32_t addr_hi,
 128                                        uint32_t addr_lo);
 129static int kgd_wave_control_execute(struct kgd_dev *kgd,
 130                                        uint32_t gfx_index_val,
 131                                        uint32_t sq_cmd);
 132static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 133                                        unsigned int watch_point_id,
 134                                        unsigned int reg_offset);
 135
 136static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
 137                uint8_t vmid);
 138static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 139                uint8_t vmid);
 140static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 141                uint32_t page_table_base);
 142static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 143static void set_scratch_backing_va(struct kgd_dev *kgd,
 144                                        uint64_t va, uint32_t vmid);
 145static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
 146static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 147
  148/* Because REG_GET_FIELD() is used, this function is kept in the
  149 * ASIC-specific file.
 150 */
 151static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
 152                struct tile_config *config)
 153{
 154        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 155
 156        config->gb_addr_config = adev->gfx.config.gb_addr_config;
 157
 158        config->tile_config_ptr = adev->gfx.config.tile_mode_array;
 159        config->num_tile_configs =
 160                        ARRAY_SIZE(adev->gfx.config.tile_mode_array);
 161        config->macro_tile_config_ptr =
 162                        adev->gfx.config.macrotile_mode_array;
 163        config->num_macro_tile_configs =
 164                        ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
 165
 166        return 0;
 167}
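     /* A minimal usage sketch of the tile-config query above. KFD reaches this
      * function through the kfd2kgd.get_tile_config hook; the helper name
      * use_tile_mode() and the local variables below are hypothetical, only
      * the struct tile_config fields come from this file:
      *
      *	struct tile_config cfg;
      *	int i;
      *
      *	if (!kfd2kgd->get_tile_config(kgd, &cfg))
      *		for (i = 0; i < cfg.num_tile_configs; i++)
      *			use_tile_mode(cfg.tile_config_ptr[i]);
      */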
 168
 169static const struct kfd2kgd_calls kfd2kgd = {
 170        .init_gtt_mem_allocation = alloc_gtt_mem,
 171        .free_gtt_mem = free_gtt_mem,
 172        .get_local_mem_info = get_local_mem_info,
 173        .get_gpu_clock_counter = get_gpu_clock_counter,
 174        .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
 175        .alloc_pasid = amdgpu_pasid_alloc,
 176        .free_pasid = amdgpu_pasid_free,
 177        .program_sh_mem_settings = kgd_program_sh_mem_settings,
 178        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 179        .init_interrupts = kgd_init_interrupts,
 180        .hqd_load = kgd_hqd_load,
 181        .hqd_sdma_load = kgd_hqd_sdma_load,
 182        .hqd_dump = kgd_hqd_dump,
 183        .hqd_sdma_dump = kgd_hqd_sdma_dump,
 184        .hqd_is_occupied = kgd_hqd_is_occupied,
 185        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 186        .hqd_destroy = kgd_hqd_destroy,
 187        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
 188        .address_watch_disable = kgd_address_watch_disable,
 189        .address_watch_execute = kgd_address_watch_execute,
 190        .wave_control_execute = kgd_wave_control_execute,
 191        .address_watch_get_offset = kgd_address_watch_get_offset,
 192        .get_atc_vmid_pasid_mapping_pasid =
 193                        get_atc_vmid_pasid_mapping_pasid,
 194        .get_atc_vmid_pasid_mapping_valid =
 195                        get_atc_vmid_pasid_mapping_valid,
 196        .get_fw_version = get_fw_version,
 197        .set_scratch_backing_va = set_scratch_backing_va,
 198        .get_tile_config = amdgpu_amdkfd_get_tile_config,
 199        .get_cu_info = get_cu_info,
 200        .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
 201        .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
 202        .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 203        .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
 204        .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 205        .set_vm_context_page_table_base = set_vm_context_page_table_base,
 206        .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
 207        .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
 208        .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
 209        .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
 210        .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
 211        .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
 212        .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
 213        .invalidate_tlbs = invalidate_tlbs,
 214        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
 215        .submit_ib = amdgpu_amdkfd_submit_ib,
 216        .gpu_recover = amdgpu_amdkfd_gpu_reset,
 217        .set_compute_idle = amdgpu_amdkfd_set_compute_idle
 218};
 219
 220struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
 221{
 222        return (struct kfd2kgd_calls *)&kfd2kgd;
 223}
 224
 225static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
 226{
 227        return (struct amdgpu_device *)kgd;
 228}
 229
 230static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
 231                        uint32_t queue, uint32_t vmid)
 232{
 233        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 234
 235        mutex_lock(&adev->srbm_mutex);
 236        soc15_grbm_select(adev, mec, pipe, queue, vmid);
 237}
 238
 239static void unlock_srbm(struct kgd_dev *kgd)
 240{
 241        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 242
 243        soc15_grbm_select(adev, 0, 0, 0, 0);
 244        mutex_unlock(&adev->srbm_mutex);
 245}
 246
 247static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 248                                uint32_t queue_id)
 249{
 250        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 251
 252        uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 253        uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 254
 255        lock_srbm(kgd, mec, pipe, queue_id, 0);
 256}
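     /* acquire_queue() decodes a flat pipe_id into an (mec, pipe) pair before
      * selecting it through the SRBM. For example (illustrative values), with
      * adev->gfx.mec.num_pipe_per_mec == 4, pipe_id 5 selects mec = 5/4 + 1 = 2
      * and pipe = 5 % 4 = 1; the +1 is needed because ME 0 is the graphics
      * engine in soc15_grbm_select().
      */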
 257
 258static uint32_t get_queue_mask(struct amdgpu_device *adev,
 259                               uint32_t pipe_id, uint32_t queue_id)
 260{
 261        unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
 262                            queue_id) & 31;
 263
 264        return ((uint32_t)1) << bit;
 265}
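     /* Worked example for get_queue_mask() (illustrative values): with
      * adev->gfx.mec.num_queue_per_pipe == 8, pipe_id 1 and queue_id 2 map to
      * bit (1 * 8 + 2) & 31 = 10, so the function returns 0x400. kgd_hqd_load()
      * writes this mask to CP_PQ_WPTR_POLL_CNTL1 to enable the one-shot WPTR
      * poll for exactly this queue.
      */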
 266
 267static void release_queue(struct kgd_dev *kgd)
 268{
 269        unlock_srbm(kgd);
 270}
 271
 272static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 273                                        uint32_t sh_mem_config,
 274                                        uint32_t sh_mem_ape1_base,
 275                                        uint32_t sh_mem_ape1_limit,
 276                                        uint32_t sh_mem_bases)
 277{
 278        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 279
 280        lock_srbm(kgd, 0, 0, 0, vmid);
 281
 282        WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
 283        WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
 284        /* APE1 no longer exists on GFX9 */
 285
 286        unlock_srbm(kgd);
 287}
 288
 289static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 290                                        unsigned int vmid)
 291{
 292        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 293
 294        /*
 295         * We have to assume that there is no outstanding mapping.
 296         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
 297         * a mapping is in progress or because a mapping finished
 298         * and the SW cleared it.
 299         * So the protocol is to always wait & clear.
 300         */
 301        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
 302                        ATC_VMID0_PASID_MAPPING__VALID_MASK;
 303
 304        /*
  305         * We need to do this twice, once for GFX and once for MMHUB. For
  306         * ATC, add 16 to the VMID for MMHUB; the IH block uses different
  307         * registers. ATC_VMID0..15 registers are separate from ATC_VMID16..31.
 308         */
 309
 310        WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
 311               pasid_mapping);
 312
 313        while (!(RREG32(SOC15_REG_OFFSET(
 314                                ATHUB, 0,
 315                                mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
 316                 (1U << vmid)))
 317                cpu_relax();
 318
 319        WREG32(SOC15_REG_OFFSET(ATHUB, 0,
 320                                mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
 321               1U << vmid);
 322
 323        /* Mapping vmid to pasid also for IH block */
 324        WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
 325               pasid_mapping);
 326
 327        WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
 328               pasid_mapping);
 329
 330        while (!(RREG32(SOC15_REG_OFFSET(
 331                                ATHUB, 0,
 332                                mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
 333                 (1U << (vmid + 16))))
 334                cpu_relax();
 335
 336        WREG32(SOC15_REG_OFFSET(ATHUB, 0,
 337                                mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
 338               1U << (vmid + 16));
 339
 340        /* Mapping vmid to pasid also for IH block */
 341        WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
 342               pasid_mapping);
 343        return 0;
 344}
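     /* Worked example of the mapping value above, using a hypothetical PASID:
      * for pasid = 0x1234 and vmid = 3 the value written is
      * 0x1234 | ATC_VMID0_PASID_MAPPING__VALID_MASK, once to
      * ATC_VMID0_PASID_MAPPING + 3 for the GC side and once to
      * ATC_VMID16_PASID_MAPPING + 3 for the MMHUB side (VMID 19), each write
      * followed by the wait-and-clear handshake on
      * ATC_VMID_PASID_MAPPING_UPDATE_STATUS.
      */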
 345
  346/* TODO - the RING0 form of this field is obsolete; it seems to date
  347 * back to SI but still works.
 348 */
 349
 350static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 351{
 352        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 353        uint32_t mec;
 354        uint32_t pipe;
 355
 356        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 357        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 358
 359        lock_srbm(kgd, mec, pipe, 0, 0);
 360
 361        WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
 362                CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
 363                CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 364
 365        unlock_srbm(kgd);
 366
 367        return 0;
 368}
 369
 370static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
 371                                unsigned int engine_id,
 372                                unsigned int queue_id)
 373{
 374        uint32_t base[2] = {
 375                SOC15_REG_OFFSET(SDMA0, 0,
 376                                 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
 377                SOC15_REG_OFFSET(SDMA1, 0,
 378                                 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
 379        };
 380        uint32_t retval;
 381
 382        retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
 383                                               mmSDMA0_RLC0_RB_CNTL);
 384
 385        pr_debug("sdma base address: 0x%x\n", retval);
 386
 387        return retval;
 388}
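     /* The value returned above is a dword register offset relative to the
      * engine's RLC0 block. For example (illustrative values), engine_id 1 and
      * queue_id 2 yield base[1] + 2 * (mmSDMA0_RLC1_RB_CNTL -
      * mmSDMA0_RLC0_RB_CNTL), i.e. the SDMA1 aperture plus two RLC queue
      * strides; callers then add the individual mmSDMA0_RLC0_* offsets on top.
      */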
 389
 390static inline struct v9_mqd *get_mqd(void *mqd)
 391{
 392        return (struct v9_mqd *)mqd;
 393}
 394
 395static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
 396{
 397        return (struct v9_sdma_mqd *)mqd;
 398}
 399
 400static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 401                        uint32_t queue_id, uint32_t __user *wptr,
 402                        uint32_t wptr_shift, uint32_t wptr_mask,
 403                        struct mm_struct *mm)
 404{
 405        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 406        struct v9_mqd *m;
 407        uint32_t *mqd_hqd;
 408        uint32_t reg, hqd_base, data;
 409
 410        m = get_mqd(mqd);
 411
 412        acquire_queue(kgd, pipe_id, queue_id);
 413
  414        /* HIQ is set during the driver init period with vmid set to 0 */
 415        if (m->cp_hqd_vmid == 0) {
 416                uint32_t value, mec, pipe;
 417
 418                mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 419                pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 420
 421                pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
 422                        mec, pipe, queue_id);
 423                value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
 424                value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
 425                        ((mec << 5) | (pipe << 3) | queue_id | 0x80));
 426                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
 427        }
 428
 429        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
 430        mqd_hqd = &m->cp_mqd_base_addr_lo;
 431        hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
 432
 433        for (reg = hqd_base;
 434             reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
 435                WREG32(reg, mqd_hqd[reg - hqd_base]);
 436
 437
 438        /* Activate doorbell logic before triggering WPTR poll. */
 439        data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 440                             CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 441        WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 442
 443        if (wptr) {
 444                /* Don't read wptr with get_user because the user
 445                 * context may not be accessible (if this function
 446                 * runs in a work queue). Instead trigger a one-shot
 447                 * polling read from memory in the CP. This assumes
 448                 * that wptr is GPU-accessible in the queue's VMID via
  449                 * ATC or SVM. Set WPTR==RPTR before starting the poll so
 450                 * the CP starts fetching new commands from the right
 451                 * place.
 452                 *
 453                 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
 454                 * tricky. Assume that the queue didn't overflow. The
 455                 * number of valid bits in the 32-bit RPTR depends on
 456                 * the queue size. The remaining bits are taken from
 457                 * the saved 64-bit WPTR. If the WPTR wrapped, add the
 458                 * queue size.
 459                 */
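                     /* Worked example with illustrative values: a QUEUE_SIZE
                      * field of 9 gives queue_size = 2 << 9 = 1024 dwords. If
                      * the saved 32-bit RPTR is 1000 and the saved WPTR_LO is
                      * 8208 (8208 % 1024 = 16, which is below the RPTR), the
                      * guess becomes 1000 + 1024 + 8192 = 10216, with WPTR_HI
                      * supplying the upper 32 bits.
                      */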
 460                uint32_t queue_size =
 461                        2 << REG_GET_FIELD(m->cp_hqd_pq_control,
 462                                           CP_HQD_PQ_CONTROL, QUEUE_SIZE);
 463                uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
 464
 465                if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
 466                        guessed_wptr += queue_size;
 467                guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
 468                guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 469
 470                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
 471                       lower_32_bits(guessed_wptr));
 472                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
 473                       upper_32_bits(guessed_wptr));
 474                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
 475                       lower_32_bits((uintptr_t)wptr));
 476                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
 477                       upper_32_bits((uintptr_t)wptr));
 478                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
 479                       get_queue_mask(adev, pipe_id, queue_id));
 480        }
 481
 482        /* Start the EOP fetcher */
 483        WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
 484               REG_SET_FIELD(m->cp_hqd_eop_rptr,
 485                             CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 486
 487        data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
 488        WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 489
 490        release_queue(kgd);
 491
 492        return 0;
 493}
 494
 495static int kgd_hqd_dump(struct kgd_dev *kgd,
 496                        uint32_t pipe_id, uint32_t queue_id,
 497                        uint32_t (**dump)[2], uint32_t *n_regs)
 498{
 499        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 500        uint32_t i = 0, reg;
 501#define HQD_N_REGS 56
 502#define DUMP_REG(addr) do {                             \
 503                if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
 504                        break;                          \
 505                (*dump)[i][0] = (addr) << 2;            \
 506                (*dump)[i++][1] = RREG32(addr);         \
 507        } while (0)
 508
 509        *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
 510        if (*dump == NULL)
 511                return -ENOMEM;
 512
 513        acquire_queue(kgd, pipe_id, queue_id);
 514
 515        for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
 516             reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
 517                DUMP_REG(reg);
 518
 519        release_queue(kgd);
 520
 521        WARN_ON_ONCE(i != HQD_N_REGS);
 522        *n_regs = i;
 523
 524        return 0;
 525}
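     /* The dump returned above is an array of (byte offset, value) pairs:
      * DUMP_REG() stores the register offset shifted left by two (dword offset
      * converted to a byte offset) in column 0 and the RREG32() read-back in
      * column 1, for the HQD_N_REGS (56) registers from CP_MQD_BASE_ADDR
      * through CP_HQD_PQ_WPTR_HI.
      */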
 526
 527static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 528                             uint32_t __user *wptr, struct mm_struct *mm)
 529{
 530        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 531        struct v9_sdma_mqd *m;
 532        uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
 533        unsigned long end_jiffies;
 534        uint32_t data;
 535        uint64_t data64;
 536        uint64_t __user *wptr64 = (uint64_t __user *)wptr;
 537
 538        m = get_sdma_mqd(mqd);
 539        sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
 540                                            m->sdma_queue_id);
 541        sdmax_gfx_context_cntl = m->sdma_engine_id ?
 542                SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
 543                SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
 544
 545        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
 546                m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
 547
 548        end_jiffies = msecs_to_jiffies(2000) + jiffies;
 549        while (true) {
 550                data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
 551                if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 552                        break;
 553                if (time_after(jiffies, end_jiffies))
 554                        return -ETIME;
 555                usleep_range(500, 1000);
 556        }
 557        data = RREG32(sdmax_gfx_context_cntl);
 558        data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
 559                             RESUME_CTX, 0);
 560        WREG32(sdmax_gfx_context_cntl, data);
 561
 562        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
 563               m->sdmax_rlcx_doorbell_offset);
 564
 565        data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
 566                             ENABLE, 1);
 567        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
 568        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
 569        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
 570                                m->sdmax_rlcx_rb_rptr_hi);
 571
 572        WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
 573        if (read_user_wptr(mm, wptr64, data64)) {
 574                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
 575                       lower_32_bits(data64));
 576                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
 577                       upper_32_bits(data64));
 578        } else {
 579                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
 580                       m->sdmax_rlcx_rb_rptr);
 581                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
 582                       m->sdmax_rlcx_rb_rptr_hi);
 583        }
 584        WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
 585
 586        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
 587        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
 588                        m->sdmax_rlcx_rb_base_hi);
 589        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
 590                        m->sdmax_rlcx_rb_rptr_addr_lo);
 591        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 592                        m->sdmax_rlcx_rb_rptr_addr_hi);
 593
 594        data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
 595                             RB_ENABLE, 1);
 596        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
 597
 598        return 0;
 599}
 600
 601static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
 602                             uint32_t engine_id, uint32_t queue_id,
 603                             uint32_t (**dump)[2], uint32_t *n_regs)
 604{
 605        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 606        uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
 607        uint32_t i = 0, reg;
 608#undef HQD_N_REGS
 609#define HQD_N_REGS (19+6+7+10)
 610
 611        *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
 612        if (*dump == NULL)
 613                return -ENOMEM;
 614
 615        for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
 616                DUMP_REG(sdma_base_addr + reg);
 617        for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
 618                DUMP_REG(sdma_base_addr + reg);
 619        for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
 620             reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
 621                DUMP_REG(sdma_base_addr + reg);
 622        for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
 623             reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
 624                DUMP_REG(sdma_base_addr + reg);
 625
 626        WARN_ON_ONCE(i != HQD_N_REGS);
 627        *n_regs = i;
 628
 629        return 0;
 630}
 631
 632static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 633                                uint32_t pipe_id, uint32_t queue_id)
 634{
 635        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 636        uint32_t act;
 637        bool retval = false;
 638        uint32_t low, high;
 639
 640        acquire_queue(kgd, pipe_id, queue_id);
 641        act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
 642        if (act) {
 643                low = lower_32_bits(queue_address >> 8);
 644                high = upper_32_bits(queue_address >> 8);
 645
 646                if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
 647                   high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
 648                        retval = true;
 649        }
 650        release_queue(kgd);
 651        return retval;
 652}
 653
 654static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 655{
 656        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 657        struct v9_sdma_mqd *m;
 658        uint32_t sdma_base_addr;
 659        uint32_t sdma_rlc_rb_cntl;
 660
 661        m = get_sdma_mqd(mqd);
 662        sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
 663                                            m->sdma_queue_id);
 664
 665        sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
 666
 667        if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
 668                return true;
 669
 670        return false;
 671}
 672
 673static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 674                                enum kfd_preempt_type reset_type,
 675                                unsigned int utimeout, uint32_t pipe_id,
 676                                uint32_t queue_id)
 677{
 678        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 679        enum hqd_dequeue_request_type type;
 680        unsigned long end_jiffies;
 681        uint32_t temp;
 682        struct v9_mqd *m = get_mqd(mqd);
 683
 684        if (adev->in_gpu_reset)
 685                return -EIO;
 686
 687        acquire_queue(kgd, pipe_id, queue_id);
 688
 689        if (m->cp_hqd_vmid == 0)
 690                WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 691
 692        switch (reset_type) {
 693        case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
 694                type = DRAIN_PIPE;
 695                break;
 696        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
 697                type = RESET_WAVES;
 698                break;
 699        default:
 700                type = DRAIN_PIPE;
 701                break;
 702        }
 703
 704        WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
 705
 706        end_jiffies = (utimeout * HZ / 1000) + jiffies;
 707        while (true) {
 708                temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
 709                if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 710                        break;
 711                if (time_after(jiffies, end_jiffies)) {
  712                        pr_err("cp queue preemption timed out\n");
 713                        release_queue(kgd);
 714                        return -ETIME;
 715                }
 716                usleep_range(500, 1000);
 717        }
 718
 719        release_queue(kgd);
 720        return 0;
 721}
 722
 723static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 724                                unsigned int utimeout)
 725{
 726        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 727        struct v9_sdma_mqd *m;
 728        uint32_t sdma_base_addr;
 729        uint32_t temp;
 730        unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 731
 732        m = get_sdma_mqd(mqd);
 733        sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
 734                                            m->sdma_queue_id);
 735
 736        temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
 737        temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
 738        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
 739
 740        while (true) {
 741                temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
 742                if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 743                        break;
 744                if (time_after(jiffies, end_jiffies))
 745                        return -ETIME;
 746                usleep_range(500, 1000);
 747        }
 748
 749        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
 750        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
 751                RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
 752                SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 753
 754        m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
 755        m->sdmax_rlcx_rb_rptr_hi =
 756                RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
 757
 758        return 0;
 759}
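     /* Unlike the CP destroy path, the SDMA destroy above saves the hardware
      * read pointers (RB_RPTR and RB_RPTR_HI) back into the MQD before
      * returning, so a later kgd_hqd_sdma_load() of the same MQD can resume
      * the ring where the engine stopped; those saved values also serve as the
      * fallback WPTR when the user-space WPTR cannot be read.
      */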
 760
 761static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
 762                                                        uint8_t vmid)
 763{
 764        uint32_t reg;
 765        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 766
 767        reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
 768                     + vmid);
 769        return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
 770}
 771
 772static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 773                                                                uint8_t vmid)
 774{
 775        uint32_t reg;
 776        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 777
 778        reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
 779                     + vmid);
 780        return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 781}
 782
 783static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 784{
 785        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 786        uint32_t req = (1 << vmid) |
 787                (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
 788                VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
 789                VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
 790                VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
 791                VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
 792                VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
 793
 794        mutex_lock(&adev->srbm_mutex);
 795
 796        /* Use legacy mode tlb invalidation.
 797         *
 798         * Currently on Raven the code below is broken for anything but
  799         * legacy mode due to an MMHUB power gating problem. A workaround
 800         * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
 801         * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
 802         * bit.
 803         *
 804         * TODO 1: agree on the right set of invalidation registers for
 805         * KFD use. Use the last one for now. Invalidate both GC and
 806         * MMHUB.
 807         *
  808         * TODO 2: support range-based invalidation, requires kfd2kgd
 809         * interface change
 810         */
 811        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
 812                                0xffffffff);
 813        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
 814                                0x0000001f);
 815
 816        WREG32(SOC15_REG_OFFSET(MMHUB, 0,
 817                                mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
 818                                0xffffffff);
 819        WREG32(SOC15_REG_OFFSET(MMHUB, 0,
 820                                mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
 821                                0x0000001f);
 822
 823        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
 824
 825        WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
 826                                req);
 827
 828        while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
 829                                        (1 << vmid)))
 830                cpu_relax();
 831
 832        while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
 833                                        mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
 834                                        (1 << vmid)))
 835                cpu_relax();
 836
 837        mutex_unlock(&adev->srbm_mutex);
 838
 839}
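     /* Worked example of the request word built above, with a hypothetical
      * VMID: for vmid = 8 the per-VMID bit is 1 << 8 = 0x100, FLUSH_TYPE stays
      * 0 (legacy invalidation) and the five L2/L1 INVALIDATE mask bits are
      * ORed in; the same req value is then written to both the GC and MMHUB
      * ENG16 request registers and bit 8 of each ACK register is polled.
      */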
 840
 841static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
 842{
 843        signed long r;
 844        uint32_t seq;
 845        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 846
 847        spin_lock(&adev->gfx.kiq.ring_lock);
 848        amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
 849        amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
 850        amdgpu_ring_write(ring,
 851                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
 852                        PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
 853                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
 854                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
 855        amdgpu_fence_emit_polling(ring, &seq);
 856        amdgpu_ring_commit(ring);
 857        spin_unlock(&adev->gfx.kiq.ring_lock);
 858
 859        r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 860        if (r < 1) {
 861                DRM_ERROR("wait for kiq fence error: %ld.\n", r);
 862                return -ETIME;
 863        }
 864
 865        return 0;
 866}
 867
 868static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 869{
 870        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 871        int vmid;
 872        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 873
 874        if (adev->in_gpu_reset)
 875                return -EIO;
 876
 877        if (ring->ready)
 878                return invalidate_tlbs_with_kiq(adev, pasid);
 879
 880        for (vmid = 0; vmid < 16; vmid++) {
 881                if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
 882                        continue;
 883                if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
 884                        if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
 885                                == pasid) {
 886                                write_vmid_invalidate_request(kgd, vmid);
 887                                break;
 888                        }
 889                }
 890        }
 891
 892        return 0;
 893}
 894
 895static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
 896{
 897        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 898
 899        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
 900                pr_err("non kfd vmid %d\n", vmid);
 901                return 0;
 902        }
 903
 904        write_vmid_invalidate_request(kgd, vmid);
 905        return 0;
 906}
 907
 908static int kgd_address_watch_disable(struct kgd_dev *kgd)
 909{
 910        return 0;
 911}
 912
 913static int kgd_address_watch_execute(struct kgd_dev *kgd,
 914                                        unsigned int watch_point_id,
 915                                        uint32_t cntl_val,
 916                                        uint32_t addr_hi,
 917                                        uint32_t addr_lo)
 918{
 919        return 0;
 920}
 921
 922static int kgd_wave_control_execute(struct kgd_dev *kgd,
 923                                        uint32_t gfx_index_val,
 924                                        uint32_t sq_cmd)
 925{
 926        struct amdgpu_device *adev = get_amdgpu_device(kgd);
 927        uint32_t data = 0;
 928
 929        mutex_lock(&adev->grbm_idx_mutex);
 930
 931        WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
 932        WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
 933
 934        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
 935                INSTANCE_BROADCAST_WRITES, 1);
 936        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
 937                SH_BROADCAST_WRITES, 1);
 938        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
 939                SE_BROADCAST_WRITES, 1);
 940
 941        WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
 942        mutex_unlock(&adev->grbm_idx_mutex);
 943
 944        return 0;
 945}
 946
 947static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 948                                        unsigned int watch_point_id,
 949                                        unsigned int reg_offset)
 950{
 951        return 0;
 952}
 953
 954static void set_scratch_backing_va(struct kgd_dev *kgd,
 955                                        uint64_t va, uint32_t vmid)
 956{
 957        /* No longer needed on GFXv9. The scratch base address is
 958         * passed to the shader by the CP. It's the user mode driver's
  959         * passed to the shader by the CP. Setting it up is the user mode
  960         * driver's responsibility.
 961}
 962
 963/* FIXME: Does this need to be ASIC-specific code? */
 964static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 965{
 966        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 967        const union amdgpu_firmware_header *hdr;
 968
 969        switch (type) {
 970        case KGD_ENGINE_PFP:
 971                hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
 972                break;
 973
 974        case KGD_ENGINE_ME:
 975                hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
 976                break;
 977
 978        case KGD_ENGINE_CE:
 979                hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
 980                break;
 981
 982        case KGD_ENGINE_MEC1:
 983                hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
 984                break;
 985
 986        case KGD_ENGINE_MEC2:
 987                hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
 988                break;
 989
 990        case KGD_ENGINE_RLC:
 991                hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
 992                break;
 993
 994        case KGD_ENGINE_SDMA1:
 995                hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
 996                break;
 997
 998        case KGD_ENGINE_SDMA2:
 999                hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
1000                break;
1001
1002        default:
1003                return 0;
1004        }
1005
1006        if (hdr == NULL)
1007                return 0;
1008
 1009        /* Only 12 bits are in use */
1010        return hdr->common.ucode_version;
1011}
1012
1013static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
1014                uint32_t page_table_base)
1015{
1016        struct amdgpu_device *adev = get_amdgpu_device(kgd);
1017        uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
1018                AMDGPU_PTE_VALID;
1019
1020        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
1021                pr_err("trying to set page table base for wrong VMID %u\n",
1022                       vmid);
1023                return;
1024        }
1025
1026        /* TODO: take advantage of per-process address space size. For
1027         * now, all processes share the same address space size, like
1028         * on GFX8 and older.
1029         */
1030        WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
1031        WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
1032
1033        WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
1034                        lower_32_bits(adev->vm_manager.max_pfn - 1));
1035        WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
1036                        upper_32_bits(adev->vm_manager.max_pfn - 1));
1037
1038        WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
1039        WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
1040
1041        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
1042        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
1043
1044        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
1045                        lower_32_bits(adev->vm_manager.max_pfn - 1));
1046        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
1047                        upper_32_bits(adev->vm_manager.max_pfn - 1));
1048
1049        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
1050        WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
1051}
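     /* Worked example of the base value programmed above, with a hypothetical
      * page_table_base: for page_table_base = 0x1000 (a page frame number) and
      * PAGE_SHIFT = 12 (4 KiB pages), base = (0x1000ULL << 12) |
      * AMDGPU_PTE_VALID = 0x1000001, whose low and high halves are written to
      * the CONTEXT0 PAGE_TABLE_BASE_ADDR registers of both the MMHUB and GC
      * hubs at an offset of vmid * 2 dwords.
      */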
1052