linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
#include "cik_sdma.h"
#include "amdgpu_ucode.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"

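/*
 * CIK compute exposes two MECs with four pipes each; the MEC/pipe
 * arithmetic in the queue helpers below relies on this layout.
 */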
#define CIK_PIPE_PER_MEC	(4)

enum {
	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
	MAX_WATCH_ADDRESSES = 4
};

enum {
	ADDRESS_WATCH_REG_ADDR_HI = 0,
	ADDRESS_WATCH_REG_ADDR_LO,
	ADDRESS_WATCH_REG_CNTL,
	ADDRESS_WATCH_REG_MAX
};

/*  not defined in the CI/KV reg file  */
enum {
	ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
	ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
	/* extend the mask to 26 bits to match the low address field */
	ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
	ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
	ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};

static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
	mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
	mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
	mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
	mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
};

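/*
 * The TCP_WATCHx_CNTL word packs, LSB to MSB: a 24-bit address
 * comparison mask, the 4-bit VMID, the ATC select, a 2-bit mode and a
 * valid flag.  For illustration, a watch point for vmid 8 with the
 * default mask, ATC on, mode 0 and valid set encodes as
 * 0x00FFFFFF | (8 << 24) | (1 << 28) | (1U << 31) == 0x98FFFFFF.
 */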
union TCP_WATCH_CNTL_BITS {
	struct {
		uint32_t mask:24;
		uint32_t vmid:4;
		uint32_t atc:1;
		uint32_t mode:2;
		uint32_t valid:1;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
		uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id);

static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
				unsigned int timeout, uint32_t pipe_id,
				uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int timeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);

static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_vmem_size = get_vmem_size,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_pipeline = kgd_init_pipeline,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
	.write_vmid_invalidate_request = write_vmid_invalidate_request,
	.get_fw_version = get_fw_version
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

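/*
 * SRBM_GFX_CNTL selects which MEC/pipe/queue/VMID instance the banked
 * CP and SH registers address; lock_srbm()/unlock_srbm() serialize that
 * selection under adev->srbm_mutex for the duration of an access run.
 */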
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

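/*
 * KFD pipe IDs are shifted up by one before the MEC/pipe split (note
 * the pre-increment below): KFD pipe 0 lands on hardware pipe 1 of
 * MEC1, as amdgpu keeps the first pipe for its own compute rings.
 */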
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished and the
	 * SW cleared it. So the protocol is to always wait & clear.
	 */
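	/*
	 * VALID is bit 31 of the mapping register: e.g. pasid 0x23
	 * encodes as 0x80000023.
	 */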
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, 0, 0);
	/* The EOP buffer is 256-byte aligned; the registers take addr >> 8 */
	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
	WREG32(mmCP_HPD_EOP_VMID, 0);
	WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
	unlock_srbm(kgd);

	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
	pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

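/*
 * Each SDMA engine owns a block of RLC queue registers: engine 1's
 * block sits SDMA1_REGISTER_OFFSET register words above engine 0's,
 * and queues within an engine are KFD_CIK_SDMA_QUEUE_OFFSET words
 * apart, hence the base-address arithmetic below.
 */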
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

	pr_debug("kfd: sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
	return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
}

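/*
 * Program a compute HQD from its MQD snapshot.  The HQD registers are
 * banked per pipe/queue, so the whole sequence runs between
 * acquire_queue() and release_queue(); CP_HQD_ACTIVE is written last so
 * the queue only goes live once it is fully configured.
 */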
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t wptr_shadow, is_wptr_shadow_valid;
	struct cik_mqd *m;

	m = get_mqd(mqd);

	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);

	acquire_queue(kgd, pipe_id, queue_id);
	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);

	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);

	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);

	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);

	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);

	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);

	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
			m->cp_hqd_pq_rptr_report_addr_hi);

	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);

	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);

	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);

	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);

	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);

	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);

	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);

	if (is_wptr_shadow_valid)
		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);

	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
	release_queue(kgd);

	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
			m->sdma_rlc_virtual_addr);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
			m->sdma_rlc_rb_base);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdma_rlc_rb_base_hi);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdma_rlc_rb_rptr_addr_lo);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdma_rlc_rb_rptr_addr_hi);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
			m->sdma_rlc_doorbell);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
			m->sdma_rlc_rb_cntl);

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

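/*
 * Tear down a compute queue: kill its doorbell, post a dequeue request
 * of the given reset type, then poll CP_HQD_ACTIVE until the hardware
 * reports the queue idle or the caller's timeout (in ms) expires.
 */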
static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
				unsigned int timeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t temp;

	acquire_queue(kgd, pipe_id, queue_id);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);

	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (timeout == 0) {
			pr_err("kfd: cp queue preemption time out\n");
			release_queue(kgd);
			return -ETIME;
		}
		msleep(20);
		timeout -= 20;
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int timeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t temp;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (timeout == 0)
			return -ETIME;
		msleep(20);
		timeout -= 20;
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);

	return 0;
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	union TCP_WATCH_CNTL_BITS cntl;
	unsigned int i;

	cntl.u32All = 0;

	cntl.bitfields.valid = 0;
	cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
	cntl.bitfields.atc = 1;

	/* Turn off all watch points */
	for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
		WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
			ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	union TCP_WATCH_CNTL_BITS cntl;

	cntl.u32All = cntl_val;

	/* Turn off this watch point until we set all the registers */
	cntl.bitfields.valid = 0;
	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_ADDR_HI], addr_hi);

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_ADDR_LO], addr_lo);

	/* Enable the watch point */
	cntl.bitfields.valid = 1;

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);

	/* Restore the GRBM_GFX_INDEX register to broadcast mode */
	data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

	WREG32(mmGRBM_GFX_INDEX, data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	/* Return the PASID field, not the VALID bit */
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	const union amdgpu_firmware_header *hdr;

	BUG_ON(kgd == NULL);

	switch (type) {
	case KGD_ENGINE_PFP:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.pfp_fw->data;
		break;

	case KGD_ENGINE_ME:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.me_fw->data;
		break;

	case KGD_ENGINE_CE:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.ce_fw->data;
		break;

	case KGD_ENGINE_MEC1:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.mec_fw->data;
		break;

	case KGD_ENGINE_MEC2:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.mec2_fw->data;
		break;

	case KGD_ENGINE_RLC:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.rlc_fw->data;
		break;

	case KGD_ENGINE_SDMA1:
		hdr = (const union amdgpu_firmware_header *)
					adev->sdma.instance[0].fw->data;
		break;

	case KGD_ENGINE_SDMA2:
		hdr = (const union amdgpu_firmware_header *)
					adev->sdma.instance[1].fw->data;
		break;

	default:
		return 0;
	}

	if (hdr == NULL)
		return 0;

	/* Only 12 bits in use */
	return hdr->common.ucode_version;
}