linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
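
/*
 * GFX7 (CIK) backend of the KFD <-> amdgpu interface: implements the
 * kfd2kgd_calls table that amdkfd uses to program compute (HQD) and SDMA
 * queues, TCP address watch points, wave controls and VMID/PASID mappings
 * through amdgpu's register accessors.
 */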

#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
#include "cik_sdma.h"
#include "amdgpu_ucode.h"
#include "gfx_v7_0.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"

enum hqd_dequeue_request_type {
        NO_ACTION = 0,
        DRAIN_PIPE,
        RESET_WAVES
};

enum {
        MAX_TRAPID = 8,         /* 3 bits in the bitfield. */
        MAX_WATCH_ADDRESSES = 4
};

enum {
        ADDRESS_WATCH_REG_ADDR_HI = 0,
        ADDRESS_WATCH_REG_ADDR_LO,
        ADDRESS_WATCH_REG_CNTL,
        ADDRESS_WATCH_REG_MAX
};

/* not defined in the CI/KV reg file */
enum {
        ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
        ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
        /* extend the mask to 26 bits to match the low address field */
        ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
        ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
        ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};

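/*
 * MMIO offsets of the four TCP watch points.  Each watch point owns three
 * registers, indexed by the ADDRESS_WATCH_REG_* enum above.
 */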
  70static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
  71        mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
  72        mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
  73        mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
  74        mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
  75};
  76
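/*
 * Software view of a TCP_WATCHx_CNTL register.  The bitfields mirror the
 * hardware layout (24-bit address mask, VMID, ATC, mode, valid bit);
 * writes go out through the u32All view.
 */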
union TCP_WATCH_CNTL_BITS {
        struct {
                uint32_t mask:24;
                uint32_t vmid:4;
                uint32_t atc:1;
                uint32_t mode:2;
                uint32_t valid:1;
        } bitfields, bits;
        uint32_t u32All;
        signed int i32All;
        float f32All;
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
                uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id);

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                        uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid);

/* get_tile_config() uses REG_GET_FIELD(), whose field definitions come from
 * the per-generation register headers, so it lives in this ASIC-specific
 * file.
 */
static int get_tile_config(struct kgd_dev *kgd,
                struct tile_config *config)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

        config->gb_addr_config = adev->gfx.config.gb_addr_config;
        config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFBANK);
        config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFRANKS);

        config->tile_config_ptr = adev->gfx.config.tile_mode_array;
        config->num_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.tile_mode_array);
        config->macro_tile_config_ptr =
                        adev->gfx.config.macrotile_mode_array;
        config->num_macro_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

        return 0;
}

static const struct kfd2kgd_calls kfd2kgd = {
        .init_gtt_mem_allocation = alloc_gtt_mem,
        .free_gtt_mem = free_gtt_mem,
        .get_vmem_size = get_vmem_size,
        .get_gpu_clock_counter = get_gpu_clock_counter,
        .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
        .program_sh_mem_settings = kgd_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
        .init_pipeline = kgd_init_pipeline,
        .init_interrupts = kgd_init_interrupts,
        .hqd_load = kgd_hqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_is_occupied = kgd_hqd_is_occupied,
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
        .address_watch_disable = kgd_address_watch_disable,
        .address_watch_execute = kgd_address_watch_execute,
        .wave_control_execute = kgd_wave_control_execute,
        .address_watch_get_offset = kgd_address_watch_get_offset,
        .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
        .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
        .write_vmid_invalidate_request = write_vmid_invalidate_request,
        .get_fw_version = get_fw_version,
        .set_scratch_backing_va = set_scratch_backing_va,
        .get_tile_config = get_tile_config,
};

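/*
 * Entry point used by amdgpu_amdkfd to hand this function table to amdkfd.
 * A caller on the KFD side would look roughly like this (sketch only):
 *
 *      struct kfd2kgd_calls *f = amdgpu_amdkfd_gfx_7_get_functions();
 *
 *      f->program_sh_mem_settings(kgd, vmid, sh_mem_config,
 *                                 ape1_base, ape1_limit, sh_mem_bases);
 */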
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
{
        return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
        return (struct amdgpu_device *)kgd;
}

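/*
 * SRBM_GFX_CNTL banks many CP registers by MEC, pipe, queue and VMID.
 * lock_srbm() selects the bank that subsequent register accesses hit and
 * holds adev->srbm_mutex until the matching unlock_srbm() resets the
 * selection to 0.
 */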
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
                        uint32_t queue, uint32_t vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

        mutex_lock(&adev->srbm_mutex);
        WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        WREG32(mmSRBM_GFX_CNTL, 0);
        mutex_unlock(&adev->srbm_mutex);
}

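/*
 * amdkfd passes a flat pipe index across all MECs; split it into an
 * (MEC, pipe) pair for SRBM selection.  The +1 skips MEID 0, which on CIK
 * is the graphics micro engine; compute MECs start at MEID 1.
 */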
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
        unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                                        uint32_t sh_mem_config,
                                        uint32_t sh_mem_ape1_base,
                                        uint32_t sh_mem_ape1_limit,
                                        uint32_t sh_mem_bases)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        lock_srbm(kgd, 0, 0, 0, vmid);

        WREG32(mmSH_MEM_CONFIG, sh_mem_config);
        WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
        WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
        WREG32(mmSH_MEM_BASES, sh_mem_bases);

        unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        /*
         * We have to assume that there is no outstanding mapping.
         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
         * a mapping is in progress or because a mapping finished and the
         * SW cleared it. So the protocol is to always wait & clear.
         */
        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
                        ATC_VMID0_PASID_MAPPING__VALID_MASK;

        WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

        while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
                cpu_relax();
        WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

        /* Mirror the vmid-to-pasid mapping for the IH block as well */
        WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

        return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
        /* amdgpu owns the per-pipe state */
        return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t mec;
        uint32_t pipe;

        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, 0, 0);

        WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
                        CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

        unlock_srbm(kgd);

        return 0;
}

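/*
 * Compute the register block offset of an SDMA RLC queue: engines are
 * SDMA1_REGISTER_OFFSET apart and queues within an engine are
 * KFD_CIK_SDMA_QUEUE_OFFSET apart.
 */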
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
        uint32_t retval;

        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
                        m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

        pr_debug("kfd: sdma base address: 0x%x\n", retval);

        return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
        return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
        return (struct cik_sdma_rlc_registers *)mqd;
}

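/*
 * Load a compute queue: copy the HQD section of the MQD into the
 * SRBM-selected HQD registers, enable the doorbell, seed the write pointer
 * from the user-space queue (if its page is still mapped), then mark the
 * HQD active.
 */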
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_mqd *m;
        uint32_t *mqd_hqd;
        uint32_t reg, wptr_val, data;

        m = get_mqd(mqd);

        acquire_queue(kgd, pipe_id, queue_id);

        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
        mqd_hqd = &m->cp_mqd_base_addr_lo;

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

        /* Copy the userspace write pointer value to the register and
         * activate doorbell logic to monitor subsequent changes.
         */
        data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
                             CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

        if (read_user_wptr(mm, wptr, wptr_val))
                WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

        data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
        WREG32(mmCP_HQD_ACTIVE, data);

        release_queue(kgd);

        return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
                        m->sdma_rlc_virtual_addr);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
                        m->sdma_rlc_rb_base);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
                        m->sdma_rlc_rb_base_hi);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
                        m->sdma_rlc_rb_rptr_addr_lo);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
                        m->sdma_rlc_rb_rptr_addr_hi);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
                        m->sdma_rlc_doorbell);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                        m->sdma_rlc_rb_cntl);

        return 0;
}

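/*
 * A queue slot is occupied if the HQD is active and its PQ base matches
 * the queue address being checked; the PQ base registers hold the address
 * in 256-byte units, hence the >> 8.
 */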
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t act;
        bool retval = false;
        uint32_t low, high;

        acquire_queue(kgd, pipe_id, queue_id);
        act = RREG32(mmCP_HQD_ACTIVE);
        if (act) {
                low = lower_32_bits(queue_address >> 8);
                high = upper_32_bits(queue_address >> 8);

                if (low == RREG32(mmCP_HQD_PQ_BASE) &&
                                high == RREG32(mmCP_HQD_PQ_BASE_HI))
                        retval = true;
        }
        release_queue(kgd);
        return retval;
}

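/* An SDMA queue is occupied as long as its ring buffer is still enabled. */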
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t sdma_rlc_rb_cntl;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

        if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
                return true;

        return false;
}

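/*
 * Tear down a compute queue: disable its doorbell, wait out the IQ timer
 * hazard described below, issue a dequeue request (pipe drain or wave
 * reset) and poll CP_HQD_ACTIVE until the HQD goes idle or utimeout
 * (in milliseconds) expires.
 */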
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t temp;
        enum hqd_dequeue_request_type type;
        unsigned long flags, end_jiffies;
        int retry;

        acquire_queue(kgd, pipe_id, queue_id);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

        switch (reset_type) {
        case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
                type = DRAIN_PIPE;
                break;
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
        default:
                type = DRAIN_PIPE;
                break;
        }

        /* Workaround: if the IQ timer is active and its wait time is close
         * to or equal to 0, dequeueing is not safe. Wait until either the
         * wait time is large enough or the timer is cleared, and make sure
         * IQ_REQ_PEND is cleared before continuing. Wait times must be at
         * least 0x3.
         */
        local_irq_save(flags);
        preempt_disable();
        retry = 5000; /* wait for 500 usecs at maximum */
        while (true) {
                temp = RREG32(mmCP_HQD_IQ_TIMER);
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
                        pr_debug("HW is processing IQ\n");
                        goto loop;
                }
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
                                        == 3) /* SEM-rearm is safe */
                                break;
                        /* Wait time 3 is safe for CP, but our MMIO read/write
                         * time is close to 1 microsecond, so check for 10 to
                         * leave more buffer room
                         */
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
                                        >= 10)
                                break;
                        pr_debug("IQ timer is active\n");
                } else
                        break;
loop:
                if (!retry) {
                        pr_err("CP HQD IQ timer status time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        retry = 1000;
        while (true) {
                temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
                if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
                        break;
                pr_debug("Dequeue request is pending\n");

                if (!retry) {
                        pr_err("CP HQD dequeue request time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        local_irq_restore(flags);
        preempt_enable();

        WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

        end_jiffies = (utimeout * HZ / 1000) + jiffies;
        while (true) {
                temp = RREG32(mmCP_HQD_ACTIVE);
                if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
                        break;
                if (time_after(jiffies, end_jiffies)) {
                        pr_err("cp queue preemption time out\n");
                        release_queue(kgd);
                        return -ETIME;
                }
                usleep_range(500, 1000);
        }

        release_queue(kgd);
        return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t temp;
        int timeout = utimeout;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
        temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

        while (true) {
                temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                /* wait for the per-queue context to report idle */
                if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (timeout <= 0)
                        return -ETIME;
                msleep(20);
                timeout -= 20;
        }

        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);

        return 0;
}

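/*
 * Invalidate all four watch points by clearing their VALID bits while
 * keeping the default address mask and the ATC bit in place.
 */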
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        union TCP_WATCH_CNTL_BITS cntl;
        unsigned int i;

        cntl.u32All = 0;

        cntl.bitfields.valid = 0;
        cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
        cntl.bitfields.atc = 1;

        /* Turn off all watch points */
        for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
                WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
                        ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        return 0;
}

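/*
 * Program one watch point: drop VALID first so a partially-updated watch
 * point is never live, write the high and low address halves, then re-arm
 * by setting VALID in the control register.
 */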
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        union TCP_WATCH_CNTL_BITS cntl;

        cntl.u32All = cntl_val;

        /* Turning off this watch point until we set all the registers */
        cntl.bitfields.valid = 0;
        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_ADDR_HI], addr_hi);

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_ADDR_LO], addr_lo);

        /* Enable the watch point */
        cntl.bitfields.valid = 1;

        WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                ADDRESS_WATCH_REG_CNTL], cntl.u32All);

        return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t data;

        mutex_lock(&adev->grbm_idx_mutex);

        WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
        WREG32(mmSQ_CMD, sq_cmd);

        /* Restore the GRBM_GFX_INDEX register to broadcast mode */
        data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
                GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
                GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

        WREG32(mmGRBM_GFX_INDEX, data);

        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset)
{
        return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
                                                        uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                                uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        /* the mapping register holds the PASID in its low bits */
        return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

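/* Request a TLB flush for all translations tagged with this VMID. */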
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}

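/*
 * Tell the hardware where the scratch (private) memory backing for this
 * VMID lives; SH_HIDDEN_PRIVATE_BASE_VMID is banked by VMID, hence the
 * SRBM lock around the write.
 */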
static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        lock_srbm(kgd, 0, 0, 0, vmid);
        WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
        unlock_srbm(kgd);
}

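/*
 * Report the microcode version of the requested engine, read from the
 * common header of the firmware image amdgpu has already loaded.
 */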
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
        const union amdgpu_firmware_header *hdr;

        BUG_ON(kgd == NULL);

        switch (type) {
        case KGD_ENGINE_PFP:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.pfp_fw->data;
                break;

        case KGD_ENGINE_ME:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.me_fw->data;
                break;

        case KGD_ENGINE_CE:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.ce_fw->data;
                break;

        case KGD_ENGINE_MEC1:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.mec_fw->data;
                break;

        case KGD_ENGINE_MEC2:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.mec2_fw->data;
                break;

        case KGD_ENGINE_RLC:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->gfx.rlc_fw->data;
                break;

        case KGD_ENGINE_SDMA1:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->sdma.instance[0].fw->data;
                break;

        case KGD_ENGINE_SDMA2:
                hdr = (const union amdgpu_firmware_header *)
                                                adev->sdma.instance[1].fw->data;
                break;

        default:
                return 0;
        }

        if (hdr == NULL)
                return 0;

        /* Only 12 bits of the version are in use */
        return hdr->common.ucode_version;
}