/* linux/drivers/gpu/drm/radeon/radeon_kfd.c */
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/module.h>
  24#include <linux/fdtable.h>
  25#include <linux/uaccess.h>
  26#include <drm/drmP.h>
  27#include "radeon.h"
  28#include "cikd.h"
  29#include "cik_reg.h"
  30#include "radeon_kfd.h"
  31#include "radeon_ucode.h"
  32#include <linux/firmware.h>
  33#include "cik_structs.h"
  34
#define CIK_PIPE_PER_MEC        (4)

/*
 * MMIO offsets of the TCP address-watch registers, indexed as
 * watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset].
 * Each of the MAX_WATCH_ADDRESSES watch points contributes an
 * ADDR_H/ADDR_L/CNTL triple; the order within each triple must match the
 * ADDRESS_WATCH_REG_* index values defined elsewhere — TODO confirm.
 */
static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
	TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
	TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
	TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
	TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
};
  43
/*
 * GTT buffer allocated on behalf of amdkfd by alloc_gtt_mem().
 * The BO stays pinned in GTT and kernel-mapped for its whole lifetime;
 * free_gtt_mem() unmaps, unpins and releases it.
 */
struct kgd_mem {
	struct radeon_bo *bo;	/* underlying radeon buffer object */
	uint64_t gpu_addr;	/* GPU (GTT) address from radeon_bo_pin() */
	void *cpu_ptr;		/* kernel CPU mapping from radeon_bo_kmap() */
};
  49
  50
  51static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
  52                        void **mem_obj, uint64_t *gpu_addr,
  53                        void **cpu_ptr);
  54
  55static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
  56
  57static uint64_t get_vmem_size(struct kgd_dev *kgd);
  58static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
  59
  60static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
  61static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
  62
  63/*
  64 * Register access functions
  65 */
  66
  67static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
  68                uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
  69                uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
  70
  71static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
  72                                        unsigned int vmid);
  73
  74static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
  75                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
  76static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
  77static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
  78                        uint32_t queue_id, uint32_t __user *wptr);
  79static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
  80static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
  81                                uint32_t pipe_id, uint32_t queue_id);
  82
  83static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
  84                                unsigned int timeout, uint32_t pipe_id,
  85                                uint32_t queue_id);
  86static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
  87static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
  88                                unsigned int timeout);
  89static int kgd_address_watch_disable(struct kgd_dev *kgd);
  90static int kgd_address_watch_execute(struct kgd_dev *kgd,
  91                                        unsigned int watch_point_id,
  92                                        uint32_t cntl_val,
  93                                        uint32_t addr_hi,
  94                                        uint32_t addr_lo);
  95static int kgd_wave_control_execute(struct kgd_dev *kgd,
  96                                        uint32_t gfx_index_val,
  97                                        uint32_t sq_cmd);
  98static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
  99                                        unsigned int watch_point_id,
 100                                        unsigned int reg_offset);
 101
 102static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
 103static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 104                                                        uint8_t vmid);
 105static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 106
/*
 * Callback table handed to amdkfd in radeon_kfd_device_probe(); these are
 * the graphics-driver (KGD) services the KFD compute driver calls back into.
 */
static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_vmem_size = get_vmem_size,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_pipeline = kgd_init_pipeline,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
	.write_vmid_invalidate_request = write_vmid_invalidate_request,
	.get_fw_version = get_fw_version
};
 132
/* Interface into amdkfd, obtained via kgd2kfd_init(); NULL if KFD is absent. */
static const struct kgd2kfd_calls *kgd2kfd;
 134
/*
 * Resolve the amdkfd entry points and perform the interface-version
 * handshake.
 *
 * When amdkfd is built as a module (CONFIG_HSA_AMD_MODULE), kgd2kfd_init is
 * looked up at runtime with symbol_request(), which also pins the amdkfd
 * module; the reference is dropped again if the handshake fails (the
 * success-path reference is released in radeon_kfd_fini()).  When amdkfd is
 * built in (CONFIG_HSA_AMD) the symbol is linked directly.
 *
 * Returns true when KFD support is available and initialized.
 */
bool radeon_kfd_init(void)
{
#if defined(CONFIG_HSA_AMD_MODULE)
	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);

	kgd2kfd_init_p = symbol_request(kgd2kfd_init);

	/* amdkfd module not loaded (or symbol unavailable). */
	if (kgd2kfd_init_p == NULL)
		return false;

	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
		/* Version mismatch: drop the module reference taken above. */
		symbol_put(kgd2kfd_init);
		kgd2kfd = NULL;

		return false;
	}

	return true;
#elif defined(CONFIG_HSA_AMD)
	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
		kgd2kfd = NULL;

		return false;
	}

	return true;
#else
	/* Kernel built without any HSA/KFD support. */
	return false;
#endif
}
 165
/*
 * Tear down the radeon<->amdkfd binding.  Safe to call when
 * radeon_kfd_init() failed (kgd2kfd == NULL).  symbol_put() pairs with the
 * symbol_request() taken in radeon_kfd_init().
 *
 * NOTE(review): in the built-in CONFIG_HSA_AMD path no symbol_request() was
 * taken — confirm symbol_put() is balanced (or a no-op) in that config.
 */
void radeon_kfd_fini(void)
{
	if (kgd2kfd) {
		kgd2kfd->exit();
		symbol_put(kgd2kfd_init);
	}
}
 173
 174void radeon_kfd_device_probe(struct radeon_device *rdev)
 175{
 176        if (kgd2kfd)
 177                rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
 178                        rdev->pdev, &kfd2kgd);
 179}
 180
 181void radeon_kfd_device_init(struct radeon_device *rdev)
 182{
 183        if (rdev->kfd) {
 184                struct kgd2kfd_shared_resources gpu_resources = {
 185                        .compute_vmid_bitmap = 0xFF00,
 186
 187                        .first_compute_pipe = 1,
 188                        .compute_pipe_count = 4 - 1,
 189                };
 190
 191                radeon_doorbell_get_kfd_info(rdev,
 192                                &gpu_resources.doorbell_physical_address,
 193                                &gpu_resources.doorbell_aperture_size,
 194                                &gpu_resources.doorbell_start_offset);
 195
 196                kgd2kfd->device_init(rdev->kfd, &gpu_resources);
 197        }
 198}
 199
 200void radeon_kfd_device_fini(struct radeon_device *rdev)
 201{
 202        if (rdev->kfd) {
 203                kgd2kfd->device_exit(rdev->kfd);
 204                rdev->kfd = NULL;
 205        }
 206}
 207
 208void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
 209{
 210        if (rdev->kfd)
 211                kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
 212}
 213
 214void radeon_kfd_suspend(struct radeon_device *rdev)
 215{
 216        if (rdev->kfd)
 217                kgd2kfd->suspend(rdev->kfd);
 218}
 219
 220int radeon_kfd_resume(struct radeon_device *rdev)
 221{
 222        int r = 0;
 223
 224        if (rdev->kfd)
 225                r = kgd2kfd->resume(rdev->kfd);
 226
 227        return r;
 228}
 229
 230static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 231                        void **mem_obj, uint64_t *gpu_addr,
 232                        void **cpu_ptr)
 233{
 234        struct radeon_device *rdev = (struct radeon_device *)kgd;
 235        struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 236        int r;
 237
 238        BUG_ON(kgd == NULL);
 239        BUG_ON(gpu_addr == NULL);
 240        BUG_ON(cpu_ptr == NULL);
 241
 242        *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
 243        if ((*mem) == NULL)
 244                return -ENOMEM;
 245
 246        r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
 247                                RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
 248        if (r) {
 249                dev_err(rdev->dev,
 250                        "failed to allocate BO for amdkfd (%d)\n", r);
 251                return r;
 252        }
 253
 254        /* map the buffer */
 255        r = radeon_bo_reserve((*mem)->bo, true);
 256        if (r) {
 257                dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 258                goto allocate_mem_reserve_bo_failed;
 259        }
 260
 261        r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
 262                                &(*mem)->gpu_addr);
 263        if (r) {
 264                dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 265                goto allocate_mem_pin_bo_failed;
 266        }
 267        *gpu_addr = (*mem)->gpu_addr;
 268
 269        r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 270        if (r) {
 271                dev_err(rdev->dev,
 272                        "(%d) failed to map bo to kernel for amdkfd\n", r);
 273                goto allocate_mem_kmap_bo_failed;
 274        }
 275        *cpu_ptr = (*mem)->cpu_ptr;
 276
 277        radeon_bo_unreserve((*mem)->bo);
 278
 279        return 0;
 280
 281allocate_mem_kmap_bo_failed:
 282        radeon_bo_unpin((*mem)->bo);
 283allocate_mem_pin_bo_failed:
 284        radeon_bo_unreserve((*mem)->bo);
 285allocate_mem_reserve_bo_failed:
 286        radeon_bo_unref(&(*mem)->bo);
 287
 288        return r;
 289}
 290
 291static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 292{
 293        struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
 294
 295        BUG_ON(mem == NULL);
 296
 297        radeon_bo_reserve(mem->bo, true);
 298        radeon_bo_kunmap(mem->bo);
 299        radeon_bo_unpin(mem->bo);
 300        radeon_bo_unreserve(mem->bo);
 301        radeon_bo_unref(&(mem->bo));
 302        kfree(mem);
 303}
 304
 305static uint64_t get_vmem_size(struct kgd_dev *kgd)
 306{
 307        struct radeon_device *rdev = (struct radeon_device *)kgd;
 308
 309        BUG_ON(kgd == NULL);
 310
 311        return rdev->mc.real_vram_size;
 312}
 313
 314static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 315{
 316        struct radeon_device *rdev = (struct radeon_device *)kgd;
 317
 318        return rdev->asic->get_gpu_clock_counter(rdev);
 319}
 320
 321static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 322{
 323        struct radeon_device *rdev = (struct radeon_device *)kgd;
 324
 325        /* The sclk is in quantas of 10kHz */
 326        return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
 327}
 328
/* The kgd_dev handle passed to amdkfd is really our radeon_device. */
static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
{
	return (struct radeon_device *)kgd;
}
 333
 334static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
 335{
 336        struct radeon_device *rdev = get_radeon_device(kgd);
 337
 338        writel(value, (void __iomem *)(rdev->rmmio + offset));
 339}
 340
 341static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
 342{
 343        struct radeon_device *rdev = get_radeon_device(kgd);
 344
 345        return readl((void __iomem *)(rdev->rmmio + offset));
 346}
 347
 348static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
 349                        uint32_t queue, uint32_t vmid)
 350{
 351        struct radeon_device *rdev = get_radeon_device(kgd);
 352        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
 353
 354        mutex_lock(&rdev->srbm_mutex);
 355        write_register(kgd, SRBM_GFX_CNTL, value);
 356}
 357
 358static void unlock_srbm(struct kgd_dev *kgd)
 359{
 360        struct radeon_device *rdev = get_radeon_device(kgd);
 361
 362        write_register(kgd, SRBM_GFX_CNTL, 0);
 363        mutex_unlock(&rdev->srbm_mutex);
 364}
 365
/*
 * Select (pipe_id, queue_id) for subsequent CP_HQD_* register accesses.
 * Must be paired with release_queue().
 *
 * NOTE(review): the pre-increment (++pipe_id) biases the pipe index by one
 * before deriving mec/pipe, unlike kgd_init_pipeline()/kgd_init_interrupts()
 * which use pipe_id unmodified — presumably to account for
 * first_compute_pipe = 1; confirm against amdkfd's queue numbering.
 */
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}
 374
/* Drop the SRBM queue selection taken by acquire_queue(). */
static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}
 379
/*
 * kfd2kgd callback: program the per-VMID shader memory configuration
 * (APE1 aperture and memory bases) with SRBM pointed at @vmid.
 */
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	/* mec/pipe/queue 0: only the VMID selection matters for SH_MEM_*. */
	lock_srbm(kgd, 0, 0, 0, vmid);

	write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
	write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
	write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	write_register(kgd, SH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}
 395
/*
 * kfd2kgd callback: bind @pasid to hardware @vmid in the ATC (and mirror
 * the mapping for the IH block).  pasid == 0 clears the mapping.
 * Always returns 0.
 */
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
	 * because a mapping is in progress or because a mapping finished and
	 * the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
					ATC_VMID_PASID_MAPPING_VALID_MASK;

	write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
			pasid_mapping);

	/*
	 * Busy-wait (no timeout) for the ATC to acknowledge the update,
	 * then write-1-to-clear the per-VMID status bit.
	 */
	while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
								(1U << vmid)))
		cpu_relax();
	write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
			pasid_mapping);

	return 0;
}
 423
/*
 * kfd2kgd callback: program the HPD (EOP buffer) base/size for one compute
 * pipe.  hpd_gpu_addr must be 256-byte aligned (written shifted right by 8).
 * Always returns 0.
 */
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	/* MEC0 is the gfx micro engine, so compute MECs are numbered from 1. */
	uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, 0, 0);
	write_register(kgd, CP_HPD_EOP_BASE_ADDR,
			lower_32_bits(hpd_gpu_addr >> 8));
	write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
			upper_32_bits(hpd_gpu_addr >> 8));
	write_register(kgd, CP_HPD_EOP_VMID, 0);
	write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
	unlock_srbm(kgd);

	return 0;
}
 441
/*
 * kfd2kgd callback: enable time-stamp and opcode-error interrupts for one
 * compute pipe.  Always returns 0.
 */
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	/* Compute MECs are numbered from 1 (MEC0 is the gfx micro engine). */
	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
	pipe = (pipe_id % CIK_PIPE_PER_MEC);

	lock_srbm(kgd, mec, pipe, 0, 0);

	write_register(kgd, CPC_INT_CNTL,
			TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);

	unlock_srbm(kgd);

	return 0;
}
 459
 460static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 461{
 462        uint32_t retval;
 463
 464        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
 465                        m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
 466
 467        pr_debug("kfd: sdma base address: 0x%x\n", retval);
 468
 469        return retval;
 470}
 471
/* The opaque mqd blob for a CP queue is a CIK MQD structure. */
static inline struct cik_mqd *get_mqd(void *mqd)
{
	return (struct cik_mqd *)mqd;
}
 476
/* The opaque mqd blob for an SDMA queue is a CIK SDMA RLC register image. */
static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
}
 481
/*
 * kfd2kgd callback: load an MQD image into the HQD registers of
 * (pipe_id, queue_id) and activate the queue.  If the user-space write
 * pointer can be read, it is also programmed into CP_HQD_PQ_WPTR.
 * Always returns 0.
 */
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr)
{
	uint32_t wptr_shadow, is_wptr_shadow_valid;
	struct cik_mqd *m;

	m = get_mqd(mqd);

	/* Snapshot the user wptr before touching hardware; may fail (EFAULT). */
	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);

	acquire_queue(kgd, pipe_id, queue_id);
	/* MQD location and control. */
	write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
	write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
	write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);

	/* Primary queue (ring buffer) base and control. */
	write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
	write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
	write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);

	/* Indirect buffer state. */
	write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
	write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
	write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);

	write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);

	write_register(kgd, CP_HQD_PERSISTENT_STATE,
			m->cp_hqd_persistent_state);
	write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
	write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);

	/* Atomic pre-op state. */
	write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
			m->cp_hqd_atomic0_preop_lo);

	write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
			m->cp_hqd_atomic0_preop_hi);

	write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
			m->cp_hqd_atomic1_preop_lo);

	write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
			m->cp_hqd_atomic1_preop_hi);

	/* Read/write pointer reporting and doorbell. */
	write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
			m->cp_hqd_pq_rptr_report_addr_lo);

	write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
			m->cp_hqd_pq_rptr_report_addr_hi);

	write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);

	write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
			m->cp_hqd_pq_wptr_poll_addr_lo);

	write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
			m->cp_hqd_pq_wptr_poll_addr_hi);

	write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
			m->cp_hqd_pq_doorbell_control);

	write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);

	write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);

	write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
	write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);

	write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);

	if (is_wptr_shadow_valid)
		write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);

	/* Activate last, after all queue state is in place. */
	write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
	release_queue(kgd);

	return 0;
}
 558
/*
 * kfd2kgd callback: load an SDMA RLC queue's register image into the
 * hardware queue slot selected by the mqd's engine/queue ids.
 * RB_CNTL is written last; its enable bit is what arms the ring.
 * Always returns 0.
 */
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR,
			m->sdma_rlc_virtual_addr);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_BASE,
			m->sdma_rlc_rb_base);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_BASE_HI,
			m->sdma_rlc_rb_base_hi);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdma_rlc_rb_rptr_addr_lo);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdma_rlc_rb_rptr_addr_hi);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_DOORBELL,
			m->sdma_rlc_doorbell);

	write_register(kgd,
			sdma_base_addr + SDMA0_RLC0_RB_CNTL,
			m->sdma_rlc_rb_cntl);

	return 0;
}
 597
 598static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 599                                uint32_t pipe_id, uint32_t queue_id)
 600{
 601        uint32_t act;
 602        bool retval = false;
 603        uint32_t low, high;
 604
 605        acquire_queue(kgd, pipe_id, queue_id);
 606        act = read_register(kgd, CP_HQD_ACTIVE);
 607        if (act) {
 608                low = lower_32_bits(queue_address >> 8);
 609                high = upper_32_bits(queue_address >> 8);
 610
 611                if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
 612                                high == read_register(kgd, CP_HQD_PQ_BASE_HI))
 613                        retval = true;
 614        }
 615        release_queue(kgd);
 616        return retval;
 617}
 618
 619static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 620{
 621        struct cik_sdma_rlc_registers *m;
 622        uint32_t sdma_base_addr;
 623        uint32_t sdma_rlc_rb_cntl;
 624
 625        m = get_sdma_mqd(mqd);
 626        sdma_base_addr = get_sdma_base_addr(m);
 627
 628        sdma_rlc_rb_cntl = read_register(kgd,
 629                                        sdma_base_addr + SDMA0_RLC0_RB_CNTL);
 630
 631        if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE)
 632                return true;
 633
 634        return false;
 635}
 636
 637static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
 638                                unsigned int timeout, uint32_t pipe_id,
 639                                uint32_t queue_id)
 640{
 641        uint32_t temp;
 642
 643        acquire_queue(kgd, pipe_id, queue_id);
 644        write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);
 645
 646        write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);
 647
 648        while (true) {
 649                temp = read_register(kgd, CP_HQD_ACTIVE);
 650                if (temp & 0x1)
 651                        break;
 652                if (timeout == 0) {
 653                        pr_err("kfd: cp queue preemption time out (%dms)\n",
 654                                temp);
 655                        release_queue(kgd);
 656                        return -ETIME;
 657                }
 658                msleep(20);
 659                timeout -= 20;
 660        }
 661
 662        release_queue(kgd);
 663        return 0;
 664}
 665
 666static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 667                                unsigned int timeout)
 668{
 669        struct cik_sdma_rlc_registers *m;
 670        uint32_t sdma_base_addr;
 671        uint32_t temp;
 672
 673        m = get_sdma_mqd(mqd);
 674        sdma_base_addr = get_sdma_base_addr(m);
 675
 676        temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL);
 677        temp = temp & ~SDMA_RB_ENABLE;
 678        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp);
 679
 680        while (true) {
 681                temp = read_register(kgd, sdma_base_addr +
 682                                                SDMA0_RLC0_CONTEXT_STATUS);
 683                if (temp & SDMA_RLC_IDLE)
 684                        break;
 685                if (timeout == 0)
 686                        return -ETIME;
 687                msleep(20);
 688                timeout -= 20;
 689        }
 690
 691        write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0);
 692        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0);
 693        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0);
 694        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0);
 695
 696        return 0;
 697}
 698
 699static int kgd_address_watch_disable(struct kgd_dev *kgd)
 700{
 701        union TCP_WATCH_CNTL_BITS cntl;
 702        unsigned int i;
 703
 704        cntl.u32All = 0;
 705
 706        cntl.bitfields.valid = 0;
 707        cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
 708        cntl.bitfields.atc = 1;
 709
 710        /* Turning off this address until we set all the registers */
 711        for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
 712                write_register(kgd,
 713                                watchRegs[i * ADDRESS_WATCH_REG_MAX +
 714                                        ADDRESS_WATCH_REG_CNTL],
 715                                cntl.u32All);
 716
 717        return 0;
 718}
 719
/*
 * kfd2kgd callback: program one TCP address watch point.  The protocol is
 * disable (valid=0) -> write ADDR_HI/ADDR_LO -> re-enable (valid=1), so the
 * watch point never fires on a half-programmed address.  Always returns 0.
 */
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	union TCP_WATCH_CNTL_BITS cntl;

	cntl.u32All = cntl_val;

	/* Turning off this watch point until we set all the registers */
	cntl.bitfields.valid = 0;
	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_CNTL],
			cntl.u32All);

	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_ADDR_HI],
			addr_hi);

	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_ADDR_LO],
			addr_lo);

	/* Enable the watch point */
	cntl.bitfields.valid = 1;

	write_register(kgd,
			watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
				ADDRESS_WATCH_REG_CNTL],
			cntl.u32All);

	return 0;
}
 757
/*
 * kfd2kgd callback: target GRBM_GFX_INDEX at the caller-selected SE/SH/CU
 * and issue an SQ command (e.g. halt/kill waves), then restore broadcast
 * mode.  grbm_idx_mutex serializes GRBM_GFX_INDEX users.  Always returns 0.
 */
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct radeon_device *rdev = get_radeon_device(kgd);
	uint32_t data;

	mutex_lock(&rdev->grbm_idx_mutex);

	write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
	write_register(kgd, SQ_CMD, sq_cmd);

	/*  Restore the GRBM_GFX_INDEX register  */

	data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
		SE_BROADCAST_WRITES;

	write_register(kgd, GRBM_GFX_INDEX, data);

	mutex_unlock(&rdev->grbm_idx_mutex);

	return 0;
}
 781
 782static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 783                                        unsigned int watch_point_id,
 784                                        unsigned int reg_offset)
 785{
 786        return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
 787}
 788
 789static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
 790{
 791        uint32_t reg;
 792        struct radeon_device *rdev = (struct radeon_device *) kgd;
 793
 794        reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
 795        return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
 796}
 797
 798static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 799                                                        uint8_t vmid)
 800{
 801        uint32_t reg;
 802        struct radeon_device *rdev = (struct radeon_device *) kgd;
 803
 804        reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
 805        return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
 806}
 807
 808static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 809{
 810        struct radeon_device *rdev = (struct radeon_device *) kgd;
 811
 812        return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
 813}
 814
/*
 * kfd2kgd callback: return the microcode version for the requested engine,
 * read from the common header of the loaded firmware image.  Returns 0 for
 * unknown engines or when no header is available.
 *
 * NOTE(review): only rdev->mec_fw is checked by the BUG_ON; the other
 * rdev->*_fw pointers are dereferenced unchecked — presumably firmware for
 * all engines is loaded by the time KFD queries this; confirm.
 */
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
	struct radeon_device *rdev = (struct radeon_device *) kgd;
	const union radeon_firmware_header *hdr;

	BUG_ON(kgd == NULL || rdev->mec_fw == NULL);

	switch (type) {
	case KGD_ENGINE_PFP:
		hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data;
		break;

	case KGD_ENGINE_ME:
		hdr = (const union radeon_firmware_header *) rdev->me_fw->data;
		break;

	case KGD_ENGINE_CE:
		hdr = (const union radeon_firmware_header *) rdev->ce_fw->data;
		break;

	case KGD_ENGINE_MEC1:
		hdr = (const union radeon_firmware_header *) rdev->mec_fw->data;
		break;

	case KGD_ENGINE_MEC2:
		hdr = (const union radeon_firmware_header *)
							rdev->mec2_fw->data;
		break;

	case KGD_ENGINE_RLC:
		hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
		break;

	case KGD_ENGINE_SDMA1:
	case KGD_ENGINE_SDMA2:
		/* Both SDMA engines run the same firmware image. */
		hdr = (const union radeon_firmware_header *)
							rdev->sdma_fw->data;
		break;

	default:
		return 0;
	}

	if (hdr == NULL)
		return 0;

	/* Only 12 bit in use*/
	return hdr->common.ucode_version;
}
 864