linux/drivers/gpu/drm/radeon/radeon_kfd.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "cikd.h"
#include "cik_reg.h"
#include "radeon_kfd.h"
#include "radeon_ucode.h"
#include <linux/firmware.h>
#include "cik_structs.h"

#define CIK_PIPE_PER_MEC        (4)

static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
        TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
        TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
        TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
        TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
};

struct kgd_mem {
        struct radeon_bo *bo;
        uint64_t gpu_addr;
        void *cpu_ptr;
};


static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
                        void **mem_obj, uint64_t *gpu_addr,
                        void **cpu_ptr);

static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);

static uint64_t get_vmem_size(struct kgd_dev *kgd);
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);

static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
                uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id);

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
                                unsigned int timeout, uint32_t pipe_id,
                                uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int timeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                        uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);

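/*
 * Function table handed to amdkfd at probe time.  amdkfd never touches
 * radeon internals directly; everything it needs from the GPU side
 * (GTT memory for queue descriptors, HQD programming, address watch
 * points, firmware versions, ...) goes through these callbacks.
 */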
static const struct kfd2kgd_calls kfd2kgd = {
        .init_gtt_mem_allocation = alloc_gtt_mem,
        .free_gtt_mem = free_gtt_mem,
        .get_vmem_size = get_vmem_size,
        .get_gpu_clock_counter = get_gpu_clock_counter,
        .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
        .program_sh_mem_settings = kgd_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
        .init_pipeline = kgd_init_pipeline,
        .init_interrupts = kgd_init_interrupts,
        .hqd_load = kgd_hqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_is_occupied = kgd_hqd_is_occupied,
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
        .address_watch_disable = kgd_address_watch_disable,
        .address_watch_execute = kgd_address_watch_execute,
        .wave_control_execute = kgd_wave_control_execute,
        .address_watch_get_offset = kgd_address_watch_get_offset,
        .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
        .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
        .write_vmid_invalidate_request = write_vmid_invalidate_request,
        .get_fw_version = get_fw_version
};

static const struct kgd2kfd_calls *kgd2kfd;

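/*
 * amdkfd can be built either as a module or into the kernel.  When it is
 * a module we must take a reference on its kgd2kfd_init symbol before
 * calling it, so amdkfd cannot be unloaded underneath us; the matching
 * symbol_put() is done on failure below and in radeon_kfd_fini().
 */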
int radeon_kfd_init(void)
{
        int ret;

#if defined(CONFIG_HSA_AMD_MODULE)
        int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);

        kgd2kfd_init_p = symbol_request(kgd2kfd_init);

        if (kgd2kfd_init_p == NULL)
                return -ENOENT;

        ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
        if (ret) {
                symbol_put(kgd2kfd_init);
                kgd2kfd = NULL;
        }

#elif defined(CONFIG_HSA_AMD)
        ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
        if (ret)
                kgd2kfd = NULL;

#else
        ret = -ENOENT;
#endif

        return ret;
}

void radeon_kfd_fini(void)
{
        if (kgd2kfd) {
                kgd2kfd->exit();
                symbol_put(kgd2kfd_init);
        }
}

void radeon_kfd_device_probe(struct radeon_device *rdev)
{
        if (kgd2kfd)
                rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
                        rdev->pdev, &kfd2kgd);
}

void radeon_kfd_device_init(struct radeon_device *rdev)
{
        int i, queue, pipe, mec;

        if (rdev->kfd) {
                struct kgd2kfd_shared_resources gpu_resources = {
                        .compute_vmid_bitmap = 0xFF00,
                        .num_pipe_per_mec = 4,
                        .num_queue_per_pipe = 8
                };

                bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);

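                /*
                 * Mark which HQDs amdkfd may use.  Queues are numbered
                 * globally (8 queues per pipe, 4 pipes per MEC) and only
                 * pipes 1-3 of the first MEC are handed over here;
                 * pipe 0 is left out, presumably because radeon's own
                 * compute rings live there.
                 */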
                for (i = 0; i < KGD_MAX_QUEUES; ++i) {
                        queue = i % gpu_resources.num_queue_per_pipe;
                        pipe = (i / gpu_resources.num_queue_per_pipe)
                                % gpu_resources.num_pipe_per_mec;
                        mec = (i / gpu_resources.num_queue_per_pipe)
                                / gpu_resources.num_pipe_per_mec;

                        if (mec == 0 && pipe > 0)
                                set_bit(i, gpu_resources.queue_bitmap);
                }

                radeon_doorbell_get_kfd_info(rdev,
                                &gpu_resources.doorbell_physical_address,
                                &gpu_resources.doorbell_aperture_size,
                                &gpu_resources.doorbell_start_offset);

                kgd2kfd->device_init(rdev->kfd, &gpu_resources);
        }
}

void radeon_kfd_device_fini(struct radeon_device *rdev)
{
        if (rdev->kfd) {
                kgd2kfd->device_exit(rdev->kfd);
                rdev->kfd = NULL;
        }
}

void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
{
        if (rdev->kfd)
                kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
}

void radeon_kfd_suspend(struct radeon_device *rdev)
{
        if (rdev->kfd)
                kgd2kfd->suspend(rdev->kfd);
}

int radeon_kfd_resume(struct radeon_device *rdev)
{
        int r = 0;

        if (rdev->kfd)
                r = kgd2kfd->resume(rdev->kfd);

        return r;
}

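/*
 * Allocate a pinned, kernel-mapped buffer in GTT on behalf of amdkfd.
 * The opaque handle returned through mem_obj is later passed back to
 * free_gtt_mem().
 */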
static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
                        void **mem_obj, uint64_t *gpu_addr,
                        void **cpu_ptr)
{
        struct radeon_device *rdev = (struct radeon_device *)kgd;
        struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
        int r;

        BUG_ON(kgd == NULL);
        BUG_ON(gpu_addr == NULL);
        BUG_ON(cpu_ptr == NULL);

        *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
        if ((*mem) == NULL)
                return -ENOMEM;

        r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
                                RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
        if (r) {
                dev_err(rdev->dev,
                        "failed to allocate BO for amdkfd (%d)\n", r);
                kfree(*mem);
                return r;
        }

        /* map the buffer */
        r = radeon_bo_reserve((*mem)->bo, true);
        if (r) {
                dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
                goto allocate_mem_reserve_bo_failed;
        }

        r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
                                &(*mem)->gpu_addr);
        if (r) {
                dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
                goto allocate_mem_pin_bo_failed;
        }
        *gpu_addr = (*mem)->gpu_addr;

        r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
        if (r) {
                dev_err(rdev->dev,
                        "(%d) failed to map bo to kernel for amdkfd\n", r);
                goto allocate_mem_kmap_bo_failed;
        }
        *cpu_ptr = (*mem)->cpu_ptr;

        radeon_bo_unreserve((*mem)->bo);

        return 0;

allocate_mem_kmap_bo_failed:
        radeon_bo_unpin((*mem)->bo);
allocate_mem_pin_bo_failed:
        radeon_bo_unreserve((*mem)->bo);
allocate_mem_reserve_bo_failed:
        radeon_bo_unref(&(*mem)->bo);
        kfree(*mem);

        return r;
}

static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
        struct kgd_mem *mem = (struct kgd_mem *) mem_obj;

        BUG_ON(mem == NULL);

        radeon_bo_reserve(mem->bo, true);
        radeon_bo_kunmap(mem->bo);
        radeon_bo_unpin(mem->bo);
        radeon_bo_unreserve(mem->bo);
        radeon_bo_unref(&(mem->bo));
        kfree(mem);
}

static uint64_t get_vmem_size(struct kgd_dev *kgd)
{
        struct radeon_device *rdev = (struct radeon_device *)kgd;

        BUG_ON(kgd == NULL);

        return rdev->mc.real_vram_size;
}

static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
        struct radeon_device *rdev = (struct radeon_device *)kgd;

        return rdev->asic->get_gpu_clock_counter(rdev);
}

static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
        struct radeon_device *rdev = (struct radeon_device *)kgd;

        /* The sclk is reported in units of 10 kHz */
        return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}

static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
{
        return (struct radeon_device *)kgd;
}

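/*
 * Raw MMIO helpers.  "offset" is a byte offset into the register BAR,
 * the same convention the driver's RREG32/WREG32 macros use.
 */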
static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
{
        struct radeon_device *rdev = get_radeon_device(kgd);

        writel(value, (void __iomem *)(rdev->rmmio + offset));
}

static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
{
        struct radeon_device *rdev = get_radeon_device(kgd);

        return readl((void __iomem *)(rdev->rmmio + offset));
}

static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
                        uint32_t queue, uint32_t vmid)
{
        struct radeon_device *rdev = get_radeon_device(kgd);
        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

        mutex_lock(&rdev->srbm_mutex);
        write_register(kgd, SRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
        struct radeon_device *rdev = get_radeon_device(kgd);

        write_register(kgd, SRBM_GFX_CNTL, 0);
        mutex_unlock(&rdev->srbm_mutex);
}

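/*
 * HQD registers are banked per queue: SRBM_GFX_CNTL selects which
 * MEC/pipe/queue instance the following CP_HQD_* accesses hit, so
 * acquire_queue()/release_queue() bracket every HQD access below.
 * MEID is 1-based for the compute micro engines, hence the +1 when
 * converting a pipe_id to a MEC number (same mapping as
 * kgd_init_interrupts()).
 */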
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t queue_id)
{
        uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
        uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);

        lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
        unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                                        uint32_t sh_mem_config,
                                        uint32_t sh_mem_ape1_base,
                                        uint32_t sh_mem_ape1_limit,
                                        uint32_t sh_mem_bases)
{
        lock_srbm(kgd, 0, 0, 0, vmid);

        write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
        write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
        write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
        write_register(kgd, SH_MEM_BASES, sh_mem_bases);

        unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid)
{
        /*
         * We have to assume that there is no outstanding mapping.
         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
         * because a mapping is in progress or because a mapping finished and
         * the SW cleared it.
         * So the protocol is to always wait & clear.
         */
        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
                                        ATC_VMID_PASID_MAPPING_VALID_MASK;

        write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
                        pasid_mapping);

        while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
                                                                (1U << vmid)))
                cpu_relax();
        write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

        /* Also map the vmid to the pasid for the IH block */
        write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
                        pasid_mapping);

        return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
        /* nothing to do here */
        return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
        uint32_t mec;
        uint32_t pipe;

        mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
        pipe = (pipe_id % CIK_PIPE_PER_MEC);

        lock_srbm(kgd, mec, pipe, 0, 0);

        write_register(kgd, CPC_INT_CNTL,
                        TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);

        unlock_srbm(kgd);

        return 0;
}

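/*
 * Each SDMA engine exposes an identical block of per-queue (RLC)
 * registers.  The MQD records which engine and which queue it belongs
 * to, so the register base for this queue can be computed from those
 * two fields.
 */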
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
        uint32_t retval;

        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
                        m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

        pr_debug("kfd: sdma base address: 0x%x\n", retval);

        return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
        return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
        return (struct cik_sdma_rlc_registers *)mqd;
}

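/*
 * Load a compute MQD into the hardware queue descriptor registers and
 * activate the queue.  The write pointer is taken from the user-mode
 * shadow if that read succeeds; wptr_shift/wptr_mask/mm are part of
 * the kfd2kgd interface but are not needed by this CIK implementation.
 */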
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm)
{
        uint32_t wptr_shadow, is_wptr_shadow_valid;
        struct cik_mqd *m;

        m = get_mqd(mqd);

        is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);

        acquire_queue(kgd, pipe_id, queue_id);
        write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
        write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
        write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);

        write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
        write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
        write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);

        write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
        write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
        write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);

        write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);

        write_register(kgd, CP_HQD_PERSISTENT_STATE,
                        m->cp_hqd_persistent_state);
        write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
        write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);

        write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
                        m->cp_hqd_atomic0_preop_lo);

        write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
                        m->cp_hqd_atomic0_preop_hi);

        write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
                        m->cp_hqd_atomic1_preop_lo);

        write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
                        m->cp_hqd_atomic1_preop_hi);

        write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
                        m->cp_hqd_pq_rptr_report_addr_lo);

        write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                        m->cp_hqd_pq_rptr_report_addr_hi);

        write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);

        write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
                        m->cp_hqd_pq_wptr_poll_addr_lo);

        write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                        m->cp_hqd_pq_wptr_poll_addr_hi);

        write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
                        m->cp_hqd_pq_doorbell_control);

        write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);

        write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);

        write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
        write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);

        write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);

        if (is_wptr_shadow_valid)
                write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);

        write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
        release_queue(kgd);

        return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
{
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        write_register(kgd,
                        sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR,
                        m->sdma_rlc_virtual_addr);

        write_register(kgd,
                        sdma_base_addr + SDMA0_RLC0_RB_BASE,
                        m->sdma_rlc_rb_base);

        write_register(kgd,
                        sdma_base_addr + SDMA0_RLC0_RB_BASE_HI,
                        m->sdma_rlc_rb_base_hi);

        write_register(kgd,
                        sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO,
                        m->sdma_rlc_rb_rptr_addr_lo);

        write_register(kgd,
                        sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI,
                        m->sdma_rlc_rb_rptr_addr_hi);

        write_register(kgd,
                        sdma_base_addr + SDMA0_RLC0_DOORBELL,
                        m->sdma_rlc_doorbell);

        write_register(kgd,
                        sdma_base_addr + SDMA0_RLC0_RB_CNTL,
                        m->sdma_rlc_rb_cntl);

        return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id)
{
        uint32_t act;
        bool retval = false;
        uint32_t low, high;

        acquire_queue(kgd, pipe_id, queue_id);
        act = read_register(kgd, CP_HQD_ACTIVE);
        if (act) {
                low = lower_32_bits(queue_address >> 8);
                high = upper_32_bits(queue_address >> 8);

                if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
                                high == read_register(kgd, CP_HQD_PQ_BASE_HI))
                        retval = true;
        }
        release_queue(kgd);
        return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t sdma_rlc_rb_cntl;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        sdma_rlc_rb_cntl = read_register(kgd,
                                        sdma_base_addr + SDMA0_RLC0_RB_CNTL);

        if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE)
                return true;

        return false;
}

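/*
 * Ask the CP to dequeue the HQD (preempt or reset, depending on
 * reset_type) and wait for the ACTIVE bit to clear.  "timeout" is in
 * milliseconds and is polled down in 20 ms steps.
 */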
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
                                unsigned int timeout, uint32_t pipe_id,
                                uint32_t queue_id)
{
        uint32_t temp;

        acquire_queue(kgd, pipe_id, queue_id);
        write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);

        write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);

        while (true) {
                temp = read_register(kgd, CP_HQD_ACTIVE);
                if (!(temp & 0x1))
                        break;
                if (timeout == 0) {
                        pr_err("kfd: cp queue preemption timed out\n");
                        release_queue(kgd);
                        return -ETIME;
                }
                msleep(20);
                timeout -= 20;
        }

        release_queue(kgd);
        return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int timeout)
{
        struct cik_sdma_rlc_registers *m;
        uint32_t sdma_base_addr;
        uint32_t temp;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL);
        temp = temp & ~SDMA_RB_ENABLE;
        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp);

        while (true) {
                temp = read_register(kgd, sdma_base_addr +
                                                SDMA0_RLC0_CONTEXT_STATUS);
                if (temp & SDMA_RLC_IDLE)
                        break;
                if (timeout == 0)
                        return -ETIME;
                msleep(20);
                timeout -= 20;
        }

        write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0);
        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0);
        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0);
        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0);

        return 0;
}

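/*
 * TCP address watch points (used by the debugger interface).  The
 * watchRegs[] table at the top of the file maps a watch point id to its
 * ADDR_H/ADDR_L/CNTL register triple.
 */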
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
        union TCP_WATCH_CNTL_BITS cntl;
        unsigned int i;

        cntl.u32All = 0;

        cntl.bitfields.valid = 0;
        cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
        cntl.bitfields.atc = 1;

        /* Turn off every watch point until its registers are set up again */
        for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
                write_register(kgd,
                                watchRegs[i * ADDRESS_WATCH_REG_MAX +
                                        ADDRESS_WATCH_REG_CNTL],
                                cntl.u32All);

        return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo)
{
        union TCP_WATCH_CNTL_BITS cntl;

        cntl.u32All = cntl_val;

        /* Turning off this watch point until we set all the registers */
        cntl.bitfields.valid = 0;
        write_register(kgd,
                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                                ADDRESS_WATCH_REG_CNTL],
                        cntl.u32All);

        write_register(kgd,
                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                                ADDRESS_WATCH_REG_ADDR_HI],
                        addr_hi);

        write_register(kgd,
                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                                ADDRESS_WATCH_REG_ADDR_LO],
                        addr_lo);

        /* Enable the watch point */
        cntl.bitfields.valid = 1;

        write_register(kgd,
                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
                                ADDRESS_WATCH_REG_CNTL],
                        cntl.u32All);

        return 0;
}

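/*
 * Broadcast an SQ command (e.g. halting or resuming waves) to the shader
 * engines selected by gfx_index_val, then restore GRBM_GFX_INDEX to
 * broadcast mode so later driver accesses see all SEs/SHs again.
 */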
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd)
{
        struct radeon_device *rdev = get_radeon_device(kgd);
        uint32_t data;

        mutex_lock(&rdev->grbm_idx_mutex);

        write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
        write_register(kgd, SQ_CMD, sq_cmd);

        /* Restore the GRBM_GFX_INDEX register */

        data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
                SE_BROADCAST_WRITES;

        write_register(kgd, GRBM_GFX_INDEX, data);

        mutex_unlock(&rdev->grbm_idx_mutex);

        return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset)
{
        return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]
                / 4;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
{
        uint32_t reg;
        struct radeon_device *rdev = (struct radeon_device *) kgd;

        reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
        return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                        uint8_t vmid)
{
        uint32_t reg;
        struct radeon_device *rdev = (struct radeon_device *) kgd;

        reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
        return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
}

static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
        struct radeon_device *rdev = (struct radeon_device *) kgd;

        WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
}

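/*
 * Report the microcode version of the requested engine from the loaded
 * firmware headers.  A minimal sketch of how a caller might use it
 * (hypothetical snippet, not taken from amdkfd):
 *
 *	uint16_t mec_ver = kfd2kgd.get_fw_version(kgd, KGD_ENGINE_MEC1);
 */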
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
        struct radeon_device *rdev = (struct radeon_device *) kgd;
        const union radeon_firmware_header *hdr;

        BUG_ON(kgd == NULL || rdev->mec_fw == NULL);

        switch (type) {
        case KGD_ENGINE_PFP:
                hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data;
                break;

        case KGD_ENGINE_ME:
                hdr = (const union radeon_firmware_header *) rdev->me_fw->data;
                break;

        case KGD_ENGINE_CE:
                hdr = (const union radeon_firmware_header *) rdev->ce_fw->data;
                break;

        case KGD_ENGINE_MEC1:
                hdr = (const union radeon_firmware_header *) rdev->mec_fw->data;
                break;

        case KGD_ENGINE_MEC2:
                hdr = (const union radeon_firmware_header *)
                                                        rdev->mec2_fw->data;
                break;

        case KGD_ENGINE_RLC:
                hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
                break;

        case KGD_ENGINE_SDMA1:
        case KGD_ENGINE_SDMA2:
                hdr = (const union radeon_firmware_header *)
                                                        rdev->sdma_fw->data;
                break;

        default:
                return 0;
        }

        if (hdr == NULL)
                return 0;

        /* Only 12 bits are in use */
        return hdr->common.ucode_version;
}