linux/drivers/gpu/drm/radeon/radeon_kfd.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/module.h>
  24#include <linux/fdtable.h>
  25#include <linux/uaccess.h>
  26#include <drm/drmP.h>
  27#include "radeon.h"
  28#include "cikd.h"
  29#include "cik_reg.h"
  30#include "radeon_kfd.h"
  31#include "radeon_ucode.h"
  32#include <linux/firmware.h>
  33#include "cik_structs.h"
  34
  35#define CIK_PIPE_PER_MEC        (4)
  36
  37static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
  38        TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
  39        TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
  40        TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
  41        TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
  42};
  43
  44struct kgd_mem {
  45        struct radeon_bo *bo;
  46        uint64_t gpu_addr;
  47        void *cpu_ptr;
  48};
  49
  50
  51static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
  52                        void **mem_obj, uint64_t *gpu_addr,
  53                        void **cpu_ptr);
  54
  55static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
  56
  57static uint64_t get_vmem_size(struct kgd_dev *kgd);
  58static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
  59
  60static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
  61static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
  62
  63/*
  64 * Register access functions
  65 */
  66
  67static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
  68                uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
  69                uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
  70
  71static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
  72                                        unsigned int vmid);
  73
  74static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
  75                                uint32_t hpd_size, uint64_t hpd_gpu_addr);
  76static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
  77static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
  78                        uint32_t queue_id, uint32_t __user *wptr);
  79static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
  80static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
  81                                uint32_t pipe_id, uint32_t queue_id);
  82
  83static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
  84                                unsigned int timeout, uint32_t pipe_id,
  85                                uint32_t queue_id);
  86static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
  87static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
  88                                unsigned int timeout);
  89static int kgd_address_watch_disable(struct kgd_dev *kgd);
  90static int kgd_address_watch_execute(struct kgd_dev *kgd,
  91                                        unsigned int watch_point_id,
  92                                        uint32_t cntl_val,
  93                                        uint32_t addr_hi,
  94                                        uint32_t addr_lo);
  95static int kgd_wave_control_execute(struct kgd_dev *kgd,
  96                                        uint32_t gfx_index_val,
  97                                        uint32_t sq_cmd);
  98static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
  99                                        unsigned int watch_point_id,
 100                                        unsigned int reg_offset);
 101
 102static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
 103static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 104                                                        uint8_t vmid);
 105static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 106
 107static const struct kfd2kgd_calls kfd2kgd = {
 108        .init_gtt_mem_allocation = alloc_gtt_mem,
 109        .free_gtt_mem = free_gtt_mem,
 110        .get_vmem_size = get_vmem_size,
 111        .get_gpu_clock_counter = get_gpu_clock_counter,
 112        .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
 113        .program_sh_mem_settings = kgd_program_sh_mem_settings,
 114        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 115        .init_pipeline = kgd_init_pipeline,
 116        .init_interrupts = kgd_init_interrupts,
 117        .hqd_load = kgd_hqd_load,
 118        .hqd_sdma_load = kgd_hqd_sdma_load,
 119        .hqd_is_occupied = kgd_hqd_is_occupied,
 120        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 121        .hqd_destroy = kgd_hqd_destroy,
 122        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
 123        .address_watch_disable = kgd_address_watch_disable,
 124        .address_watch_execute = kgd_address_watch_execute,
 125        .wave_control_execute = kgd_wave_control_execute,
 126        .address_watch_get_offset = kgd_address_watch_get_offset,
 127        .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 128        .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
 129        .write_vmid_invalidate_request = write_vmid_invalidate_request,
 130        .get_fw_version = get_fw_version
 131};
 132
 133static const struct kgd2kfd_calls *kgd2kfd;
 134
 135int radeon_kfd_init(void)
 136{
 137        int ret;
 138
 139#if defined(CONFIG_HSA_AMD_MODULE)
 140        int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
 141
 142        kgd2kfd_init_p = symbol_request(kgd2kfd_init);
 143
 144        if (kgd2kfd_init_p == NULL)
 145                return -ENOENT;
 146
 147        ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
 148        if (ret) {
 149                symbol_put(kgd2kfd_init);
 150                kgd2kfd = NULL;
 151        }
 152
 153#elif defined(CONFIG_HSA_AMD)
 154        ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
 155        if (ret)
 156                kgd2kfd = NULL;
 157
 158#else
 159        ret = -ENOENT;
 160#endif
 161
 162        return ret;
 163}
 164
 165void radeon_kfd_fini(void)
 166{
 167        if (kgd2kfd) {
 168                kgd2kfd->exit();
 169                symbol_put(kgd2kfd_init);
 170        }
 171}
 172
 173void radeon_kfd_device_probe(struct radeon_device *rdev)
 174{
 175        if (kgd2kfd)
 176                rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
 177                        rdev->pdev, &kfd2kgd);
 178}
 179
 180void radeon_kfd_device_init(struct radeon_device *rdev)
 181{
 182        if (rdev->kfd) {
 183                struct kgd2kfd_shared_resources gpu_resources = {
 184                        .compute_vmid_bitmap = 0xFF00,
 185
 186                        .first_compute_pipe = 1,
 187                        .compute_pipe_count = 4 - 1,
 188                };
 189
 190                radeon_doorbell_get_kfd_info(rdev,
 191                                &gpu_resources.doorbell_physical_address,
 192                                &gpu_resources.doorbell_aperture_size,
 193                                &gpu_resources.doorbell_start_offset);
 194
 195                kgd2kfd->device_init(rdev->kfd, &gpu_resources);
 196        }
 197}
 198
 199void radeon_kfd_device_fini(struct radeon_device *rdev)
 200{
 201        if (rdev->kfd) {
 202                kgd2kfd->device_exit(rdev->kfd);
 203                rdev->kfd = NULL;
 204        }
 205}
 206
 207void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
 208{
 209        if (rdev->kfd)
 210                kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
 211}
 212
 213void radeon_kfd_suspend(struct radeon_device *rdev)
 214{
 215        if (rdev->kfd)
 216                kgd2kfd->suspend(rdev->kfd);
 217}
 218
 219int radeon_kfd_resume(struct radeon_device *rdev)
 220{
 221        int r = 0;
 222
 223        if (rdev->kfd)
 224                r = kgd2kfd->resume(rdev->kfd);
 225
 226        return r;
 227}
 228
 229static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 230                        void **mem_obj, uint64_t *gpu_addr,
 231                        void **cpu_ptr)
 232{
 233        struct radeon_device *rdev = (struct radeon_device *)kgd;
 234        struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 235        int r;
 236
 237        BUG_ON(kgd == NULL);
 238        BUG_ON(gpu_addr == NULL);
 239        BUG_ON(cpu_ptr == NULL);
 240
 241        *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
 242        if ((*mem) == NULL)
 243                return -ENOMEM;
 244
 245        r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
 246                                RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
 247        if (r) {
 248                dev_err(rdev->dev,
 249                        "failed to allocate BO for amdkfd (%d)\n", r);
 250                return r;
 251        }
 252
 253        /* map the buffer */
 254        r = radeon_bo_reserve((*mem)->bo, true);
 255        if (r) {
 256                dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 257                goto allocate_mem_reserve_bo_failed;
 258        }
 259
 260        r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
 261                                &(*mem)->gpu_addr);
 262        if (r) {
 263                dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 264                goto allocate_mem_pin_bo_failed;
 265        }
 266        *gpu_addr = (*mem)->gpu_addr;
 267
 268        r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 269        if (r) {
 270                dev_err(rdev->dev,
 271                        "(%d) failed to map bo to kernel for amdkfd\n", r);
 272                goto allocate_mem_kmap_bo_failed;
 273        }
 274        *cpu_ptr = (*mem)->cpu_ptr;
 275
 276        radeon_bo_unreserve((*mem)->bo);
 277
 278        return 0;
 279
 280allocate_mem_kmap_bo_failed:
 281        radeon_bo_unpin((*mem)->bo);
 282allocate_mem_pin_bo_failed:
 283        radeon_bo_unreserve((*mem)->bo);
 284allocate_mem_reserve_bo_failed:
 285        radeon_bo_unref(&(*mem)->bo);
 286
 287        return r;
 288}
 289
 290static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 291{
 292        struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
 293
 294        BUG_ON(mem == NULL);
 295
 296        radeon_bo_reserve(mem->bo, true);
 297        radeon_bo_kunmap(mem->bo);
 298        radeon_bo_unpin(mem->bo);
 299        radeon_bo_unreserve(mem->bo);
 300        radeon_bo_unref(&(mem->bo));
 301        kfree(mem);
 302}
 303
 304static uint64_t get_vmem_size(struct kgd_dev *kgd)
 305{
 306        struct radeon_device *rdev = (struct radeon_device *)kgd;
 307
 308        BUG_ON(kgd == NULL);
 309
 310        return rdev->mc.real_vram_size;
 311}
 312
 313static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 314{
 315        struct radeon_device *rdev = (struct radeon_device *)kgd;
 316
 317        return rdev->asic->get_gpu_clock_counter(rdev);
 318}
 319
 320static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 321{
 322        struct radeon_device *rdev = (struct radeon_device *)kgd;
 323
 324        /* The sclk is in quantas of 10kHz */
 325        return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
 326}
 327
 328static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
 329{
 330        return (struct radeon_device *)kgd;
 331}
 332
 333static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
 334{
 335        struct radeon_device *rdev = get_radeon_device(kgd);
 336
 337        writel(value, (void __iomem *)(rdev->rmmio + offset));
 338}
 339
 340static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
 341{
 342        struct radeon_device *rdev = get_radeon_device(kgd);
 343
 344        return readl((void __iomem *)(rdev->rmmio + offset));
 345}
 346
 347static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
 348                        uint32_t queue, uint32_t vmid)
 349{
 350        struct radeon_device *rdev = get_radeon_device(kgd);
 351        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
 352
 353        mutex_lock(&rdev->srbm_mutex);
 354        write_register(kgd, SRBM_GFX_CNTL, value);
 355}
 356
 357static void unlock_srbm(struct kgd_dev *kgd)
 358{
 359        struct radeon_device *rdev = get_radeon_device(kgd);
 360
 361        write_register(kgd, SRBM_GFX_CNTL, 0);
 362        mutex_unlock(&rdev->srbm_mutex);
 363}
 364
 365static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 366                                uint32_t queue_id)
 367{
 368        uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
 369        uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
 370
 371        lock_srbm(kgd, mec, pipe, queue_id, 0);
 372}
 373
 374static void release_queue(struct kgd_dev *kgd)
 375{
 376        unlock_srbm(kgd);
 377}
 378
 379static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 380                                        uint32_t sh_mem_config,
 381                                        uint32_t sh_mem_ape1_base,
 382                                        uint32_t sh_mem_ape1_limit,
 383                                        uint32_t sh_mem_bases)
 384{
 385        lock_srbm(kgd, 0, 0, 0, vmid);
 386
 387        write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
 388        write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
 389        write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
 390        write_register(kgd, SH_MEM_BASES, sh_mem_bases);
 391
 392        unlock_srbm(kgd);
 393}
 394
 395static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 396                                        unsigned int vmid)
 397{
 398        /*
 399         * We have to assume that there is no outstanding mapping.
 400         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
 401         * because a mapping is in progress or because a mapping finished and
 402         * the SW cleared it.
 403         * So the protocol is to always wait & clear.
 404         */
 405        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
 406                                        ATC_VMID_PASID_MAPPING_VALID_MASK;
 407
 408        write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
 409                        pasid_mapping);
 410
 411        while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
 412                                                                (1U << vmid)))
 413                cpu_relax();
 414        write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
 415
 416        /* Mapping vmid to pasid also for IH block */
 417        write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
 418                        pasid_mapping);
 419
 420        return 0;
 421}
 422
 423static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 424                                uint32_t hpd_size, uint64_t hpd_gpu_addr)
 425{
 426        uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
 427        uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
 428
 429        lock_srbm(kgd, mec, pipe, 0, 0);
 430        write_register(kgd, CP_HPD_EOP_BASE_ADDR,
 431                        lower_32_bits(hpd_gpu_addr >> 8));
 432        write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
 433                        upper_32_bits(hpd_gpu_addr >> 8));
 434        write_register(kgd, CP_HPD_EOP_VMID, 0);
 435        write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
 436        unlock_srbm(kgd);
 437
 438        return 0;
 439}
 440
 441static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 442{
 443        uint32_t mec;
 444        uint32_t pipe;
 445
 446        mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
 447        pipe = (pipe_id % CIK_PIPE_PER_MEC);
 448
 449        lock_srbm(kgd, mec, pipe, 0, 0);
 450
 451        write_register(kgd, CPC_INT_CNTL,
 452                        TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);
 453
 454        unlock_srbm(kgd);
 455
 456        return 0;
 457}
 458
 459static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 460{
 461        uint32_t retval;
 462
 463        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
 464                        m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
 465
 466        pr_debug("kfd: sdma base address: 0x%x\n", retval);
 467
 468        return retval;
 469}
 470
 471static inline struct cik_mqd *get_mqd(void *mqd)
 472{
 473        return (struct cik_mqd *)mqd;
 474}
 475
 476static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 477{
 478        return (struct cik_sdma_rlc_registers *)mqd;
 479}
 480
 481static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 482                        uint32_t queue_id, uint32_t __user *wptr)
 483{
 484        uint32_t wptr_shadow, is_wptr_shadow_valid;
 485        struct cik_mqd *m;
 486
 487        m = get_mqd(mqd);
 488
 489        is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
 490
 491        acquire_queue(kgd, pipe_id, queue_id);
 492        write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
 493        write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
 494        write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);
 495
 496        write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
 497        write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
 498        write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
 499
 500        write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
 501        write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
 502        write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
 503
 504        write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
 505
 506        write_register(kgd, CP_HQD_PERSISTENT_STATE,
 507                        m->cp_hqd_persistent_state);
 508        write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
 509        write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
 510
 511        write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
 512                        m->cp_hqd_atomic0_preop_lo);
 513
 514        write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
 515                        m->cp_hqd_atomic0_preop_hi);
 516
 517        write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
 518                        m->cp_hqd_atomic1_preop_lo);
 519
 520        write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
 521                        m->cp_hqd_atomic1_preop_hi);
 522
 523        write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
 524                        m->cp_hqd_pq_rptr_report_addr_lo);
 525
 526        write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 527                        m->cp_hqd_pq_rptr_report_addr_hi);
 528
 529        write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
 530
 531        write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
 532                        m->cp_hqd_pq_wptr_poll_addr_lo);
 533
 534        write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
 535                        m->cp_hqd_pq_wptr_poll_addr_hi);
 536
 537        write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
 538                        m->cp_hqd_pq_doorbell_control);
 539
 540        write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);
 541
 542        write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);
 543
 544        write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
 545        write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
 546
 547        write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
 548
 549        if (is_wptr_shadow_valid)
 550                write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);
 551
 552        write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
 553        release_queue(kgd);
 554
 555        return 0;
 556}
 557
 558static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 559{
 560        struct cik_sdma_rlc_registers *m;
 561        uint32_t sdma_base_addr;
 562
 563        m = get_sdma_mqd(mqd);
 564        sdma_base_addr = get_sdma_base_addr(m);
 565
 566        write_register(kgd,
 567                        sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR,
 568                        m->sdma_rlc_virtual_addr);
 569
 570        write_register(kgd,
 571                        sdma_base_addr + SDMA0_RLC0_RB_BASE,
 572                        m->sdma_rlc_rb_base);
 573
 574        write_register(kgd,
 575                        sdma_base_addr + SDMA0_RLC0_RB_BASE_HI,
 576                        m->sdma_rlc_rb_base_hi);
 577
 578        write_register(kgd,
 579                        sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO,
 580                        m->sdma_rlc_rb_rptr_addr_lo);
 581
 582        write_register(kgd,
 583                        sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI,
 584                        m->sdma_rlc_rb_rptr_addr_hi);
 585
 586        write_register(kgd,
 587                        sdma_base_addr + SDMA0_RLC0_DOORBELL,
 588                        m->sdma_rlc_doorbell);
 589
 590        write_register(kgd,
 591                        sdma_base_addr + SDMA0_RLC0_RB_CNTL,
 592                        m->sdma_rlc_rb_cntl);
 593
 594        return 0;
 595}
 596
 597static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 598                                uint32_t pipe_id, uint32_t queue_id)
 599{
 600        uint32_t act;
 601        bool retval = false;
 602        uint32_t low, high;
 603
 604        acquire_queue(kgd, pipe_id, queue_id);
 605        act = read_register(kgd, CP_HQD_ACTIVE);
 606        if (act) {
 607                low = lower_32_bits(queue_address >> 8);
 608                high = upper_32_bits(queue_address >> 8);
 609
 610                if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
 611                                high == read_register(kgd, CP_HQD_PQ_BASE_HI))
 612                        retval = true;
 613        }
 614        release_queue(kgd);
 615        return retval;
 616}
 617
 618static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 619{
 620        struct cik_sdma_rlc_registers *m;
 621        uint32_t sdma_base_addr;
 622        uint32_t sdma_rlc_rb_cntl;
 623
 624        m = get_sdma_mqd(mqd);
 625        sdma_base_addr = get_sdma_base_addr(m);
 626
 627        sdma_rlc_rb_cntl = read_register(kgd,
 628                                        sdma_base_addr + SDMA0_RLC0_RB_CNTL);
 629
 630        if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE)
 631                return true;
 632
 633        return false;
 634}
 635
 636static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
 637                                unsigned int timeout, uint32_t pipe_id,
 638                                uint32_t queue_id)
 639{
 640        uint32_t temp;
 641
 642        acquire_queue(kgd, pipe_id, queue_id);
 643        write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);
 644
 645        write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);
 646
 647        while (true) {
 648                temp = read_register(kgd, CP_HQD_ACTIVE);
 649                if (temp & 0x1)
 650                        break;
 651                if (timeout == 0) {
 652                        pr_err("kfd: cp queue preemption time out (%dms)\n",
 653                                temp);
 654                        release_queue(kgd);
 655                        return -ETIME;
 656                }
 657                msleep(20);
 658                timeout -= 20;
 659        }
 660
 661        release_queue(kgd);
 662        return 0;
 663}
 664
 665static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 666                                unsigned int timeout)
 667{
 668        struct cik_sdma_rlc_registers *m;
 669        uint32_t sdma_base_addr;
 670        uint32_t temp;
 671
 672        m = get_sdma_mqd(mqd);
 673        sdma_base_addr = get_sdma_base_addr(m);
 674
 675        temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL);
 676        temp = temp & ~SDMA_RB_ENABLE;
 677        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp);
 678
 679        while (true) {
 680                temp = read_register(kgd, sdma_base_addr +
 681                                                SDMA0_RLC0_CONTEXT_STATUS);
 682                if (temp & SDMA_RLC_IDLE)
 683                        break;
 684                if (timeout == 0)
 685                        return -ETIME;
 686                msleep(20);
 687                timeout -= 20;
 688        }
 689
 690        write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0);
 691        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0);
 692        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0);
 693        write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0);
 694
 695        return 0;
 696}
 697
 698static int kgd_address_watch_disable(struct kgd_dev *kgd)
 699{
 700        union TCP_WATCH_CNTL_BITS cntl;
 701        unsigned int i;
 702
 703        cntl.u32All = 0;
 704
 705        cntl.bitfields.valid = 0;
 706        cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
 707        cntl.bitfields.atc = 1;
 708
 709        /* Turning off this address until we set all the registers */
 710        for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
 711                write_register(kgd,
 712                                watchRegs[i * ADDRESS_WATCH_REG_MAX +
 713                                        ADDRESS_WATCH_REG_CNTL],
 714                                cntl.u32All);
 715
 716        return 0;
 717}
 718
 719static int kgd_address_watch_execute(struct kgd_dev *kgd,
 720                                        unsigned int watch_point_id,
 721                                        uint32_t cntl_val,
 722                                        uint32_t addr_hi,
 723                                        uint32_t addr_lo)
 724{
 725        union TCP_WATCH_CNTL_BITS cntl;
 726
 727        cntl.u32All = cntl_val;
 728
 729        /* Turning off this watch point until we set all the registers */
 730        cntl.bitfields.valid = 0;
 731        write_register(kgd,
 732                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
 733                                ADDRESS_WATCH_REG_CNTL],
 734                        cntl.u32All);
 735
 736        write_register(kgd,
 737                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
 738                                ADDRESS_WATCH_REG_ADDR_HI],
 739                        addr_hi);
 740
 741        write_register(kgd,
 742                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
 743                                ADDRESS_WATCH_REG_ADDR_LO],
 744                        addr_lo);
 745
 746        /* Enable the watch point */
 747        cntl.bitfields.valid = 1;
 748
 749        write_register(kgd,
 750                        watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
 751                                ADDRESS_WATCH_REG_CNTL],
 752                        cntl.u32All);
 753
 754        return 0;
 755}
 756
 757static int kgd_wave_control_execute(struct kgd_dev *kgd,
 758                                        uint32_t gfx_index_val,
 759                                        uint32_t sq_cmd)
 760{
 761        struct radeon_device *rdev = get_radeon_device(kgd);
 762        uint32_t data;
 763
 764        mutex_lock(&rdev->grbm_idx_mutex);
 765
 766        write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
 767        write_register(kgd, SQ_CMD, sq_cmd);
 768
 769        /*  Restore the GRBM_GFX_INDEX register  */
 770
 771        data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
 772                SE_BROADCAST_WRITES;
 773
 774        write_register(kgd, GRBM_GFX_INDEX, data);
 775
 776        mutex_unlock(&rdev->grbm_idx_mutex);
 777
 778        return 0;
 779}
 780
 781static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 782                                        unsigned int watch_point_id,
 783                                        unsigned int reg_offset)
 784{
 785        return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
 786}
 787
 788static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
 789{
 790        uint32_t reg;
 791        struct radeon_device *rdev = (struct radeon_device *) kgd;
 792
 793        reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
 794        return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
 795}
 796
 797static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 798                                                        uint8_t vmid)
 799{
 800        uint32_t reg;
 801        struct radeon_device *rdev = (struct radeon_device *) kgd;
 802
 803        reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
 804        return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
 805}
 806
 807static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 808{
 809        struct radeon_device *rdev = (struct radeon_device *) kgd;
 810
 811        return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
 812}
 813
 814static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 815{
 816        struct radeon_device *rdev = (struct radeon_device *) kgd;
 817        const union radeon_firmware_header *hdr;
 818
 819        BUG_ON(kgd == NULL || rdev->mec_fw == NULL);
 820
 821        switch (type) {
 822        case KGD_ENGINE_PFP:
 823                hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data;
 824                break;
 825
 826        case KGD_ENGINE_ME:
 827                hdr = (const union radeon_firmware_header *) rdev->me_fw->data;
 828                break;
 829
 830        case KGD_ENGINE_CE:
 831                hdr = (const union radeon_firmware_header *) rdev->ce_fw->data;
 832                break;
 833
 834        case KGD_ENGINE_MEC1:
 835                hdr = (const union radeon_firmware_header *) rdev->mec_fw->data;
 836                break;
 837
 838        case KGD_ENGINE_MEC2:
 839                hdr = (const union radeon_firmware_header *)
 840                                                        rdev->mec2_fw->data;
 841                break;
 842
 843        case KGD_ENGINE_RLC:
 844                hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
 845                break;
 846
 847        case KGD_ENGINE_SDMA1:
 848        case KGD_ENGINE_SDMA2:
 849                hdr = (const union radeon_firmware_header *)
 850                                                        rdev->sdma_fw->data;
 851                break;
 852
 853        default:
 854                return 0;
 855        }
 856
 857        if (hdr == NULL)
 858                return 0;
 859
 860        /* Only 12 bit in use*/
 861        return hdr->common.ucode_version;
 862}
 863