linux/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
<<
>>
Prefs
   1/*
   2 * Copyright 2019 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/firmware.h>
  25#include <linux/module.h>
  26#include "amdgpu.h"
  27#include "soc15_common.h"
  28#include "nv.h"
  29#include "gc/gc_10_1_0_offset.h"
  30#include "gc/gc_10_1_0_sh_mask.h"
  31#include "v10_structs.h"
  32#include "mes_api_def.h"
  33
  34#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid               0x2820
  35#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX      1
  36
  37MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
  38MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
  39
  40static int mes_v10_1_hw_fini(void *handle);
  41
  42#define MES_EOP_SIZE   2048
  43
  44static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
  45{
  46        struct amdgpu_device *adev = ring->adev;
  47
  48        if (ring->use_doorbell) {
  49                atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs],
  50                             ring->wptr);
  51                WDOORBELL64(ring->doorbell_index, ring->wptr);
  52        } else {
  53                BUG();
  54        }
  55}
  56
  57static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
  58{
  59        return ring->adev->wb.wb[ring->rptr_offs];
  60}
  61
  62static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
  63{
  64        u64 wptr;
  65
  66        if (ring->use_doorbell)
  67                wptr = atomic64_read((atomic64_t *)
  68                                     &ring->adev->wb.wb[ring->wptr_offs]);
  69        else
  70                BUG();
  71        return wptr;
  72}
  73
/* Ring callbacks for the MES (Micro Engine Scheduler) control ring.
 * rptr/wptr live in writeback memory and the ring is kicked via a
 * 64-bit doorbell (see the set_wptr/get_wptr handlers above).
 */
static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
        .type = AMDGPU_RING_TYPE_MES,
        .align_mask = 1,
        .nop = 0,
        .support_64bit_ptrs = true,
        .get_rptr = mes_v10_1_ring_get_rptr,
        .get_wptr = mes_v10_1_ring_get_wptr,
        .set_wptr = mes_v10_1_ring_set_wptr,
        .insert_nop = amdgpu_ring_insert_nop,
};
  84
  85static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
  86                                                    void *pkt, int size)
  87{
  88        int ndw = size / 4;
  89        signed long r;
  90        union MESAPI__ADD_QUEUE *x_pkt = pkt;
  91        struct amdgpu_device *adev = mes->adev;
  92        struct amdgpu_ring *ring = &mes->ring;
  93
  94        BUG_ON(size % 4 != 0);
  95
  96        if (amdgpu_ring_alloc(ring, ndw))
  97                return -ENOMEM;
  98
  99        amdgpu_ring_write_multiple(ring, pkt, ndw);
 100        amdgpu_ring_commit(ring);
 101
 102        DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
 103
 104        r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
 105                                      adev->usec_timeout);
 106        if (r < 1) {
 107                DRM_ERROR("MES failed to response msg=%d\n",
 108                          x_pkt->header.opcode);
 109                return -ETIMEDOUT;
 110        }
 111
 112        return 0;
 113}
 114
 115static int convert_to_mes_queue_type(int queue_type)
 116{
 117        if (queue_type == AMDGPU_RING_TYPE_GFX)
 118                return MES_QUEUE_TYPE_GFX;
 119        else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
 120                return MES_QUEUE_TYPE_COMPUTE;
 121        else if (queue_type == AMDGPU_RING_TYPE_SDMA)
 122                return MES_QUEUE_TYPE_SDMA;
 123        else
 124                BUG();
 125        return -1;
 126}
 127
 128static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
 129                                  struct mes_add_queue_input *input)
 130{
 131        struct amdgpu_device *adev = mes->adev;
 132        union MESAPI__ADD_QUEUE mes_add_queue_pkt;
 133
 134        memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
 135
 136        mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
 137        mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
 138        mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
 139
 140        mes_add_queue_pkt.process_id = input->process_id;
 141        mes_add_queue_pkt.page_table_base_addr =
 142                input->page_table_base_addr - adev->gmc.vram_start;
 143        mes_add_queue_pkt.process_va_start = input->process_va_start;
 144        mes_add_queue_pkt.process_va_end = input->process_va_end;
 145        mes_add_queue_pkt.process_quantum = input->process_quantum;
 146        mes_add_queue_pkt.process_context_addr = input->process_context_addr;
 147        mes_add_queue_pkt.gang_quantum = input->gang_quantum;
 148        mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
 149        mes_add_queue_pkt.inprocess_gang_priority =
 150                input->inprocess_gang_priority;
 151        mes_add_queue_pkt.gang_global_priority_level =
 152                input->gang_global_priority_level;
 153        mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
 154        mes_add_queue_pkt.mqd_addr = input->mqd_addr;
 155        mes_add_queue_pkt.wptr_addr = input->wptr_addr;
 156        mes_add_queue_pkt.queue_type =
 157                convert_to_mes_queue_type(input->queue_type);
 158        mes_add_queue_pkt.paging = input->paging;
 159
 160        mes_add_queue_pkt.api_status.api_completion_fence_addr =
 161                mes->ring.fence_drv.gpu_addr;
 162        mes_add_queue_pkt.api_status.api_completion_fence_value =
 163                ++mes->ring.fence_drv.sync_seq;
 164
 165        return mes_v10_1_submit_pkt_and_poll_completion(mes,
 166                        &mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
 167}
 168
 169static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
 170                                     struct mes_remove_queue_input *input)
 171{
 172        union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
 173
 174        memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
 175
 176        mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
 177        mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
 178        mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
 179
 180        mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
 181        mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
 182
 183        mes_remove_queue_pkt.api_status.api_completion_fence_addr =
 184                mes->ring.fence_drv.gpu_addr;
 185        mes_remove_queue_pkt.api_status.api_completion_fence_value =
 186                ++mes->ring.fence_drv.sync_seq;
 187
 188        return mes_v10_1_submit_pkt_and_poll_completion(mes,
 189                        &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
 190}
 191
/* Gang suspend is not implemented for MES v10.1; report success so
 * callers treat it as a no-op.
 */
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
                                  struct mes_suspend_gang_input *input)
{
        return 0;
}
 197
/* Gang resume is not implemented for MES v10.1; report success so
 * callers treat it as a no-op.
 */
static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
                                 struct mes_resume_gang_input *input)
{
        return 0;
}
 203
 204static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
 205{
 206        union MESAPI__QUERY_MES_STATUS mes_status_pkt;
 207
 208        memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
 209
 210        mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
 211        mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
 212        mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
 213
 214        mes_status_pkt.api_status.api_completion_fence_addr =
 215                mes->ring.fence_drv.gpu_addr;
 216        mes_status_pkt.api_status.api_completion_fence_value =
 217                ++mes->ring.fence_drv.sync_seq;
 218
 219        return mes_v10_1_submit_pkt_and_poll_completion(mes,
 220                        &mes_status_pkt, sizeof(mes_status_pkt));
 221}
 222
 223static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
 224{
 225        int i;
 226        struct amdgpu_device *adev = mes->adev;
 227        union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
 228
 229        memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
 230
 231        mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
 232        mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
 233        mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
 234
 235        mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
 236        mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
 237        mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
 238        mes_set_hw_res_pkt.paging_vmid = 0;
 239        mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
 240        mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
 241                mes->query_status_fence_gpu_addr;
 242
 243        for (i = 0; i < MAX_COMPUTE_PIPES; i++)
 244                mes_set_hw_res_pkt.compute_hqd_mask[i] =
 245                        mes->compute_hqd_mask[i];
 246
 247        for (i = 0; i < MAX_GFX_PIPES; i++)
 248                mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
 249
 250        for (i = 0; i < MAX_SDMA_PIPES; i++)
 251                mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
 252
 253        for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
 254                mes_set_hw_res_pkt.agreegated_doorbells[i] =
 255                        mes->agreegated_doorbells[i];
 256
 257        mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
 258                mes->ring.fence_drv.gpu_addr;
 259        mes_set_hw_res_pkt.api_status.api_completion_fence_value =
 260                ++mes->ring.fence_drv.sync_seq;
 261
 262        return mes_v10_1_submit_pkt_and_poll_completion(mes,
 263                        &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
 264}
 265
/* MES scheduler entry points exposed to the amdgpu MES core.
 * suspend/resume are currently no-op stubs (see above).
 */
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
        .add_hw_queue = mes_v10_1_add_hw_queue,
        .remove_hw_queue = mes_v10_1_remove_hw_queue,
        .suspend_gang = mes_v10_1_suspend_gang,
        .resume_gang = mes_v10_1_resume_gang,
};
 272
 273static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
 274{
 275        const char *chip_name;
 276        char fw_name[30];
 277        int err;
 278        const struct mes_firmware_header_v1_0 *mes_hdr;
 279        struct amdgpu_firmware_info *info;
 280
 281        switch (adev->asic_type) {
 282        case CHIP_NAVI10:
 283                chip_name = "navi10";
 284                break;
 285        case CHIP_SIENNA_CICHLID:
 286                chip_name = "sienna_cichlid";
 287                break;
 288        default:
 289                BUG();
 290        }
 291
 292        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name);
 293        err = request_firmware(&adev->mes.fw, fw_name, adev->dev);
 294        if (err)
 295                return err;
 296
 297        err = amdgpu_ucode_validate(adev->mes.fw);
 298        if (err) {
 299                release_firmware(adev->mes.fw);
 300                adev->mes.fw = NULL;
 301                return err;
 302        }
 303
 304        mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data;
 305        adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version);
 306        adev->mes.ucode_fw_version =
 307                le32_to_cpu(mes_hdr->mes_ucode_data_version);
 308        adev->mes.uc_start_addr =
 309                le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
 310                ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
 311        adev->mes.data_start_addr =
 312                le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
 313                ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
 314
 315        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 316                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
 317                info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
 318                info->fw = adev->mes.fw;
 319                adev->firmware.fw_size +=
 320                        ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
 321                              PAGE_SIZE);
 322
 323                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
 324                info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
 325                info->fw = adev->mes.fw;
 326                adev->firmware.fw_size +=
 327                        ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
 328                              PAGE_SIZE);
 329        }
 330
 331        return 0;
 332}
 333
/* Drop the reference taken by mes_v10_1_init_microcode() and clear the
 * stale pointer so a later teardown cannot double-release it.
 */
static void mes_v10_1_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->mes.fw);
        adev->mes.fw = NULL;
}
 339
 340static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev)
 341{
 342        int r;
 343        const struct mes_firmware_header_v1_0 *mes_hdr;
 344        const __le32 *fw_data;
 345        unsigned fw_size;
 346
 347        mes_hdr = (const struct mes_firmware_header_v1_0 *)
 348                adev->mes.fw->data;
 349
 350        fw_data = (const __le32 *)(adev->mes.fw->data +
 351                   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
 352        fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
 353
 354        r = amdgpu_bo_create_reserved(adev, fw_size,
 355                                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 356                                      &adev->mes.ucode_fw_obj,
 357                                      &adev->mes.ucode_fw_gpu_addr,
 358                                      (void **)&adev->mes.ucode_fw_ptr);
 359        if (r) {
 360                dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
 361                return r;
 362        }
 363
 364        memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size);
 365
 366        amdgpu_bo_kunmap(adev->mes.ucode_fw_obj);
 367        amdgpu_bo_unreserve(adev->mes.ucode_fw_obj);
 368
 369        return 0;
 370}
 371
 372static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev)
 373{
 374        int r;
 375        const struct mes_firmware_header_v1_0 *mes_hdr;
 376        const __le32 *fw_data;
 377        unsigned fw_size;
 378
 379        mes_hdr = (const struct mes_firmware_header_v1_0 *)
 380                adev->mes.fw->data;
 381
 382        fw_data = (const __le32 *)(adev->mes.fw->data +
 383                   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
 384        fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
 385
 386        r = amdgpu_bo_create_reserved(adev, fw_size,
 387                                      64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
 388                                      &adev->mes.data_fw_obj,
 389                                      &adev->mes.data_fw_gpu_addr,
 390                                      (void **)&adev->mes.data_fw_ptr);
 391        if (r) {
 392                dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
 393                return r;
 394        }
 395
 396        memcpy(adev->mes.data_fw_ptr, fw_data, fw_size);
 397
 398        amdgpu_bo_kunmap(adev->mes.data_fw_obj);
 399        amdgpu_bo_unreserve(adev->mes.data_fw_obj);
 400
 401        return 0;
 402}
 403
/* Release the backdoor-load staging buffers created by
 * mes_v10_1_allocate_ucode_buffer()/..._data_buffer().
 * amdgpu_bo_free_kernel() tolerates NULL, so this is safe even if
 * only one (or neither) allocation succeeded.
 */
static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->mes.data_fw_obj,
                              &adev->mes.data_fw_gpu_addr,
                              (void **)&adev->mes.data_fw_ptr);

        amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj,
                              &adev->mes.ucode_fw_gpu_addr,
                              (void **)&adev->mes.ucode_fw_ptr);
}
 414
/* Start or stop the MES engine on pipe 0.
 *
 * Enable: assert pipe reset, program the ucode entry point, clear
 * DC_OP_CNTL.BYPASS_UNCACHED, then activate the pipe (which also
 * releases halt/reset since the CNTL write starts from 0).
 * Disable: deactivate the pipe, invalidate the i-cache, and assert
 * reset + halt in a single write.
 *
 * NOTE(review): the register write order here is hardware-mandated;
 * do not reorder.
 */
static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
{
        uint32_t data = 0;

        if (enable) {
                data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);

                /* set ucode start address */
                /* NOTE(review): truncates to 32 bits before the shift;
                 * assumes uc_start_addr fits in 32 bits — confirm.
                 */
                WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
                             (uint32_t)(adev->mes.uc_start_addr) >> 2);

                /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
                data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
                data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
                                     BYPASS_UNCACHED, 0);
                WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);

                /* unhalt MES and activate pipe0 */
                data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
        } else {
                data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
                data = REG_SET_FIELD(data, CP_MES_CNTL,
                                     MES_INVALIDATE_ICACHE, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
        }
}
 447
 448/* This function is for backdoor MES firmware */
 449static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
 450{
 451        int r;
 452        uint32_t data;
 453
 454        if (!adev->mes.fw)
 455                return -EINVAL;
 456
 457        r = mes_v10_1_allocate_ucode_buffer(adev);
 458        if (r)
 459                return r;
 460
 461        r = mes_v10_1_allocate_ucode_data_buffer(adev);
 462        if (r) {
 463                mes_v10_1_free_ucode_buffers(adev);
 464                return r;
 465        }
 466
 467        mes_v10_1_enable(adev, false);
 468
 469        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
 470
 471        mutex_lock(&adev->srbm_mutex);
 472        /* me=3, pipe=0, queue=0 */
 473        nv_grbm_select(adev, 3, 0, 0, 0);
 474
 475        /* set ucode start address */
 476        WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
 477                     (uint32_t)(adev->mes.uc_start_addr) >> 2);
 478
 479        /* set ucode fimrware address */
 480        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
 481                     lower_32_bits(adev->mes.ucode_fw_gpu_addr));
 482        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
 483                     upper_32_bits(adev->mes.ucode_fw_gpu_addr));
 484
 485        /* set ucode instruction cache boundary to 2M-1 */
 486        WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
 487
 488        /* set ucode data firmware address */
 489        WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
 490                     lower_32_bits(adev->mes.data_fw_gpu_addr));
 491        WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
 492                     upper_32_bits(adev->mes.data_fw_gpu_addr));
 493
 494        /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
 495        WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
 496
 497        /* invalidate ICACHE */
 498        switch (adev->asic_type) {
 499        case CHIP_SIENNA_CICHLID:
 500                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
 501                break;
 502        default:
 503                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
 504                break;
 505        }
 506        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
 507        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
 508        switch (adev->asic_type) {
 509        case CHIP_SIENNA_CICHLID:
 510                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
 511                break;
 512        default:
 513                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
 514                break;
 515        }
 516
 517        /* prime the ICACHE. */
 518        switch (adev->asic_type) {
 519        case CHIP_SIENNA_CICHLID:
 520                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
 521                break;
 522        default:
 523                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
 524                break;
 525        }
 526        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
 527        switch (adev->asic_type) {
 528        case CHIP_SIENNA_CICHLID:
 529                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
 530                break;
 531        default:
 532                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
 533                break;
 534        }
 535
 536        nv_grbm_select(adev, 0, 0, 0, 0);
 537        mutex_unlock(&adev->srbm_mutex);
 538
 539        return 0;
 540}
 541
 542static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
 543{
 544        int r;
 545        u32 *eop;
 546
 547        r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
 548                                      AMDGPU_GEM_DOMAIN_GTT,
 549                                      &adev->mes.eop_gpu_obj,
 550                                      &adev->mes.eop_gpu_addr,
 551                                      (void **)&eop);
 552        if (r) {
 553                dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
 554                return r;
 555        }
 556
 557        memset(eop, 0, adev->mes.eop_gpu_obj->tbo.base.size);
 558
 559        amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
 560        amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);
 561
 562        return 0;
 563}
 564
 565static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
 566{
 567        int r;
 568
 569        r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
 570        if (r) {
 571                dev_err(adev->dev,
 572                        "(%d) mes sch_ctx_offs wb alloc failed\n", r);
 573                return r;
 574        }
 575        adev->mes.sch_ctx_gpu_addr =
 576                adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
 577        adev->mes.sch_ctx_ptr =
 578                (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
 579
 580        r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
 581        if (r) {
 582                dev_err(adev->dev,
 583                        "(%d) query_status_fence_offs wb alloc failed\n", r);
 584                return r;
 585        }
 586        adev->mes.query_status_fence_gpu_addr =
 587                adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
 588        adev->mes.query_status_fence_ptr =
 589                (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
 590
 591        return 0;
 592}
 593
/* Populate the MQD (memory queue descriptor) for the MES ring so the
 * HQD can be programmed from it (see mes_v10_1_queue_init_register).
 * Values that mirror hardware registers start from a live register
 * read and are modified field-by-field; statement order follows the
 * MQD layout and should not be rearranged.
 *
 * Always returns 0.
 */
static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct v10_compute_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        /* Fixed header/static-thread-management boilerplate for a
         * compute-style MQD.
         */
        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;

        /* EOP base is stored in 256-byte units (>> 8). */
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(MES_EOP_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);

        if (ring->use_doorbell) {
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_OFFSET, ring->doorbell_index);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_HIT, 0);
        }
        else
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_EN, 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* disable the queue if it's active */
        ring->wptr = 0;
        mqd->cp_hqd_dequeue_request = 0;
        mqd->cp_hqd_pq_rptr = 0;
        mqd->cp_hqd_pq_wptr_lo = 0;
        mqd->cp_hqd_pq_wptr_hi = 0;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                            ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MIN_IB_AVAIL_SIZE */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        mqd->cp_hqd_ib_control = tmp;

        /* activate the queue */
        mqd->cp_hqd_active = 1;
        return 0;
}
 723
 724static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
 725{
 726        struct v10_compute_mqd *mqd = ring->mqd_ptr;
 727        struct amdgpu_device *adev = ring->adev;
 728        uint32_t data = 0;
 729
 730        mutex_lock(&adev->srbm_mutex);
 731        nv_grbm_select(adev, 3, 0, 0, 0);
 732
 733        /* set CP_HQD_VMID.VMID = 0. */
 734        data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
 735        data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
 736        WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
 737
 738        /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
 739        data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 740        data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
 741                             DOORBELL_EN, 0);
 742        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
 743
 744        /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
 745        WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
 746        WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
 747
 748        /* set CP_MQD_CONTROL.VMID=0 */
 749        data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
 750        data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
 751        WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
 752
 753        /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
 754        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
 755        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
 756
 757        /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
 758        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 759                     mqd->cp_hqd_pq_rptr_report_addr_lo);
 760        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 761                     mqd->cp_hqd_pq_rptr_report_addr_hi);
 762
 763        /* set CP_HQD_PQ_CONTROL */
 764        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
 765
 766        /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
 767        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
 768                     mqd->cp_hqd_pq_wptr_poll_addr_lo);
 769        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
 770                     mqd->cp_hqd_pq_wptr_poll_addr_hi);
 771
 772        /* set CP_HQD_PQ_DOORBELL_CONTROL */
 773        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 774                     mqd->cp_hqd_pq_doorbell_control);
 775
 776        /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
 777        WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
 778
 779        /* set CP_HQD_ACTIVE.ACTIVE=1 */
 780        WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 781
 782        nv_grbm_select(adev, 0, 0, 0, 0);
 783        mutex_unlock(&adev->srbm_mutex);
 784}
 785
 786#if 0
 787static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
 788{
 789        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 790        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
 791        int r;
 792
 793        if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
 794                return -EINVAL;
 795
 796        r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
 797        if (r) {
 798                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 799                return r;
 800        }
 801
 802        kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
 803
 804        r = amdgpu_ring_test_ring(kiq_ring);
 805        if (r) {
 806                DRM_ERROR("kfq enable failed\n");
 807                kiq_ring->sched.ready = false;
 808        }
 809        return r;
 810}
 811#endif
 812
 813static int mes_v10_1_queue_init(struct amdgpu_device *adev)
 814{
 815        int r;
 816
 817        r = mes_v10_1_mqd_init(&adev->mes.ring);
 818        if (r)
 819                return r;
 820
 821#if 0
 822        r = mes_v10_1_kiq_enable_queue(adev);
 823        if (r)
 824                return r;
 825#else
 826        mes_v10_1_queue_init_register(&adev->mes.ring);
 827#endif
 828
 829        return 0;
 830}
 831
 832static int mes_v10_1_ring_init(struct amdgpu_device *adev)
 833{
 834        struct amdgpu_ring *ring;
 835
 836        ring = &adev->mes.ring;
 837
 838        ring->funcs = &mes_v10_1_ring_funcs;
 839
 840        ring->me = 3;
 841        ring->pipe = 0;
 842        ring->queue = 0;
 843
 844        ring->ring_obj = NULL;
 845        ring->use_doorbell = true;
 846        ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
 847        ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
 848        ring->no_scheduler = true;
 849        sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 850
 851        return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
 852                                AMDGPU_RING_PRIO_DEFAULT, NULL);
 853}
 854
 855static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
 856{
 857        int r, mqd_size = sizeof(struct v10_compute_mqd);
 858        struct amdgpu_ring *ring = &adev->mes.ring;
 859
 860        if (ring->mqd_obj)
 861                return 0;
 862
 863        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 864                                    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
 865                                    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 866        if (r) {
 867                dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
 868                return r;
 869        }
 870
 871        /* prepare MQD backup */
 872        adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
 873        if (!adev->mes.mqd_backup)
 874                dev_warn(adev->dev,
 875                         "no memory to create MQD backup for ring %s\n",
 876                         ring->name);
 877
 878        return 0;
 879}
 880
 881static int mes_v10_1_sw_init(void *handle)
 882{
 883        int r;
 884        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 885
 886        adev->mes.adev = adev;
 887        adev->mes.funcs = &mes_v10_1_funcs;
 888
 889        r = mes_v10_1_init_microcode(adev);
 890        if (r)
 891                return r;
 892
 893        r = mes_v10_1_allocate_eop_buf(adev);
 894        if (r)
 895                return r;
 896
 897        r = mes_v10_1_mqd_sw_init(adev);
 898        if (r)
 899                return r;
 900
 901        r = mes_v10_1_ring_init(adev);
 902        if (r)
 903                return r;
 904
 905        r = mes_v10_1_allocate_mem_slots(adev);
 906        if (r)
 907                return r;
 908
 909        return 0;
 910}
 911
 912static int mes_v10_1_sw_fini(void *handle)
 913{
 914        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 915
 916        amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
 917        amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
 918
 919        kfree(adev->mes.mqd_backup);
 920
 921        amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
 922                              &adev->mes.ring.mqd_gpu_addr,
 923                              &adev->mes.ring.mqd_ptr);
 924
 925        amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
 926                              &adev->mes.eop_gpu_addr,
 927                              NULL);
 928
 929        mes_v10_1_free_microcode(adev);
 930
 931        return 0;
 932}
 933
 934static int mes_v10_1_hw_init(void *handle)
 935{
 936        int r;
 937        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 938
 939        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 940                r = mes_v10_1_load_microcode(adev);
 941                if (r) {
 942                        DRM_ERROR("failed to MES fw, r=%d\n", r);
 943                        return r;
 944                }
 945        }
 946
 947        mes_v10_1_enable(adev, true);
 948
 949        r = mes_v10_1_queue_init(adev);
 950        if (r)
 951                goto failure;
 952
 953        r = mes_v10_1_set_hw_resources(&adev->mes);
 954        if (r)
 955                goto failure;
 956
 957        r = mes_v10_1_query_sched_status(&adev->mes);
 958        if (r) {
 959                DRM_ERROR("MES is busy\n");
 960                goto failure;
 961        }
 962
 963        return 0;
 964
 965failure:
 966        mes_v10_1_hw_fini(adev);
 967        return r;
 968}
 969
 970static int mes_v10_1_hw_fini(void *handle)
 971{
 972        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 973
 974        mes_v10_1_enable(adev, false);
 975
 976        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
 977                mes_v10_1_free_ucode_buffers(adev);
 978
 979        return 0;
 980}
 981
/* IP-block suspend: stub — no suspend handling implemented for MES
 * v10.1 (presumably state is rebuilt on resume/hw_init; TODO confirm).
 */
static int mes_v10_1_suspend(void *handle)
{
	return 0;
}
 986
/* IP-block resume: stub — nothing restored here (see suspend). */
static int mes_v10_1_resume(void *handle)
{
	return 0;
}
 991
/* Lifecycle callbacks for the MES v10.1 IP block; entries not listed
 * (e.g. early_init, is_idle) are left NULL.
 */
static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
	.name = "mes_v10_1",
	.sw_init = mes_v10_1_sw_init,
	.sw_fini = mes_v10_1_sw_fini,
	.hw_init = mes_v10_1_hw_init,
	.hw_fini = mes_v10_1_hw_fini,
	.suspend = mes_v10_1_suspend,
	.resume = mes_v10_1_resume,
};
1001
/* Exported IP-block descriptor: registers MES v10.1.0 with the amdgpu
 * IP-discovery framework.
 */
const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 10,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v10_1_ip_funcs,
};
1009