linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26
  27#include <linux/firmware.h>
  28#include <drm/drmP.h>
  29#include "amdgpu.h"
  30#include "amdgpu_vce.h"
  31#include "soc15.h"
  32#include "soc15d.h"
  33#include "soc15_common.h"
  34#include "mmsch_v1_0.h"
  35
  36#include "vce/vce_4_0_offset.h"
  37#include "vce/vce_4_0_default.h"
  38#include "vce/vce_4_0_sh_mask.h"
  39#include "mmhub/mmhub_1_0_offset.h"
  40#include "mmhub/mmhub_1_0_sh_mask.h"
  41
  42#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02
  43
  44#define VCE_V4_0_FW_SIZE        (384 * 1024)
  45#define VCE_V4_0_STACK_SIZE     (64 * 1024)
  46#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
  47
  48static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
  49static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
  50static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
  51
  52/**
  53 * vce_v4_0_ring_get_rptr - get read pointer
  54 *
  55 * @ring: amdgpu_ring pointer
  56 *
  57 * Returns the current hardware read pointer
  58 */
  59static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
  60{
  61        struct amdgpu_device *adev = ring->adev;
  62
  63        if (ring == &adev->vce.ring[0])
  64                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
  65        else if (ring == &adev->vce.ring[1])
  66                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
  67        else
  68                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
  69}
  70
  71/**
  72 * vce_v4_0_ring_get_wptr - get write pointer
  73 *
  74 * @ring: amdgpu_ring pointer
  75 *
  76 * Returns the current hardware write pointer
  77 */
  78static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
  79{
  80        struct amdgpu_device *adev = ring->adev;
  81
  82        if (ring->use_doorbell)
  83                return adev->wb.wb[ring->wptr_offs];
  84
  85        if (ring == &adev->vce.ring[0])
  86                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
  87        else if (ring == &adev->vce.ring[1])
  88                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
  89        else
  90                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
  91}
  92
  93/**
  94 * vce_v4_0_ring_set_wptr - set write pointer
  95 *
  96 * @ring: amdgpu_ring pointer
  97 *
  98 * Commits the write pointer to the hardware
  99 */
 100static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 101{
 102        struct amdgpu_device *adev = ring->adev;
 103
 104        if (ring->use_doorbell) {
 105                /* XXX check if swapping is necessary on BE */
 106                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 107                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 108                return;
 109        }
 110
 111        if (ring == &adev->vce.ring[0])
 112                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
 113                        lower_32_bits(ring->wptr));
 114        else if (ring == &adev->vce.ring[1])
 115                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
 116                        lower_32_bits(ring->wptr));
 117        else
 118                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
 119                        lower_32_bits(ring->wptr));
 120}
 121
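/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED flag, soft-resetting the ECPU between
 * retries.  Returns 0 once the firmware reports itself loaded, -ETIMEDOUT
 * otherwise.
 */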
 122static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
 123{
 124        int i, j;
 125
 126        for (i = 0; i < 10; ++i) {
 127                for (j = 0; j < 100; ++j) {
 128                        uint32_t status =
 129                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
 130
 131                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
 132                                return 0;
 133                        mdelay(10);
 134                }
 135
 136                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
 137                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 138                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
 139                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 140                mdelay(10);
 141                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
 142                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 143                mdelay(10);
 144
 145        }
 146
 147        return -ETIMEDOUT;
 148}
 149
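/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler (SRIOV)
 *
 * @adev: amdgpu_device pointer
 * @table: descriptor table shared with the MMSCH firmware
 *
 * Program the descriptor address, VMID and size, reset the mailbox and
 * ring 0 write pointer, then wait for the MMSCH to acknowledge in
 * VCE_MMSCH_VF_MAILBOX_RESP.  Returns 0 on success, -EBUSY on timeout.
 */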
 150static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
 151                                struct amdgpu_mm_table *table)
 152{
 153        uint32_t data = 0, loop;
 154        uint64_t addr = table->gpu_addr;
 155        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
 156        uint32_t size;
 157
 158        size = header->header_size + header->vce_table_size + header->uvd_table_size;
 159
 160        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
 161        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
 162        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
 163
 164        /* 2, update vmid of descriptor */
 165        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
 166        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
 167        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
 168        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
 169
 170        /* 3, notify mmsch about the size of this descriptor */
 171        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
 172
 173        /* 4, set resp to zero */
 174        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
 175
 176        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
 177        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
 178        adev->vce.ring[0].wptr = 0;
 179        adev->vce.ring[0].wptr_old = 0;
 180
 181        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
 182        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
 183
 184        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
 185        loop = 1000;
 186        while ((data & 0x10000002) != 0x10000002) {
 187                udelay(10);
 188                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
 189                loop--;
 190                if (!loop)
 191                        break;
 192        }
 193
 194        if (!loop) {
 195                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
 196                return -EBUSY;
 197        }
 198
 199        return 0;
 200}
 201
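/**
 * vce_v4_0_sriov_start - start VCE through the MM scheduler (SRIOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table with the ring, LMI and VCPU cache programming
 * that vce_v4_0_start()/vce_v4_0_mc_resume() would otherwise do directly,
 * then hand the table to the MM scheduler.
 */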
 202static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 203{
 204        struct amdgpu_ring *ring;
 205        uint32_t offset, size;
 206        uint32_t table_size = 0;
 207        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
 208        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
 209        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
 210        struct mmsch_v1_0_cmd_end end = { { 0 } };
 211        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
 212        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
 213
 214        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
 215        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
 216        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
 217        end.cmd_header.command_type = MMSCH_COMMAND__END;
 218
 219        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
 220                header->version = MMSCH_VERSION;
 221                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
 222
 223                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
 224                        header->vce_table_offset = header->header_size;
 225                else
 226                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
 227
 228                init_table += header->vce_table_offset;
 229
 230                ring = &adev->vce.ring[0];
 231                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
 232                                            lower_32_bits(ring->gpu_addr));
 233                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
 234                                            upper_32_bits(ring->gpu_addr));
 235                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
 236                                            ring->ring_size / 4);
 237
  238                /* start of MC_RESUME */
 239                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
 240                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
 241                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
 242                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 243                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 244
 245                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 246                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 247                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 248                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
 249                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 250                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 251                                                (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
 252                } else {
 253                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 254                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 255                                                adev->vce.gpu_addr >> 8);
 256                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 257                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 258                                                (adev->vce.gpu_addr >> 40) & 0xff);
 259                }
 260                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 261                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
 262                                                adev->vce.gpu_addr >> 8);
 263                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 264                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
 265                                                (adev->vce.gpu_addr >> 40) & 0xff);
 266                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 267                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
 268                                                adev->vce.gpu_addr >> 8);
 269                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 270                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
 271                                                (adev->vce.gpu_addr >> 40) & 0xff);
 272
 273                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 274                size = VCE_V4_0_FW_SIZE;
 275                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
 276                                        offset & ~0x0f000000);
 277                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 278
 279                offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
 280                size = VCE_V4_0_STACK_SIZE;
 281                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
 282                                        (offset & ~0x0f000000) | (1 << 24));
 283                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 284
 285                offset += size;
 286                size = VCE_V4_0_DATA_SIZE;
 287                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
 288                                        (offset & ~0x0f000000) | (2 << 24));
 289                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 290
 291                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
 292                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
 293                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
 294                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 295
 296                /* end of MC_RESUME */
 297                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 298                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
 299                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
 300                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
 301                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 302                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
 303
 304                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 305                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
 306                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
 307
 308                /* clear BUSY flag */
 309                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 310                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);
 311
 312                /* add end packet */
 313                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
 314                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
 315                header->vce_table_size = table_size;
 316        }
 317
 318        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
 319}
 320
 321/**
 322 * vce_v4_0_start - start VCE block
 323 *
 324 * @adev: amdgpu_device pointer
 325 *
 326 * Setup and start the VCE block
 327 */
 328static int vce_v4_0_start(struct amdgpu_device *adev)
 329{
 330        struct amdgpu_ring *ring;
 331        int r;
 332
 333        ring = &adev->vce.ring[0];
 334
 335        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
 336        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
 337        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
 338        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
 339        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
 340
 341        ring = &adev->vce.ring[1];
 342
 343        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
 344        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
 345        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
 346        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
 347        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
 348
 349        ring = &adev->vce.ring[2];
 350
 351        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
 352        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
 353        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
 354        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
 355        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
 356
 357        vce_v4_0_mc_resume(adev);
 358        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
 359                        ~VCE_STATUS__JOB_BUSY_MASK);
 360
 361        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
 362
 363        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
 364                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 365        mdelay(100);
 366
 367        r = vce_v4_0_firmware_loaded(adev);
 368
 369        /* clear BUSY flag */
 370        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
 371
 372        if (r) {
 373                DRM_ERROR("VCE not responding, giving up!!!\n");
 374                return r;
 375        }
 376
 377        return 0;
 378}
 379
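/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear the
 * BUSY flag.
 */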
 380static int vce_v4_0_stop(struct amdgpu_device *adev)
 381{
 382
 383        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
 384
 385        /* hold on ECPU */
 386        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 387                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
 388                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 389
 390        /* clear BUSY flag */
 391        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
 392
 393        /* Set Clock-Gating off */
 394        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
 395                vce_v4_0_set_vce_sw_clock_gating(adev, false);
 396        */
 397
 398        return 0;
 399}
 400
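/**
 * vce_v4_0_early_init - set up ring and irq callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Choose the number of rings (one under SRIOV, three otherwise) and
 * install the ring and interrupt function tables.
 */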
 401static int vce_v4_0_early_init(void *handle)
 402{
 403        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 404
  405        if (amdgpu_sriov_vf(adev)) /* currently only VCE ring 0 supports SRIOV */
 406                adev->vce.num_rings = 1;
 407        else
 408                adev->vce.num_rings = 3;
 409
 410        vce_v4_0_set_ring_funcs(adev);
 411        vce_v4_0_set_irq_funcs(adev);
 412
 413        return 0;
 414}
 415
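/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the VCE interrupt source, allocate the VCPU buffer object,
 * set up firmware handling for PSP or direct loading, initialize the
 * rings and allocate the SRIOV MM table.
 */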
 416static int vce_v4_0_sw_init(void *handle)
 417{
 418        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 419        struct amdgpu_ring *ring;
 420        unsigned size;
 421        int r, i;
 422
 423        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
 424        if (r)
 425                return r;
 426
 427        size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
 428        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 429                size += VCE_V4_0_FW_SIZE;
 430
 431        r = amdgpu_vce_sw_init(adev, size);
 432        if (r)
 433                return r;
 434
 435        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 436                const struct common_firmware_header *hdr;
 437                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 438
 439                adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
 440                if (!adev->vce.saved_bo)
 441                        return -ENOMEM;
 442
 443                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
 444                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
 445                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
 446                adev->firmware.fw_size +=
 447                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
 448                DRM_INFO("PSP loading VCE firmware\n");
 449        } else {
 450                r = amdgpu_vce_resume(adev);
 451                if (r)
 452                        return r;
 453        }
 454
 455        for (i = 0; i < adev->vce.num_rings; i++) {
 456                ring = &adev->vce.ring[i];
 457                sprintf(ring->name, "vce%d", i);
 458                if (amdgpu_sriov_vf(adev)) {
 459                        /* DOORBELL only works under SRIOV */
 460                        ring->use_doorbell = true;
 461
  462                        /* currently only the first encoding ring is used under SRIOV,
  463                         * so point the remaining rings at an unused doorbell location.
  464                         */
 465                        if (i == 0)
 466                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
 467                        else
 468                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
 469                }
 470                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
 471                if (r)
 472                        return r;
 473        }
 474
 475        r = amdgpu_virt_alloc_mm_table(adev);
 476        if (r)
 477                return r;
 478
 479        return r;
 480}
 481
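/**
 * vce_v4_0_sw_fini - software fini
 *
 * @handle: amdgpu_device pointer
 *
 * Free the SRIOV MM table and the saved firmware copy, then tear down
 * the common VCE state.
 */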
 482static int vce_v4_0_sw_fini(void *handle)
 483{
 484        int r;
 485        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 486
 487        /* free MM table */
 488        amdgpu_virt_free_mm_table(adev);
 489
 490        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 491                kfree(adev->vce.saved_bo);
 492                adev->vce.saved_bo = NULL;
 493        }
 494
 495        r = amdgpu_vce_suspend(adev);
 496        if (r)
 497                return r;
 498
 499        return amdgpu_vce_sw_fini(adev);
 500}
 501
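/**
 * vce_v4_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Start the VCE block (through the MM scheduler under SRIOV) and run a
 * ring test on every ring.
 */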
 502static int vce_v4_0_hw_init(void *handle)
 503{
 504        int r, i;
 505        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 506
 507        if (amdgpu_sriov_vf(adev))
 508                r = vce_v4_0_sriov_start(adev);
 509        else
 510                r = vce_v4_0_start(adev);
 511        if (r)
 512                return r;
 513
 514        for (i = 0; i < adev->vce.num_rings; i++)
 515                adev->vce.ring[i].ready = false;
 516
 517        for (i = 0; i < adev->vce.num_rings; i++) {
 518                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
 519                if (r)
 520                        return r;
 521                else
 522                        adev->vce.ring[i].ready = true;
 523        }
 524
 525        DRM_INFO("VCE initialized successfully.\n");
 526
 527        return 0;
 528}
 529
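/**
 * vce_v4_0_hw_fini - hardware fini
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCE block (bare metal only, registers are untouched under
 * SRIOV) and mark all rings as not ready.
 */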
 530static int vce_v4_0_hw_fini(void *handle)
 531{
 532        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 533        int i;
 534
 535        if (!amdgpu_sriov_vf(adev)) {
 536                /* vce_v4_0_wait_for_idle(handle); */
 537                vce_v4_0_stop(adev);
 538        } else {
 539                /* full access mode, so don't touch any VCE register */
 540                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
 541        }
 542
 543        for (i = 0; i < adev->vce.num_rings; i++)
 544                adev->vce.ring[i].ready = false;
 545
 546        return 0;
 547}
 548
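/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Save the VCPU buffer object contents when the firmware was loaded by
 * the PSP, then shut the block down.
 */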
 549static int vce_v4_0_suspend(void *handle)
 550{
 551        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 552        int r;
 553
 554        if (adev->vce.vcpu_bo == NULL)
 555                return 0;
 556
 557        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 558                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 559                void *ptr = adev->vce.cpu_addr;
 560
 561                memcpy_fromio(adev->vce.saved_bo, ptr, size);
 562        }
 563
 564        r = vce_v4_0_hw_fini(adev);
 565        if (r)
 566                return r;
 567
 568        return amdgpu_vce_suspend(adev);
 569}
 570
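/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved VCPU buffer object (PSP loading) or reload the
 * firmware image, then reinitialize the hardware.
 */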
 571static int vce_v4_0_resume(void *handle)
 572{
 573        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 574        int r;
 575
 576        if (adev->vce.vcpu_bo == NULL)
 577                return -EINVAL;
 578
 579        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 580                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 581                void *ptr = adev->vce.cpu_addr;
 582
 583                memcpy_toio(ptr, adev->vce.saved_bo, size);
 584        } else {
 585                r = amdgpu_vce_resume(adev);
 586                if (r)
 587                        return r;
 588        }
 589
 590        return vce_v4_0_hw_init(adev);
 591}
 592
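/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating, the LMI interface and the VCPU cache BARs,
 * offsets and sizes for the firmware, stack and data regions, then
 * enable the system interrupt.
 */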
 593static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
 594{
 595        uint32_t offset, size;
 596
 597        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
 598        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
 599        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
 600        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
 601
 602        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
 603        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
 604        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
 605        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 606        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 607
 608        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 609                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 610                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
 611                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 612                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
 613        } else {
 614                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 615                        (adev->vce.gpu_addr >> 8));
 616                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 617                        (adev->vce.gpu_addr >> 40) & 0xff);
 618        }
 619
 620        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 621        size = VCE_V4_0_FW_SIZE;
 622        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
 623        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 624
 625        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
 626        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
 627        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
 628        size = VCE_V4_0_STACK_SIZE;
 629        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
 630        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 631
 632        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
 633        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
 634        offset += size;
 635        size = VCE_V4_0_DATA_SIZE;
 636        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
 637        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 638
 639        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
 640        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
 641                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
 642                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 643}
 644
 645static int vce_v4_0_set_clockgating_state(void *handle,
 646                                          enum amd_clockgating_state state)
 647{
  648        /* needed for driver unload */
 649        return 0;
 650}
 651
 652#if 0
 653static bool vce_v4_0_is_idle(void *handle)
 654{
 655        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 656        u32 mask = 0;
 657
 658        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
 659        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
 660
 661        return !(RREG32(mmSRBM_STATUS2) & mask);
 662}
 663
 664static int vce_v4_0_wait_for_idle(void *handle)
 665{
 666        unsigned i;
 667        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 668
 669        for (i = 0; i < adev->usec_timeout; i++)
 670                if (vce_v4_0_is_idle(handle))
 671                        return 0;
 672
 673        return -ETIMEDOUT;
 674}
 675
 676#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
 677#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
 678#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
 679#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
 680                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
 681
 682static bool vce_v4_0_check_soft_reset(void *handle)
 683{
 684        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 685        u32 srbm_soft_reset = 0;
 686
  687        /* According to the VCE team, we should use VCE_STATUS instead of
  688         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
  689         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
  690         * instance's registers are accessed
  691         * (0 for the 1st instance, 0x10 for the 2nd instance).
  692         *
  693         * VCE_STATUS
  694         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
  695         * |----+----+-----------+----+----+----+----------+---------+----|
  696         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
  697         *
  698         * The VCE team suggests using bits 3-6 for the busy status check.
  699         */
 700        mutex_lock(&adev->grbm_idx_mutex);
 701        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
  702        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
 703                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
 704                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
 705        }
 706        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
  707        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
 708                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
 709                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
 710        }
 711        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
 712        mutex_unlock(&adev->grbm_idx_mutex);
 713
 714        if (srbm_soft_reset) {
 715                adev->vce.srbm_soft_reset = srbm_soft_reset;
 716                return true;
 717        } else {
 718                adev->vce.srbm_soft_reset = 0;
 719                return false;
 720        }
 721}
 722
 723static int vce_v4_0_soft_reset(void *handle)
 724{
 725        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 726        u32 srbm_soft_reset;
 727
 728        if (!adev->vce.srbm_soft_reset)
 729                return 0;
 730        srbm_soft_reset = adev->vce.srbm_soft_reset;
 731
 732        if (srbm_soft_reset) {
 733                u32 tmp;
 734
 735                tmp = RREG32(mmSRBM_SOFT_RESET);
 736                tmp |= srbm_soft_reset;
 737                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
 738                WREG32(mmSRBM_SOFT_RESET, tmp);
 739                tmp = RREG32(mmSRBM_SOFT_RESET);
 740
 741                udelay(50);
 742
 743                tmp &= ~srbm_soft_reset;
 744                WREG32(mmSRBM_SOFT_RESET, tmp);
 745                tmp = RREG32(mmSRBM_SOFT_RESET);
 746
 747                /* Wait a little for things to settle down */
 748                udelay(50);
 749        }
 750
 751        return 0;
 752}
 753
 754static int vce_v4_0_pre_soft_reset(void *handle)
 755{
 756        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 757
 758        if (!adev->vce.srbm_soft_reset)
 759                return 0;
 760
 761        mdelay(5);
 762
 763        return vce_v4_0_suspend(adev);
 764}
 765
 766
 767static int vce_v4_0_post_soft_reset(void *handle)
 768{
 769        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 770
 771        if (!adev->vce.srbm_soft_reset)
 772                return 0;
 773
 774        mdelay(5);
 775
 776        return vce_v4_0_resume(adev);
 777}
 778
 779static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
 780{
 781        u32 tmp, data;
 782
 783        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
 784        if (override)
 785                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
 786        else
 787                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
 788
 789        if (tmp != data)
 790                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
 791}
 792
 793static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
 794                                             bool gated)
 795{
 796        u32 data;
 797
 798        /* Set Override to disable Clock Gating */
 799        vce_v4_0_override_vce_clock_gating(adev, true);
 800
 801        /* This function enables MGCG which is controlled by firmware.
 802           With the clocks in the gated state the core is still
 803           accessible but the firmware will throttle the clocks on the
 804           fly as necessary.
 805        */
 806        if (gated) {
 807                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
 808                data |= 0x1ff;
 809                data &= ~0xef0000;
 810                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
 811
 812                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
 813                data |= 0x3ff000;
 814                data &= ~0xffc00000;
 815                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 816
 817                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
 818                data |= 0x2;
 819                data &= ~0x00010000;
 820                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
 821
 822                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
 823                data |= 0x37f;
 824                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
 825
 826                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
 827                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
 828                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
 829                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
 830                        0x8;
 831                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
 832        } else {
 833                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
 834                data &= ~0x80010;
 835                data |= 0xe70008;
 836                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
 837
 838                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
 839                data |= 0xffc00000;
 840                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 841
 842                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
 843                data |= 0x10000;
 844                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
 845
 846                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
 847                data &= ~0xffc00000;
 848                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
 849
 850                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
 851                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
 852                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
 853                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
 854                          0x8);
 855                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
 856        }
 857        vce_v4_0_override_vce_clock_gating(adev, false);
 858}
 859
 860static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
 861{
 862        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
 863
 864        if (enable)
 865                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
 866        else
 867                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
 868
 869        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
 870}
 871
 872static int vce_v4_0_set_clockgating_state(void *handle,
 873                                          enum amd_clockgating_state state)
 874{
 875        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 876        bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 877        int i;
 878
 879        if ((adev->asic_type == CHIP_POLARIS10) ||
 880                (adev->asic_type == CHIP_TONGA) ||
 881                (adev->asic_type == CHIP_FIJI))
 882                vce_v4_0_set_bypass_mode(adev, enable);
 883
 884        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
 885                return 0;
 886
 887        mutex_lock(&adev->grbm_idx_mutex);
 888        for (i = 0; i < 2; i++) {
 889                /* Program VCE Instance 0 or 1 if not harvested */
 890                if (adev->vce.harvest_config & (1 << i))
 891                        continue;
 892
 893                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
 894
 895                if (enable) {
 896                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
  897                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
  898                        data &= ~(0xf | 0xff0);
  899                        data |= ((0x0 << 0) | (0x04 << 4));
  900                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
  901
  902                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
  903                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
  904                        data &= ~(0xf | 0xff0);
  905                        data |= ((0x0 << 0) | (0x04 << 4));
  906                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 907                }
 908
 909                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
 910        }
 911
 912        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
 913        mutex_unlock(&adev->grbm_idx_mutex);
 914
 915        return 0;
 916}
 917
 918static int vce_v4_0_set_powergating_state(void *handle,
 919                                          enum amd_powergating_state state)
 920{
 921        /* This doesn't actually powergate the VCE block.
 922         * That's done in the dpm code via the SMC.  This
 923         * just re-inits the block as necessary.  The actual
 924         * gating still happens in the dpm code.  We should
 925         * revisit this when there is a cleaner line between
 926         * the smc and the hw blocks
 927         */
 928        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 929
 930        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
 931                return 0;
 932
 933        if (state == AMD_PG_STATE_GATE)
 934                /* XXX do we need a vce_v4_0_stop()? */
 935                return 0;
 936        else
 937                return vce_v4_0_start(adev);
 938}
 939#endif
 940
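/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vmid: VMID of the submission
 * @ctx_switch: unused
 *
 * Schedule the IB on the ring with a VCE_CMD_IB_VM packet.
 */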
 941static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
 942                struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
 943{
 944        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
 945        amdgpu_ring_write(ring, vmid);
 946        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 947        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 948        amdgpu_ring_write(ring, ib->length_dw);
 949}
 950
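/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: GPU address where the fence value is written
 * @seq: sequence number to write
 * @flags: fence flags (64 bit fences are not supported)
 *
 * Emit VCE_CMD_FENCE followed by VCE_CMD_TRAP.
 */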
 951static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 952                        u64 seq, unsigned flags)
 953{
 954        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 955
 956        amdgpu_ring_write(ring, VCE_CMD_FENCE);
 957        amdgpu_ring_write(ring, addr);
 958        amdgpu_ring_write(ring, upper_32_bits(addr));
 959        amdgpu_ring_write(ring, seq);
 960        amdgpu_ring_write(ring, VCE_CMD_TRAP);
 961}
 962
 963static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 964{
 965        amdgpu_ring_write(ring, VCE_CMD_END);
 966}
 967
 968static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 969                                   uint32_t val, uint32_t mask)
 970{
 971        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
 972        amdgpu_ring_write(ring, reg << 2);
 973        amdgpu_ring_write(ring, mask);
 974        amdgpu_ring_write(ring, val);
 975}
 976
 977static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 978                                   unsigned int vmid, uint64_t pd_addr)
 979{
 980        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 981
 982        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 983
 984        /* wait for reg writes */
 985        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
 986                               lower_32_bits(pd_addr), 0xffffffff);
 987}
 988
 989static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
 990                               uint32_t reg, uint32_t val)
 991{
 992        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 993        amdgpu_ring_write(ring, reg << 2);
 994        amdgpu_ring_write(ring, val);
 995}
 996
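/**
 * vce_v4_0_set_interrupt_state - toggle the VCE system interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type (unused)
 * @state: enable or disable the trap interrupt
 *
 * Enable or disable VCE_SYS_INT_TRAP_INTERRUPT_EN; a no-op under SRIOV.
 */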
 997static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
 998                                        struct amdgpu_irq_src *source,
 999                                        unsigned type,
1000                                        enum amdgpu_interrupt_state state)
1001{
1002        uint32_t val = 0;
1003
1004        if (!amdgpu_sriov_vf(adev)) {
1005                if (state == AMDGPU_IRQ_STATE_ENABLE)
1006                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1007
1008                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1009                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1010        }
1011        return 0;
1012}
1013
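/**
 * vce_v4_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Signal fence completion on the ring selected by the source data.
 */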
1014static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1015                                      struct amdgpu_irq_src *source,
1016                                      struct amdgpu_iv_entry *entry)
1017{
1018        DRM_DEBUG("IH: VCE\n");
1019
1020        switch (entry->src_data[0]) {
1021        case 0:
1022        case 1:
1023        case 2:
1024                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1025                break;
1026        default:
1027                DRM_ERROR("Unhandled interrupt: %d %d\n",
1028                          entry->src_id, entry->src_data[0]);
1029                break;
1030        }
1031
1032        return 0;
1033}
1034
1035const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1036        .name = "vce_v4_0",
1037        .early_init = vce_v4_0_early_init,
1038        .late_init = NULL,
1039        .sw_init = vce_v4_0_sw_init,
1040        .sw_fini = vce_v4_0_sw_fini,
1041        .hw_init = vce_v4_0_hw_init,
1042        .hw_fini = vce_v4_0_hw_fini,
1043        .suspend = vce_v4_0_suspend,
1044        .resume = vce_v4_0_resume,
1045        .is_idle = NULL /* vce_v4_0_is_idle */,
1046        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1047        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1048        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1049        .soft_reset = NULL /* vce_v4_0_soft_reset */,
1050        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1051        .set_clockgating_state = vce_v4_0_set_clockgating_state,
1052        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1053};
1054
1055static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1056        .type = AMDGPU_RING_TYPE_VCE,
1057        .align_mask = 0x3f,
1058        .nop = VCE_CMD_NO_OP,
1059        .support_64bit_ptrs = false,
1060        .vmhub = AMDGPU_MMHUB,
1061        .get_rptr = vce_v4_0_ring_get_rptr,
1062        .get_wptr = vce_v4_0_ring_get_wptr,
1063        .set_wptr = vce_v4_0_ring_set_wptr,
1064        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1065        .emit_frame_size =
1066                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1067                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1068                4 + /* vce_v4_0_emit_vm_flush */
1069                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1070                1, /* vce_v4_0_ring_insert_end */
1071        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1072        .emit_ib = vce_v4_0_ring_emit_ib,
1073        .emit_vm_flush = vce_v4_0_emit_vm_flush,
1074        .emit_fence = vce_v4_0_ring_emit_fence,
1075        .test_ring = amdgpu_vce_ring_test_ring,
1076        .test_ib = amdgpu_vce_ring_test_ib,
1077        .insert_nop = amdgpu_ring_insert_nop,
1078        .insert_end = vce_v4_0_ring_insert_end,
1079        .pad_ib = amdgpu_ring_generic_pad_ib,
1080        .begin_use = amdgpu_vce_ring_begin_use,
1081        .end_use = amdgpu_vce_ring_end_use,
1082        .emit_wreg = vce_v4_0_emit_wreg,
1083        .emit_reg_wait = vce_v4_0_emit_reg_wait,
1084};
1085
1086static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1087{
1088        int i;
1089
1090        for (i = 0; i < adev->vce.num_rings; i++)
1091                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1092        DRM_INFO("VCE enabled in VM mode\n");
1093}
1094
1095static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1096        .set = vce_v4_0_set_interrupt_state,
1097        .process = vce_v4_0_process_interrupt,
1098};
1099
1100static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1101{
1102        adev->vce.irq.num_types = 1;
1103        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
 1104}
1105
1106const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1107{
1108        .type = AMD_IP_BLOCK_TYPE_VCE,
1109        .major = 4,
1110        .minor = 0,
1111        .rev = 0,
1112        .funcs = &vce_v4_0_ip_funcs,
1113};
1114