linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

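/*
 * Rough budget for the sizes above, assuming AMDGPU_MAX_VCE_HANDLES is 16
 * as defined in amdgpu.h: the data region is 16K * 16 + 52K = 308K, so one
 * stack + data pair is 64K + 308K = 372K.  vce_v4_0_sw_init() allocates
 * two such pairs, plus the 384K firmware image when the firmware is not
 * loaded through PSP.
 */
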
static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring == &adev->vce.ring[0])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring == &adev->vce.ring[1])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}
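
/*
 * Note on the two paths above: rings use doorbells only under SRIOV (see
 * vce_v4_0_sw_init()), where the new write pointer is mirrored into the
 * writeback page and posted through WDOORBELL32() so the engine picks it
 * up without a trapped register access.  The MMIO writes below the
 * doorbell check are only reachable on bare metal.
 */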

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}
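
/*
 * Timeout budget for the loop above: the inner loop polls VCE_STATUS for
 * 100 * 10 ms = 1 s; on failure the ECPU is pulsed through soft reset
 * (10 ms assert, 10 ms release) and the poll restarts, for up to 10
 * attempts, i.e. roughly 10 seconds before giving up with -ETIMEDOUT.
 */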

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }
        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);

        return 0;
}
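
/*
 * Summary of the MMSCH handshake above: the guest publishes the GPU
 * address and total size (in dwords) of its init table, clears the
 * response mailbox, then rings VCE_MMSCH_VF_MAILBOX_HOST.  The response
 * register is polled for up to 1000 * 10 us = 10 ms; if the expected
 * completion pattern 0x10000002 does not appear within that window the
 * init fails with -EBUSY.
 */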

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                            lower_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                            upper_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                            ring->ring_size / 4);

                /* begin of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
                } else {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                    adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                    adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                    adev->vce.gpu_addr >> 8);
                }

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                size = VCE_V4_0_FW_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                            offset & 0x7FFFFFFF);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset += size;
                size = VCE_V4_0_STACK_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                            offset & 0x7FFFFFFF);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                            offset & 0x7FFFFFFF);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                                   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;

                return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
        }

        return -EINVAL; /* already initialized? */
}
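
/*
 * Shape of the table built above, as consumed by the MMSCH firmware: a
 * mmsch_v1_0_init_header giving per-engine offset/size in dwords, followed
 * by a stream of direct-write, read-modify-write and polling packets, and
 * terminated by an end packet.  The MMSCH_V1_0_INSERT_* macros from
 * mmsch_v1_0.h append a packet at init_table and account for it in
 * table_size, so vce_table_size ends up covering everything emitted for
 * this engine.
 */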

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}
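
/*
 * Ordering in vce_v4_0_start() above: the ring registers and MC ranges are
 * programmed first, JOB_BUSY is raised, the VCPU clock is enabled, and
 * only then is the ECPU released from soft reset before waiting on the
 * firmware-loaded handshake in vce_v4_0_firmware_loaded().
 */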

static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        /* Set Clock-Gating off */
        /*
        if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                vce_v4_0_set_vce_sw_clock_gating(adev, false);
        */

        return 0;
}

static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

                adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
                if (!adev->vce.saved_bo)
                        return -ENOMEM;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;
                        if (i == 0)
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
                        else if (i == 1)
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
                        else
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        r = amdgpu_virt_alloc_mm_table(adev);
        if (r)
                return r;

        return r;
}
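
/*
 * On the doorbell indices above: the AMDGPU_DOORBELL64_* constants index
 * 64-bit doorbell slots while VCE posts 32-bit doorbells (see
 * WDOORBELL32() in vce_v4_0_ring_set_wptr()), which suggests the "* 2"
 * and "+ 1" arithmetic splits each 64-bit slot into two 32-bit ones, with
 * rings 1 and 2 sharing the RING2_3 slot.
 */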

static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
                if (r)
                        return r;
                else
                        adev->vce.ring[i].ready = true;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        return 0;
}

static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_fromio(adev->vce.saved_bo, ptr, size);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_toio(ptr, adev->vce.saved_bo, size);
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}
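
/*
 * Note on the PSP path in suspend/resume above: when the VCPU image is
 * loaded by PSP the driver cannot simply re-upload it, so suspend
 * snapshots the whole VCPU BO into vce.saved_bo with memcpy_fromio() and
 * resume restores it with memcpy_toio() before re-running hw_init; the
 * non-PSP path instead rebuilds the image via amdgpu_vce_resume().
 */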

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
        }

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
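
/*
 * Address split used above: the 40-bit BAR registers take bits 8..39 of
 * the GPU address (addr >> 8) and the matching 64-bit BAR registers take
 * bits 40..47 ((addr >> 40) & 0xff).  The VCPU_CACHE_OFFSETn registers
 * carry a region selector in bits 24..27 (mask 0x0f000000), hence the
 * (1 << 24) and (2 << 24) tags on the stack and data regions.
 */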

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK   0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK    0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK    0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for the 1st instance, 0x10 for the 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
            (adev->asic_type == CHIP_TONGA) ||
            (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
                return 0;

        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v4_0_stop()? */
                return 0;
        else
                return vce_v4_0_start(adev);
}
#endif

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vm_id);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}
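
/*
 * The IB packet above is 5 dwords (command, vm_id, IB address lo/hi,
 * length), which is exactly what .emit_ib_size advertises in
 * vce_v4_0_ring_vm_funcs below.
 */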

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}
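
/*
 * Likewise the fence packet is 5 dwords (VCE_CMD_FENCE, address lo/hi,
 * seq, VCE_CMD_TRAP); .emit_frame_size accounts for it twice, once for
 * the user fence and once for the VM fence.
 */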

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                         unsigned int vm_id, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
        unsigned eng = ring->vm_inv_eng;

        pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
        pd_addr |= AMDGPU_PTE_VALID;

        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
        amdgpu_ring_write(ring, upper_32_bits(pd_addr));

        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
        amdgpu_ring_write(ring, lower_32_bits(pd_addr));

        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
        amdgpu_ring_write(ring, 0xffffffff);
        amdgpu_ring_write(ring, lower_32_bits(pd_addr));

        /* flush TLB */
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
        amdgpu_ring_write(ring, req);

        /* wait for flush */
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
        amdgpu_ring_write(ring, 1 << vm_id);
        amdgpu_ring_write(ring, 1 << vm_id);
}
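
/*
 * Dword accounting for the flush above: two REG_WRITE packets (3 dwords
 * each) to program the page-table base, one REG_WAIT (4 dwords) for the
 * write to stick, one REG_WRITE (3 dwords) to request the TLB
 * invalidation and one REG_WAIT (4 dwords) for the ack bit:
 * 3 + 3 + 4 + 3 + 4 = 17 dwords, matching the 17 in .emit_frame_size
 * below.
 */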

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (state == AMDGPU_IRQ_STATE_ENABLE)
                val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
                        VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
                        ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .vmhub = AMDGPU_MMHUB,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                17 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};
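
/*
 * Worst-case frame cost from the table above: 17 (VM flush) + 5 + 5 (two
 * fences) + 1 (insert_end) = 28 dwords per frame, plus 5 dwords for each
 * indirect buffer emitted.
 */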

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};