linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26
  27#include <linux/firmware.h>
  28#include <drm/drmP.h>
  29#include "amdgpu.h"
  30#include "amdgpu_vce.h"
  31#include "soc15d.h"
  32#include "soc15_common.h"
  33#include "mmsch_v1_0.h"
  34
  35#include "vega10/soc15ip.h"
  36#include "vega10/VCE/vce_4_0_offset.h"
  37#include "vega10/VCE/vce_4_0_default.h"
  38#include "vega10/VCE/vce_4_0_sh_mask.h"
  39#include "vega10/MMHUB/mmhub_1_0_offset.h"
  40#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
  41
  42#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02
  43
  44#define VCE_V4_0_FW_SIZE        (384 * 1024)
  45#define VCE_V4_0_STACK_SIZE     (64 * 1024)
  46#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
  47
  48static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
  49static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
  50static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
  51
  52/**
  53 * vce_v4_0_ring_get_rptr - get read pointer
  54 *
  55 * @ring: amdgpu_ring pointer
  56 *
  57 * Returns the current hardware read pointer
  58 */
  59static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
  60{
  61        struct amdgpu_device *adev = ring->adev;
  62
  63        if (ring == &adev->vce.ring[0])
  64                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
  65        else if (ring == &adev->vce.ring[1])
  66                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
  67        else
  68                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
  69}
  70
  71/**
  72 * vce_v4_0_ring_get_wptr - get write pointer
  73 *
  74 * @ring: amdgpu_ring pointer
  75 *
  76 * Returns the current hardware write pointer
  77 */
  78static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
  79{
  80        struct amdgpu_device *adev = ring->adev;
  81
  82        if (ring->use_doorbell)
  83                return adev->wb.wb[ring->wptr_offs];
  84
  85        if (ring == &adev->vce.ring[0])
  86                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
  87        else if (ring == &adev->vce.ring[1])
  88                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
  89        else
  90                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
  91}
  92
  93/**
  94 * vce_v4_0_ring_set_wptr - set write pointer
  95 *
  96 * @ring: amdgpu_ring pointer
  97 *
  98 * Commits the write pointer to the hardware
  99 */
 100static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 101{
 102        struct amdgpu_device *adev = ring->adev;
 103
 104        if (ring->use_doorbell) {
 105                /* XXX check if swapping is necessary on BE */
 106                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 107                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 108                return;
 109        }
 110
 111        if (ring == &adev->vce.ring[0])
 112                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
 113                        lower_32_bits(ring->wptr));
 114        else if (ring == &adev->vce.ring[1])
 115                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
 116                        lower_32_bits(ring->wptr));
 117        else
 118                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
 119                        lower_32_bits(ring->wptr));
 120}
 121
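/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to report in
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports that the firmware is loaded,
 * kicking the ECPU through a soft reset whenever it stops responding.
 * Returns 0 on success or -ETIMEDOUT if the firmware never comes up.
 */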
 122static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
 123{
 124        int i, j;
 125
 126        for (i = 0; i < 10; ++i) {
 127                for (j = 0; j < 100; ++j) {
 128                        uint32_t status =
 129                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
 130
 131                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
 132                                return 0;
 133                        mdelay(10);
 134                }
 135
 136                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
 137                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 138                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
 139                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 140                mdelay(10);
 141                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
 142                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 143                mdelay(10);
 144
 145        }
 146
 147        return -ETIMEDOUT;
 148}
 149
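/**
 * vce_v4_0_mmsch_start - kick off the MM system firmware scheduler
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table to hand to the MMSCH
 *
 * Program the MMSCH VF registers with the GPU address and size of the
 * init table, trigger the initialization and wait for the mailbox
 * response. Returns 0 on success or -EBUSY on timeout.
 */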
 150static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
 151                                struct amdgpu_mm_table *table)
 152{
 153        uint32_t data = 0, loop;
 154        uint64_t addr = table->gpu_addr;
 155        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
 156        uint32_t size;
 157
 158        size = header->header_size + header->vce_table_size + header->uvd_table_size;
 159
 160        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
 161        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
 162        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
 163
 164        /* 2, update vmid of descriptor */
 165        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
 166        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
 167        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
 168        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
 169
 170        /* 3, notify mmsch about the size of this descriptor */
 171        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
 172
 173        /* 4, set resp to zero */
 174        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
 175
 176        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
 177        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
 178        adev->vce.ring[0].wptr = 0;
 179        adev->vce.ring[0].wptr_old = 0;
 180
  181        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP reports completion (0x10000002) */
 182        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
 183
 184        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
 185        loop = 1000;
 186        while ((data & 0x10000002) != 0x10000002) {
 187                udelay(10);
 188                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
 189                loop--;
 190                if (!loop)
 191                        break;
 192        }
 193
 194        if (!loop) {
 195                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
 196                return -EBUSY;
 197        }
 198
 199        return 0;
 200}
 201
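/**
 * vce_v4_0_sriov_start - start VCE under SRIOV using the MMSCH
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table (ring setup, MC_RESUME programming and the
 * firmware-loaded poll) in the MM table BO and hand it to the MMSCH
 * instead of programming the registers directly.
 */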
 202static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 203{
 204        struct amdgpu_ring *ring;
 205        uint32_t offset, size;
 206        uint32_t table_size = 0;
 207        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
 208        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
 209        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
 210        struct mmsch_v1_0_cmd_end end = { { 0 } };
 211        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
 212        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
 213
 214        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
 215        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
 216        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
 217        end.cmd_header.command_type = MMSCH_COMMAND__END;
 218
 219        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
 220                header->version = MMSCH_VERSION;
 221                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
 222
 223                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
 224                        header->vce_table_offset = header->header_size;
 225                else
 226                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
 227
 228                init_table += header->vce_table_offset;
 229
 230                ring = &adev->vce.ring[0];
 231                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
 232                                            lower_32_bits(ring->gpu_addr));
 233                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
 234                                            upper_32_bits(ring->gpu_addr));
 235                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
 236                                            ring->ring_size / 4);
 237
  238                /* begin of MC_RESUME */
 239                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
 240                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
 241                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
 242                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 243                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 244
 245                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 246                    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 247                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
 248                    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
 249                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
 250                    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
 251                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
 252                } else {
 253                    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 254                                                adev->vce.gpu_addr >> 8);
 255                    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
 256                                                adev->vce.gpu_addr >> 8);
 257                    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
 258                                                adev->vce.gpu_addr >> 8);
 259                }
 260
 261                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 262                size = VCE_V4_0_FW_SIZE;
 263                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
 264                                            offset & 0x7FFFFFFF);
 265                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 266
 267                offset += size;
 268                size = VCE_V4_0_STACK_SIZE;
 269                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
 270                                            offset & 0x7FFFFFFF);
 271                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 272
 273                offset += size;
 274                size = VCE_V4_0_DATA_SIZE;
 275                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
 276                                            offset & 0x7FFFFFFF);
 277                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 278
 279                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
 280                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
 281                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
 282                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 283
 284                /* end of MC_RESUME */
 285                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 286                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
 287                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
 288                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
 289                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 290                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
 291
 292                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 293                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
 294                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
 295
 296                /* clear BUSY flag */
 297                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 298                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);
 299
 300                /* add end packet */
 301                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
 302                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
 303                header->vce_table_size = table_size;
 304        }
 305
 306        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
 307}
 308
 309/**
 310 * vce_v4_0_start - start VCE block
 311 *
 312 * @adev: amdgpu_device pointer
 313 *
 314 * Setup and start the VCE block
 315 */
 316static int vce_v4_0_start(struct amdgpu_device *adev)
 317{
 318        struct amdgpu_ring *ring;
 319        int r;
 320
 321        ring = &adev->vce.ring[0];
 322
 323        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
 324        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
 325        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
 326        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
 327        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
 328
 329        ring = &adev->vce.ring[1];
 330
 331        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
 332        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
 333        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
 334        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
 335        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
 336
 337        ring = &adev->vce.ring[2];
 338
 339        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
 340        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
 341        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
 342        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
 343        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
 344
 345        vce_v4_0_mc_resume(adev);
 346        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
 347                        ~VCE_STATUS__JOB_BUSY_MASK);
 348
 349        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
 350
 351        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
 352                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 353        mdelay(100);
 354
 355        r = vce_v4_0_firmware_loaded(adev);
 356
 357        /* clear BUSY flag */
 358        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
 359
 360        if (r) {
 361                DRM_ERROR("VCE not responding, giving up!!!\n");
 362                return r;
 363        }
 364
 365        return 0;
 366}
 367
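/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear the busy flag.
 */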
 368static int vce_v4_0_stop(struct amdgpu_device *adev)
 369{
 370
 371        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
 372
  373        /* hold the ECPU in reset */
 374        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 375                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
 376                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 377
 378        /* clear BUSY flag */
 379        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
 380
 381        /* Set Clock-Gating off */
 382        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
 383                vce_v4_0_set_vce_sw_clock_gating(adev, false);
 384        */
 385
 386        return 0;
 387}
 388
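/**
 * vce_v4_0_early_init - minimal early init
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of rings (one under SRIOV, three otherwise) and hook
 * up the ring and interrupt functions.
 */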
 389static int vce_v4_0_early_init(void *handle)
 390{
 391        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 392
  393        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
 394                adev->vce.num_rings = 1;
 395        else
 396                adev->vce.num_rings = 3;
 397
 398        vce_v4_0_set_ring_funcs(adev);
 399        vce_v4_0_set_irq_funcs(adev);
 400
 401        return 0;
 402}
 403
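/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the VCE interrupt source, allocate the firmware/stack/data BO,
 * set up the rings (doorbell based under SRIOV) and allocate the MM table
 * used by the MMSCH.
 */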
 404static int vce_v4_0_sw_init(void *handle)
 405{
 406        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 407        struct amdgpu_ring *ring;
 408        unsigned size;
 409        int r, i;
 410
 411        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
 412        if (r)
 413                return r;
 414
 415        size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
 416        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 417                size += VCE_V4_0_FW_SIZE;
 418
 419        r = amdgpu_vce_sw_init(adev, size);
 420        if (r)
 421                return r;
 422
 423        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 424                const struct common_firmware_header *hdr;
 425                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 426
 427                adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
 428                if (!adev->vce.saved_bo)
 429                        return -ENOMEM;
 430
 431                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
 432                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
 433                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
 434                adev->firmware.fw_size +=
 435                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
 436                DRM_INFO("PSP loading VCE firmware\n");
 437        } else {
 438                r = amdgpu_vce_resume(adev);
 439                if (r)
 440                        return r;
 441        }
 442
 443        for (i = 0; i < adev->vce.num_rings; i++) {
 444                ring = &adev->vce.ring[i];
 445                sprintf(ring->name, "vce%d", i);
 446                if (amdgpu_sriov_vf(adev)) {
 447                        /* DOORBELL only works under SRIOV */
 448                        ring->use_doorbell = true;
 449
  450                        /* currently only the first encoding ring is used for sriov,
  451                         * so point the other rings at an unused doorbell location.
  452                         */
 453                        if (i == 0)
 454                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
 455                        else
 456                                ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
 457                }
 458                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
 459                if (r)
 460                        return r;
 461        }
 462
 463        r = amdgpu_virt_alloc_mm_table(adev);
 464        if (r)
 465                return r;
 466
 467        return r;
 468}
 469
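/**
 * vce_v4_0_sw_fini - software teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Free the MM table and, for PSP loading, the saved firmware image, then
 * tear down the common VCE state.
 */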
 470static int vce_v4_0_sw_fini(void *handle)
 471{
 472        int r;
 473        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 474
 475        /* free MM table */
 476        amdgpu_virt_free_mm_table(adev);
 477
 478        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 479                kfree(adev->vce.saved_bo);
 480                adev->vce.saved_bo = NULL;
 481        }
 482
 483        r = amdgpu_vce_suspend(adev);
 484        if (r)
 485                return r;
 486
 487        return amdgpu_vce_sw_fini(adev);
 488}
 489
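/**
 * vce_v4_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Start the VCE block (through the MMSCH under SRIOV) and test that all
 * enabled rings are working.
 */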
 490static int vce_v4_0_hw_init(void *handle)
 491{
 492        int r, i;
 493        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 494
 495        if (amdgpu_sriov_vf(adev))
 496                r = vce_v4_0_sriov_start(adev);
 497        else
 498                r = vce_v4_0_start(adev);
 499        if (r)
 500                return r;
 501
 502        for (i = 0; i < adev->vce.num_rings; i++)
 503                adev->vce.ring[i].ready = false;
 504
 505        for (i = 0; i < adev->vce.num_rings; i++) {
 506                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
 507                if (r)
 508                        return r;
 509                else
 510                        adev->vce.ring[i].ready = true;
 511        }
 512
 513        DRM_INFO("VCE initialized successfully.\n");
 514
 515        return 0;
 516}
 517
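/**
 * vce_v4_0_hw_fini - hardware teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCE block (skipped under SRIOV, where the host owns the
 * registers) and mark all rings as not ready.
 */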
 518static int vce_v4_0_hw_fini(void *handle)
 519{
 520        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 521        int i;
 522
 523        if (!amdgpu_sriov_vf(adev)) {
 524                /* vce_v4_0_wait_for_idle(handle); */
 525                vce_v4_0_stop(adev);
 526        } else {
 527                /* full access mode, so don't touch any VCE register */
 528                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
 529        }
 530
 531        for (i = 0; i < adev->vce.num_rings; i++)
 532                adev->vce.ring[i].ready = false;
 533
 534        return 0;
 535}
 536
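/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Save the firmware BO contents when PSP loading is used, then stop the
 * hardware and suspend the common VCE state.
 */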
 537static int vce_v4_0_suspend(void *handle)
 538{
 539        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 540        int r;
 541
 542        if (adev->vce.vcpu_bo == NULL)
 543                return 0;
 544
 545        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 546                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 547                void *ptr = adev->vce.cpu_addr;
 548
 549                memcpy_fromio(adev->vce.saved_bo, ptr, size);
 550        }
 551
 552        r = vce_v4_0_hw_fini(adev);
 553        if (r)
 554                return r;
 555
 556        return amdgpu_vce_suspend(adev);
 557}
 558
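/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved firmware BO contents (PSP loading) or reload the
 * firmware, then bring the hardware back up.
 */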
 559static int vce_v4_0_resume(void *handle)
 560{
 561        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 562        int r;
 563
 564        if (adev->vce.vcpu_bo == NULL)
 565                return -EINVAL;
 566
 567        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 568                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 569                void *ptr = adev->vce.cpu_addr;
 570
 571                memcpy_toio(ptr, adev->vce.saved_bo, size);
 572        } else {
 573                r = amdgpu_vce_resume(adev);
 574                if (r)
 575                        return r;
 576        }
 577
 578        return vce_v4_0_hw_init(adev);
 579}
 580
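/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating and the LMI, and point the three VCPU cache
 * windows at the firmware, stack and data regions of the VCE BO.
 */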
 581static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
 582{
 583        uint32_t offset, size;
 584
 585        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
 586        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
 587        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
 588        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
 589
 590        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
 591        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
 592        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
 593        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 594        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 595
 596        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 597                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 598                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
 599                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 600                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
 601        } else {
 602                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 603                        (adev->vce.gpu_addr >> 8));
 604                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 605                        (adev->vce.gpu_addr >> 40) & 0xff);
 606        }
 607
 608        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 609        size = VCE_V4_0_FW_SIZE;
 610        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
 611        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 612
 613        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
 614        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
 615        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
 616        size = VCE_V4_0_STACK_SIZE;
 617        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
 618        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 619
 620        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
 621        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
 622        offset += size;
 623        size = VCE_V4_0_DATA_SIZE;
 624        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
 625        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 626
 627        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
 628        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
 629                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
 630                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 631}
 632
 633static int vce_v4_0_set_clockgating_state(void *handle,
 634                                          enum amd_clockgating_state state)
 635{
  636        /* needed for driver unload */
 637        return 0;
 638}
 639
 640#if 0
 641static bool vce_v4_0_is_idle(void *handle)
 642{
 643        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 644        u32 mask = 0;
 645
 646        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
 647        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
 648
 649        return !(RREG32(mmSRBM_STATUS2) & mask);
 650}
 651
 652static int vce_v4_0_wait_for_idle(void *handle)
 653{
 654        unsigned i;
 655        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 656
 657        for (i = 0; i < adev->usec_timeout; i++)
 658                if (vce_v4_0_is_idle(handle))
 659                        return 0;
 660
 661        return -ETIMEDOUT;
 662}
 663
 664#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
 665#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
 666#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
 667#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
 668                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
 669
 670static bool vce_v4_0_check_soft_reset(void *handle)
 671{
 672        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 673        u32 srbm_soft_reset = 0;
 674
  675        /* According to the VCE team, we should use VCE_STATUS instead of
  676         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
  677         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to select which VCE
  678         * instance's registers are accessed
  679         * (0 for the 1st instance, 0x10 for the 2nd instance).
  680         *
  681         * VCE_STATUS
  682         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
  683         * |----+----+-----------+----+----+----+----------+---------+----|
  684         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
  685         *
  686         * The VCE team suggests using bits 3 to 6 for the busy status check.
  687         */
 688        mutex_lock(&adev->grbm_idx_mutex);
 689        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
  690        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
 691                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
 692                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
 693        }
 694        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
  695        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
 696                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
 697                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
 698        }
 699        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
 700        mutex_unlock(&adev->grbm_idx_mutex);
 701
 702        if (srbm_soft_reset) {
 703                adev->vce.srbm_soft_reset = srbm_soft_reset;
 704                return true;
 705        } else {
 706                adev->vce.srbm_soft_reset = 0;
 707                return false;
 708        }
 709}
 710
 711static int vce_v4_0_soft_reset(void *handle)
 712{
 713        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 714        u32 srbm_soft_reset;
 715
 716        if (!adev->vce.srbm_soft_reset)
 717                return 0;
 718        srbm_soft_reset = adev->vce.srbm_soft_reset;
 719
 720        if (srbm_soft_reset) {
 721                u32 tmp;
 722
 723                tmp = RREG32(mmSRBM_SOFT_RESET);
 724                tmp |= srbm_soft_reset;
 725                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
 726                WREG32(mmSRBM_SOFT_RESET, tmp);
 727                tmp = RREG32(mmSRBM_SOFT_RESET);
 728
 729                udelay(50);
 730
 731                tmp &= ~srbm_soft_reset;
 732                WREG32(mmSRBM_SOFT_RESET, tmp);
 733                tmp = RREG32(mmSRBM_SOFT_RESET);
 734
 735                /* Wait a little for things to settle down */
 736                udelay(50);
 737        }
 738
 739        return 0;
 740}
 741
 742static int vce_v4_0_pre_soft_reset(void *handle)
 743{
 744        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 745
 746        if (!adev->vce.srbm_soft_reset)
 747                return 0;
 748
 749        mdelay(5);
 750
 751        return vce_v4_0_suspend(adev);
 752}
 753
 754
 755static int vce_v4_0_post_soft_reset(void *handle)
 756{
 757        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 758
 759        if (!adev->vce.srbm_soft_reset)
 760                return 0;
 761
 762        mdelay(5);
 763
 764        return vce_v4_0_resume(adev);
 765}
 766
 767static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
 768{
 769        u32 tmp, data;
 770
 771        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
 772        if (override)
 773                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
 774        else
 775                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
 776
 777        if (tmp != data)
 778                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
 779}
 780
 781static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
 782                                             bool gated)
 783{
 784        u32 data;
 785
 786        /* Set Override to disable Clock Gating */
 787        vce_v4_0_override_vce_clock_gating(adev, true);
 788
  789        /* This function enables MGCG which is controlled by firmware.
  790         * With the clocks in the gated state the core is still
  791         * accessible but the firmware will throttle the clocks on the
  792         * fly as necessary.
  793         */
 794        if (gated) {
 795                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
 796                data |= 0x1ff;
 797                data &= ~0xef0000;
 798                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
 799
 800                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
 801                data |= 0x3ff000;
 802                data &= ~0xffc00000;
 803                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 804
 805                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
 806                data |= 0x2;
 807                data &= ~0x00010000;
 808                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
 809
 810                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
 811                data |= 0x37f;
 812                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
 813
 814                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
 815                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
 816                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
 817                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
 818                        0x8;
 819                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
 820        } else {
 821                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
 822                data &= ~0x80010;
 823                data |= 0xe70008;
 824                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
 825
 826                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
 827                data |= 0xffc00000;
 828                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 829
 830                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
 831                data |= 0x10000;
 832                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
 833
 834                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
 835                data &= ~0xffc00000;
 836                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
 837
 838                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
 839                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
 840                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
 841                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
 842                          0x8);
 843                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
 844        }
 845        vce_v4_0_override_vce_clock_gating(adev, false);
 846}
 847
 848static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
 849{
 850        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
 851
 852        if (enable)
 853                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
 854        else
 855                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
 856
 857        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
 858}
 859
 860static int vce_v4_0_set_clockgating_state(void *handle,
 861                                          enum amd_clockgating_state state)
 862{
 863        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 864        bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 865        int i;
 866
 867        if ((adev->asic_type == CHIP_POLARIS10) ||
 868                (adev->asic_type == CHIP_TONGA) ||
 869                (adev->asic_type == CHIP_FIJI))
 870                vce_v4_0_set_bypass_mode(adev, enable);
 871
 872        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
 873                return 0;
 874
 875        mutex_lock(&adev->grbm_idx_mutex);
 876        for (i = 0; i < 2; i++) {
 877                /* Program VCE Instance 0 or 1 if not harvested */
 878                if (adev->vce.harvest_config & (1 << i))
 879                        continue;
 880
 881                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
 882
 883                if (enable) {
 884                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
  885                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
 886                        data &= ~(0xf | 0xff0);
 887                        data |= ((0x0 << 0) | (0x04 << 4));
  888                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
 889
 890                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
  891                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
 892                        data &= ~(0xf | 0xff0);
 893                        data |= ((0x0 << 0) | (0x04 << 4));
  894                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 895                }
 896
 897                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
 898        }
 899
 900        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
 901        mutex_unlock(&adev->grbm_idx_mutex);
 902
 903        return 0;
 904}
 905
 906static int vce_v4_0_set_powergating_state(void *handle,
 907                                          enum amd_powergating_state state)
 908{
 909        /* This doesn't actually powergate the VCE block.
 910         * That's done in the dpm code via the SMC.  This
 911         * just re-inits the block as necessary.  The actual
 912         * gating still happens in the dpm code.  We should
 913         * revisit this when there is a cleaner line between
 914         * the smc and the hw blocks
 915         */
 916        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 917
 918        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
 919                return 0;
 920
 921        if (state == AMD_PG_STATE_GATE)
 922                /* XXX do we need a vce_v4_0_stop()? */
 923                return 0;
 924        else
 925                return vce_v4_0_start(adev);
 926}
 927#endif
 928
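/**
 * vce_v4_0_ring_emit_ib - emit an indirect buffer on the ring
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to schedule
 * @vm_id: VM ID to execute the IB with
 * @ctx_switch: unused
 *
 * Write the IB_VM command with the IB address and size to the ring.
 */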
 929static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
 930                struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
 931{
 932        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
 933        amdgpu_ring_write(ring, vm_id);
 934        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 935        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 936        amdgpu_ring_write(ring, ib->length_dw);
 937}
 938
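/**
 * vce_v4_0_ring_emit_fence - emit a fence command on the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address the fence value is written to
 * @seq: sequence number to write
 * @flags: fence flags (64 bit fences are not supported)
 *
 * Write a fence followed by a trap command to the ring.
 */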
 939static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 940                        u64 seq, unsigned flags)
 941{
 942        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 943
 944        amdgpu_ring_write(ring, VCE_CMD_FENCE);
 945        amdgpu_ring_write(ring, addr);
 946        amdgpu_ring_write(ring, upper_32_bits(addr));
 947        amdgpu_ring_write(ring, seq);
 948        amdgpu_ring_write(ring, VCE_CMD_TRAP);
 949}
 950
 951static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 952{
 953        amdgpu_ring_write(ring, VCE_CMD_END);
 954}
 955
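/**
 * vce_v4_0_emit_vm_flush - emit a VM page table flush
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM ID to flush
 * @pd_addr: page directory address
 *
 * Update the page table base for @vm_id, then request a TLB flush on the
 * ring's invalidation engine and wait for it to complete.
 */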
 956static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 957                         unsigned int vm_id, uint64_t pd_addr)
 958{
 959        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 960        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 961        unsigned eng = ring->vm_inv_eng;
 962
 963        pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
 964        pd_addr |= AMDGPU_PTE_VALID;
 965
 966        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 967        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
 968        amdgpu_ring_write(ring, upper_32_bits(pd_addr));
 969
 970        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 971        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
 972        amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 973
 974        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
 975        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
 976        amdgpu_ring_write(ring, 0xffffffff);
 977        amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 978
 979        /* flush TLB */
 980        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 981        amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
 982        amdgpu_ring_write(ring, req);
 983
 984        /* wait for flush */
 985        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
 986        amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
 987        amdgpu_ring_write(ring, 1 << vm_id);
 988        amdgpu_ring_write(ring, 1 << vm_id);
 989}
 990
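/**
 * vce_v4_0_set_interrupt_state - enable or disable the VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type (unused)
 * @state: requested interrupt state
 *
 * Toggle the system interrupt trap enable bit; a no-op under SRIOV.
 */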
 991static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
 992                                        struct amdgpu_irq_src *source,
 993                                        unsigned type,
 994                                        enum amdgpu_interrupt_state state)
 995{
 996        uint32_t val = 0;
 997
 998        if (!amdgpu_sriov_vf(adev)) {
 999                if (state == AMDGPU_IRQ_STATE_ENABLE)
1000                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1001
1002                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1003                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1004        }
1005        return 0;
1006}
1007
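/**
 * vce_v4_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Dispatch fence processing to the ring identified by the source data.
 */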
1008static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1009                                      struct amdgpu_irq_src *source,
1010                                      struct amdgpu_iv_entry *entry)
1011{
1012        DRM_DEBUG("IH: VCE\n");
1013
1014        switch (entry->src_data[0]) {
1015        case 0:
1016        case 1:
1017        case 2:
1018                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1019                break;
1020        default:
1021                DRM_ERROR("Unhandled interrupt: %d %d\n",
1022                          entry->src_id, entry->src_data[0]);
1023                break;
1024        }
1025
1026        return 0;
1027}
1028
1029const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1030        .name = "vce_v4_0",
1031        .early_init = vce_v4_0_early_init,
1032        .late_init = NULL,
1033        .sw_init = vce_v4_0_sw_init,
1034        .sw_fini = vce_v4_0_sw_fini,
1035        .hw_init = vce_v4_0_hw_init,
1036        .hw_fini = vce_v4_0_hw_fini,
1037        .suspend = vce_v4_0_suspend,
1038        .resume = vce_v4_0_resume,
1039        .is_idle = NULL /* vce_v4_0_is_idle */,
1040        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1041        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1042        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1043        .soft_reset = NULL /* vce_v4_0_soft_reset */,
1044        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1045        .set_clockgating_state = vce_v4_0_set_clockgating_state,
1046        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1047};
1048
1049static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1050        .type = AMDGPU_RING_TYPE_VCE,
1051        .align_mask = 0x3f,
1052        .nop = VCE_CMD_NO_OP,
1053        .support_64bit_ptrs = false,
1054        .vmhub = AMDGPU_MMHUB,
1055        .get_rptr = vce_v4_0_ring_get_rptr,
1056        .get_wptr = vce_v4_0_ring_get_wptr,
1057        .set_wptr = vce_v4_0_ring_set_wptr,
1058        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1059        .emit_frame_size =
1060                17 + /* vce_v4_0_emit_vm_flush */
1061                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1062                1, /* vce_v4_0_ring_insert_end */
1063        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1064        .emit_ib = vce_v4_0_ring_emit_ib,
1065        .emit_vm_flush = vce_v4_0_emit_vm_flush,
1066        .emit_fence = vce_v4_0_ring_emit_fence,
1067        .test_ring = amdgpu_vce_ring_test_ring,
1068        .test_ib = amdgpu_vce_ring_test_ib,
1069        .insert_nop = amdgpu_ring_insert_nop,
1070        .insert_end = vce_v4_0_ring_insert_end,
1071        .pad_ib = amdgpu_ring_generic_pad_ib,
1072        .begin_use = amdgpu_vce_ring_begin_use,
1073        .end_use = amdgpu_vce_ring_end_use,
1074};
1075
1076static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1077{
1078        int i;
1079
1080        for (i = 0; i < adev->vce.num_rings; i++)
1081                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1082        DRM_INFO("VCE enabled in VM mode\n");
1083}
1084
1085static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1086        .set = vce_v4_0_set_interrupt_state,
1087        .process = vce_v4_0_process_interrupt,
1088};
1089
1090static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1091{
1092        adev->vce.irq.num_types = 1;
1093        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
 1094}
1095
1096const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1097{
1098        .type = AMD_IP_BLOCK_TYPE_VCE,
1099        .major = 4,
1100        .minor = 0,
1101        .rev = 0,
1102        .funcs = &vce_v4_0_ip_funcs,
1103};
1104