linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring->me == 0)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring->me == 1)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}

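/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the VCPU reports the firmware as loaded,
 * soft-resetting the ECPU a few times if it does not respond.
 * Returns 0 on success, -ETIMEDOUT if the firmware never comes up.
 */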
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

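/**
 * vce_v4_0_mmsch_start - kick off the MMSCH initialization
 *
 * @adev: amdgpu_device pointer
 * @table: descriptor table with the VCE/UVD init register programming
 *
 * Points the MMSCH at the descriptor table, programs its VMID and size,
 * then triggers the initialization and waits for the mailbox response.
 */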
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
        adev->vce.ring[0].wptr = 0;
        adev->vce.ring[0].wptr_old = 0;

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}

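/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH descriptor table (ring setup plus the MC_RESUME and
 * boot sequence as direct register write/poll commands) and hands it
 * to the MMSCH for execution.
 */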
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                            lower_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                            upper_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                            ring->ring_size / 4);
                /* begin of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
                        uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
                        uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (tmr_mc_addr >> 40) & 0xff);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
                } else {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                                offset & ~0x0f000000);
                }
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
                                                (adev->vce.gpu_addr >> 40) & 0xff);

                size = VCE_V4_0_FW_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
                size = VCE_V4_0_STACK_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                        (offset & ~0x0f000000) | (1 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                        (offset & ~0x0f000000) | (2 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;
        }

        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

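/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU, holds the ECPU in soft reset and clears VCE_STATUS.
 */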
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        /* Disable VCPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear VCE_STATUS */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                vce_v4_0_set_vce_sw_clock_gating(adev, false);
        */

        return 0;
}

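/**
 * vce_v4_0_early_init - set up IP block state
 *
 * @handle: amdgpu_device pointer
 *
 * Picks the number of rings (one under SR-IOV, three otherwise) and
 * installs the ring and interrupt callbacks.
 */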
static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

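/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the interrupt source, allocates the firmware/stack/data
 * memory, initializes the rings and, under SR-IOV, the MM table.
 */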
static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

                adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
                if (!adev->vce.saved_bo)
                        return -ENOMEM;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);

                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;

                        /* currently only use the first encoding ring for sriov,
                         * so set unused location for other unused rings.
                         */
                        if (i == 0)
                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
                        else
                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
                                     hw_prio, NULL);
                if (r)
                        return r;
        }

        r = amdgpu_vce_entity_init(adev);
        if (r)
                return r;

        r = amdgpu_virt_alloc_mm_table(adev);
        if (r)
                return r;

        return r;
}

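/**
 * vce_v4_0_sw_fini - software fini
 *
 * @handle: amdgpu_device pointer
 *
 * Tears down everything vce_v4_0_sw_init() set up.
 */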
static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kvfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

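/**
 * vce_v4_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the VCE block (via the MMSCH under SR-IOV) and tests the rings.
 */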
static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
                if (r)
                        return r;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

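/**
 * vce_v4_0_hw_fini - hardware fini
 *
 * @handle: amdgpu_device pointer
 *
 * Stops the VCE block; under SR-IOV the registers are left untouched.
 */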
static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        cancel_delayed_work_sync(&adev->vce.idle_work);

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        return 0;
}

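/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Saves the VCPU BO for PSP-loaded firmware, gates the block and
 * stops the hardware.
 */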
static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r, idx;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (drm_dev_enter(adev_to_drm(adev), &idx)) {
                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                        void *ptr = adev->vce.cpu_addr;

                        memcpy_fromio(adev->vce.saved_bo, ptr, size);
                }
                drm_dev_exit(idx);
        }

        /*
         * Proper cleanups before halting the HW engine:
         *   - cancel the delayed idle work
         *   - enable powergating
         *   - enable clockgating
         *   - disable dpm
         *
         * TODO: to align with the VCN implementation, move the
         * jobs for clockgating/powergating/dpm setting to
         * ->set_powergating_state().
         */
        cancel_delayed_work_sync(&adev->vce.idle_work);

        if (adev->pm.dpm_enabled) {
                amdgpu_dpm_enable_vce(adev, false);
        } else {
                amdgpu_asic_set_vce_clocks(adev, 0, 0);
                amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                                       AMD_PG_STATE_GATE);
                amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                                       AMD_CG_STATE_GATE);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

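/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restores the VCPU BO saved in vce_v4_0_suspend() and restarts
 * the hardware.
 */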
static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r, idx;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                if (drm_dev_enter(adev_to_drm(adev), &idx)) {
                        unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                        void *ptr = adev->vce.cpu_addr;

                        memcpy_toio(ptr, adev->vce.saved_bo, size);
                        drm_dev_exit(idx);
                }
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}

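/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the LMI and VCPU cache registers with the firmware, stack
 * and data offsets so the engine can fetch its ucode and scratch data.
 */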
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;
        uint64_t tmr_mc_addr;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
                        adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (tmr_mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (tmr_mc_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        }

        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for 1st instance, 0x10 for 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /*
         * This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
                (adev->asic_type == CHIP_TONGA) ||
                (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (state == AMD_PG_STATE_GATE)
                return vce_v4_0_stop(adev);
        else
                return vce_v4_0_start(adev);
}

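/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the VMID from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Writes the IB's VMID, address and length to the ring.
 */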
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                        struct amdgpu_ib *ib, uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

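/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address to write the fence value to
 * @seq: sequence number to write
 * @flags: fence flags (64-bit sequence writes are not supported)
 *
 * Writes a fence followed by a trap command.
 */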
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for reg writes */
        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
                               vmid * hub->ctx_addr_distance,
                               lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
                               uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (!amdgpu_sriov_vf(adev)) {
                if (state == AMDGPU_IRQ_STATE_ENABLE)
                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        }
        return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .vmhub = AMDGPU_MMHUB_0,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
                4 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++) {
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
                adev->vce.ring[i].me = i;
        }
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};