linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26
  27#include <linux/firmware.h>
  28
  29#include "amdgpu.h"
  30#include "amdgpu_vce.h"
  31#include "soc15.h"
  32#include "soc15d.h"
  33#include "soc15_common.h"
  34#include "mmsch_v1_0.h"
  35
  36#include "vce/vce_4_0_offset.h"
  37#include "vce/vce_4_0_default.h"
  38#include "vce/vce_4_0_sh_mask.h"
  39#include "mmhub/mmhub_1_0_offset.h"
  40#include "mmhub/mmhub_1_0_sh_mask.h"
  41
  42#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
  43
  44#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02
  45
  46#define VCE_V4_0_FW_SIZE        (384 * 1024)
  47#define VCE_V4_0_STACK_SIZE     (64 * 1024)
  48#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
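/*
 * Descriptive note (editorial): these sizes describe the layout of the VCPU
 * buffer object as programmed by vce_v4_0_mc_resume() and
 * vce_v4_0_sriov_start(): cache region 0 holds the firmware image
 * (VCE_V4_0_FW_SIZE), region 1 the stack (VCE_V4_0_STACK_SIZE) and region 2
 * the per-handle data area (VCE_V4_0_DATA_SIZE).
 */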
  49
  50static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
  51static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
  52static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
  53
  54/**
  55 * vce_v4_0_ring_get_rptr - get read pointer
  56 *
  57 * @ring: amdgpu_ring pointer
  58 *
  59 * Returns the current hardware read pointer
  60 */
  61static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
  62{
  63        struct amdgpu_device *adev = ring->adev;
  64
  65        if (ring->me == 0)
  66                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
  67        else if (ring->me == 1)
  68                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
  69        else
  70                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
  71}
  72
  73/**
  74 * vce_v4_0_ring_get_wptr - get write pointer
  75 *
  76 * @ring: amdgpu_ring pointer
  77 *
  78 * Returns the current hardware write pointer
  79 */
  80static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
  81{
  82        struct amdgpu_device *adev = ring->adev;
  83
  84        if (ring->use_doorbell)
  85                return adev->wb.wb[ring->wptr_offs];
  86
  87        if (ring->me == 0)
  88                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
  89        else if (ring->me == 1)
  90                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
  91        else
  92                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
  93}
  94
  95/**
  96 * vce_v4_0_ring_set_wptr - set write pointer
  97 *
  98 * @ring: amdgpu_ring pointer
  99 *
 100 * Commits the write pointer to the hardware
 101 */
 102static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 103{
 104        struct amdgpu_device *adev = ring->adev;
 105
 106        if (ring->use_doorbell) {
 107                /* XXX check if swapping is necessary on BE */
 108                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 109                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 110                return;
 111        }
 112
 113        if (ring->me == 0)
 114                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
 115                        lower_32_bits(ring->wptr));
 116        else if (ring->me == 1)
 117                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
 118                        lower_32_bits(ring->wptr));
 119        else
 120                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
 121                        lower_32_bits(ring->wptr));
 122}
 123
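/**
 * vce_v4_0_firmware_loaded - wait for the VCPU firmware to report in
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit (100 * 10ms per attempt) and, if the
 * firmware does not respond, soft-reset the ECPU and retry, up to 10 times.
 * Returns 0 on success or -ETIMEDOUT if the firmware never reports as loaded.
 */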
 124static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
 125{
 126        int i, j;
 127
 128        for (i = 0; i < 10; ++i) {
 129                for (j = 0; j < 100; ++j) {
 130                        uint32_t status =
 131                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
 132
 133                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
 134                                return 0;
 135                        mdelay(10);
 136                }
 137
 138                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
 139                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 140                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
 141                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 142                mdelay(10);
 143                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
 144                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 145                mdelay(10);
 146
 147        }
 148
 149        return -ETIMEDOUT;
 150}
 151
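/*
 * Hand the prepared init table over to the MMSCH (the MM scheduler used
 * under SRIOV).  The handshake, as implemented below:
 *
 *   1. write the table's GPU address to VCE_MMSCH_VF_CTX_ADDR_LO/HI
 *   2. select VMID 0 in VCE_MMSCH_VF_VMID and program VCE_MMSCH_VF_CTX_SIZE
 *   3. clear VCE_MMSCH_VF_MAILBOX_RESP and zero ring 0's write pointers
 *   4. write 0x10000001 to VCE_MMSCH_VF_MAILBOX_HOST to kick things off
 *   5. poll VCE_MMSCH_VF_MAILBOX_RESP for 0x10000002 (up to 1000 * 10us)
 *
 * Returns 0 on success or -EBUSY if the MMSCH never acknowledges.
 */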
 152static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
 153                                struct amdgpu_mm_table *table)
 154{
 155        uint32_t data = 0, loop;
 156        uint64_t addr = table->gpu_addr;
 157        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
 158        uint32_t size;
 159
 160        size = header->header_size + header->vce_table_size + header->uvd_table_size;
 161
 162        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
 163        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
 164        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
 165
 166        /* 2, update vmid of descriptor */
 167        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
 168        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
 169        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
 170        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
 171
 172        /* 3, notify mmsch about the size of this descriptor */
 173        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
 174
 175        /* 4, set resp to zero */
 176        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
 177
 178        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
 179        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
 180        adev->vce.ring[0].wptr = 0;
 181        adev->vce.ring[0].wptr_old = 0;
 182
 183        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
 184        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
 185
 186        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
 187        loop = 1000;
 188        while ((data & 0x10000002) != 0x10000002) {
 189                udelay(10);
 190                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
 191                loop--;
 192                if (!loop)
 193                        break;
 194        }
 195
 196        if (!loop) {
 197                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
 198                return -EBUSY;
 199        }
 200
 201        return 0;
 202}
 203
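/*
 * Build the MMSCH init table for VCE under SRIOV.  Instead of writing the
 * ring and memory-controller registers directly (as vce_v4_0_start() does on
 * bare metal), each register access is encoded as a direct-write,
 * read-modify-write or polling command and appended to the shared table;
 * the MMSCH then replays the table when vce_v4_0_mmsch_start() is called.
 */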
 204static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 205{
 206        struct amdgpu_ring *ring;
 207        uint32_t offset, size;
 208        uint32_t table_size = 0;
 209        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
 210        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
 211        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
 212        struct mmsch_v1_0_cmd_end end = { { 0 } };
 213        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
 214        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
 215
 216        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
 217        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
 218        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
 219        end.cmd_header.command_type = MMSCH_COMMAND__END;
 220
 221        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
 222                header->version = MMSCH_VERSION;
 223                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
 224
 225                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
 226                        header->vce_table_offset = header->header_size;
 227                else
 228                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
 229
 230                init_table += header->vce_table_offset;
 231
 232                ring = &adev->vce.ring[0];
 233                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
 234                                            lower_32_bits(ring->gpu_addr));
 235                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
 236                                            upper_32_bits(ring->gpu_addr));
 237                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
 238                                            ring->ring_size / 4);
 239
  240                /* begin of MC_RESUME */
 241                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
 242                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
 243                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
 244                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 245                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 246
 247                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 248                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 249                        uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
 250                        uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
 251                        uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
 252
 253                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 254                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
 255                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 256                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 257                                                (tmr_mc_addr >> 40) & 0xff);
 258                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
 259                } else {
 260                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 261                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 262                                                adev->vce.gpu_addr >> 8);
 263                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 264                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 265                                                (adev->vce.gpu_addr >> 40) & 0xff);
 266                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
 267                                                offset & ~0x0f000000);
 268
 269                }
 270                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 271                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
 272                                                adev->vce.gpu_addr >> 8);
 273                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 274                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
 275                                                (adev->vce.gpu_addr >> 40) & 0xff);
 276                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 277                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
 278                                                adev->vce.gpu_addr >> 8);
 279                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 280                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
 281                                                (adev->vce.gpu_addr >> 40) & 0xff);
 282
 283                size = VCE_V4_0_FW_SIZE;
 284                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 285
 286                offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
 287                size = VCE_V4_0_STACK_SIZE;
 288                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
 289                                        (offset & ~0x0f000000) | (1 << 24));
 290                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 291
 292                offset += size;
 293                size = VCE_V4_0_DATA_SIZE;
 294                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
 295                                        (offset & ~0x0f000000) | (2 << 24));
 296                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 297
 298                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
 299                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
 300                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
 301                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 302
 303                /* end of MC_RESUME */
 304                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 305                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
 306                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
 307                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
 308                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 309                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
 310
 311                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 312                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
 313                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
 314
 315                /* clear BUSY flag */
 316                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
 317                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);
 318
 319                /* add end packet */
 320                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
 321                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
 322                header->vce_table_size = table_size;
 323        }
 324
 325        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
 326}
 327
 328/**
 329 * vce_v4_0_start - start VCE block
 330 *
 331 * @adev: amdgpu_device pointer
 332 *
 333 * Setup and start the VCE block
 334 */
 335static int vce_v4_0_start(struct amdgpu_device *adev)
 336{
 337        struct amdgpu_ring *ring;
 338        int r;
 339
 340        ring = &adev->vce.ring[0];
 341
 342        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
 343        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
 344        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
 345        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
 346        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
 347
 348        ring = &adev->vce.ring[1];
 349
 350        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
 351        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
 352        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
 353        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
 354        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
 355
 356        ring = &adev->vce.ring[2];
 357
 358        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
 359        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
 360        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
 361        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
 362        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
 363
 364        vce_v4_0_mc_resume(adev);
 365        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
 366                        ~VCE_STATUS__JOB_BUSY_MASK);
 367
 368        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
 369
 370        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
 371                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 372        mdelay(100);
 373
 374        r = vce_v4_0_firmware_loaded(adev);
 375
 376        /* clear BUSY flag */
 377        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
 378
 379        if (r) {
 380                DRM_ERROR("VCE not responding, giving up!!!\n");
 381                return r;
 382        }
 383
 384        return 0;
 385}
 386
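/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear VCE_STATUS.
 */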
 387static int vce_v4_0_stop(struct amdgpu_device *adev)
 388{
 389
 390        /* Disable VCPU */
 391        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
 392
 393        /* hold on ECPU */
 394        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
 395                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
 396                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 397
 398        /* clear VCE_STATUS */
 399        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
 400
 401        /* Set Clock-Gating off */
 402        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
 403                vce_v4_0_set_vce_sw_clock_gating(adev, false);
 404        */
 405
 406        return 0;
 407}
 408
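/*
 * IP block callbacks.  early_init picks the number of encode rings (one
 * under SRIOV, three otherwise) and installs the ring and IRQ function
 * tables before any hardware is touched.
 */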
 409static int vce_v4_0_early_init(void *handle)
 410{
 411        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 412
  413        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
 414                adev->vce.num_rings = 1;
 415        else
 416                adev->vce.num_rings = 3;
 417
 418        vce_v4_0_set_ring_funcs(adev);
 419        vce_v4_0_set_irq_funcs(adev);
 420
 421        return 0;
 422}
 423
 424static int vce_v4_0_sw_init(void *handle)
 425{
 426        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 427        struct amdgpu_ring *ring;
 428
 429        unsigned size;
 430        int r, i;
 431
 432        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
 433        if (r)
 434                return r;
 435
 436        size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
 437        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 438                size += VCE_V4_0_FW_SIZE;
 439
 440        r = amdgpu_vce_sw_init(adev, size);
 441        if (r)
 442                return r;
 443
 444        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 445                const struct common_firmware_header *hdr;
 446                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 447
 448                adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
 449                if (!adev->vce.saved_bo)
 450                        return -ENOMEM;
 451
 452                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
 453                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
 454                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
 455                adev->firmware.fw_size +=
 456                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
 457                DRM_INFO("PSP loading VCE firmware\n");
 458        } else {
 459                r = amdgpu_vce_resume(adev);
 460                if (r)
 461                        return r;
 462        }
 463
 464        for (i = 0; i < adev->vce.num_rings; i++) {
 465                ring = &adev->vce.ring[i];
 466                sprintf(ring->name, "vce%d", i);
 467                if (amdgpu_sriov_vf(adev)) {
 468                        /* DOORBELL only works under SRIOV */
 469                        ring->use_doorbell = true;
 470
  471                        /* currently only the first encode ring is used under SRIOV,
  472                         * so give the remaining, unused rings a separate doorbell slot.
 473                         */
 474                        if (i == 0)
 475                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
 476                        else
 477                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
 478                }
 479                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
 480                                     AMDGPU_RING_PRIO_DEFAULT, NULL);
 481                if (r)
 482                        return r;
 483        }
 484
 485
 486        r = amdgpu_vce_entity_init(adev);
 487        if (r)
 488                return r;
 489
 490        r = amdgpu_virt_alloc_mm_table(adev);
 491        if (r)
 492                return r;
 493
 494        return r;
 495}
 496
 497static int vce_v4_0_sw_fini(void *handle)
 498{
 499        int r;
 500        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 501
 502        /* free MM table */
 503        amdgpu_virt_free_mm_table(adev);
 504
 505        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 506                kvfree(adev->vce.saved_bo);
 507                adev->vce.saved_bo = NULL;
 508        }
 509
 510        r = amdgpu_vce_suspend(adev);
 511        if (r)
 512                return r;
 513
 514        return amdgpu_vce_sw_fini(adev);
 515}
 516
 517static int vce_v4_0_hw_init(void *handle)
 518{
 519        int r, i;
 520        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 521
 522        if (amdgpu_sriov_vf(adev))
 523                r = vce_v4_0_sriov_start(adev);
 524        else
 525                r = vce_v4_0_start(adev);
 526        if (r)
 527                return r;
 528
 529        for (i = 0; i < adev->vce.num_rings; i++) {
 530                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
 531                if (r)
 532                        return r;
 533        }
 534
 535        DRM_INFO("VCE initialized successfully.\n");
 536
 537        return 0;
 538}
 539
 540static int vce_v4_0_hw_fini(void *handle)
 541{
 542        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 543
 544        if (!amdgpu_sriov_vf(adev)) {
 545                /* vce_v4_0_wait_for_idle(handle); */
 546                vce_v4_0_stop(adev);
 547        } else {
 548                /* full access mode, so don't touch any VCE register */
 549                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
 550        }
 551
 552        return 0;
 553}
 554
 555static int vce_v4_0_suspend(void *handle)
 556{
 557        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 558        int r;
 559
 560        if (adev->vce.vcpu_bo == NULL)
 561                return 0;
 562
 563        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 564                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 565                void *ptr = adev->vce.cpu_addr;
 566
 567                memcpy_fromio(adev->vce.saved_bo, ptr, size);
 568        }
 569
 570        r = vce_v4_0_hw_fini(adev);
 571        if (r)
 572                return r;
 573
 574        return amdgpu_vce_suspend(adev);
 575}
 576
 577static int vce_v4_0_resume(void *handle)
 578{
 579        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 580        int r;
 581
 582        if (adev->vce.vcpu_bo == NULL)
 583                return -EINVAL;
 584
 585        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 586                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 587                void *ptr = adev->vce.cpu_addr;
 588
 589                memcpy_toio(ptr, adev->vce.saved_bo, size);
 590        } else {
 591                r = amdgpu_vce_resume(adev);
 592                if (r)
 593                        return r;
 594        }
 595
 596        return vce_v4_0_hw_init(adev);
 597}
 598
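/*
 * Program the memory-controller side of the VCE block: clock-gating
 * defaults, LMI/cache control and the three VCPU cache regions (firmware,
 * stack and data).  When the firmware is loaded by the PSP, cache region 0
 * points at the TMR address supplied by the PSP; otherwise it points at the
 * driver-allocated VCPU buffer object.
 */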
 599static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
 600{
 601        uint32_t offset, size;
 602        uint64_t tmr_mc_addr;
 603
 604        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
 605        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
 606        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
 607        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
 608
 609        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
 610        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
 611        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
 612        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 613        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 614
 615        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 616
 617        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 618                tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
 619                                                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
 620                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 621                        (tmr_mc_addr >> 8));
 622                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 623                        (tmr_mc_addr >> 40) & 0xff);
 624                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
 625        } else {
 626                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
 627                        (adev->vce.gpu_addr >> 8));
 628                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 629                        (adev->vce.gpu_addr >> 40) & 0xff);
 630                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
 631        }
 632
 633        size = VCE_V4_0_FW_SIZE;
 634        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 635
 636        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
 637        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
 638        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
 639        size = VCE_V4_0_STACK_SIZE;
 640        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
 641        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
 642
 643        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
 644        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
 645        offset += size;
 646        size = VCE_V4_0_DATA_SIZE;
 647        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
 648        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
 649
 650        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
 651        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
 652                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
 653                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
 654}
 655
 656static int vce_v4_0_set_clockgating_state(void *handle,
 657                                          enum amd_clockgating_state state)
 658{
  659        /* needed for driver unload */
 660        return 0;
 661}
 662
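/*
 * The block below is compiled out: the idle checks, soft-reset and
 * clock-gating helpers are kept for reference but are not wired up in
 * vce_v4_0_ip_funcs (the corresponding callbacks are set to NULL).
 */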
 663#if 0
 664static bool vce_v4_0_is_idle(void *handle)
 665{
 666        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 667        u32 mask = 0;
 668
 669        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
 670        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
 671
 672        return !(RREG32(mmSRBM_STATUS2) & mask);
 673}
 674
 675static int vce_v4_0_wait_for_idle(void *handle)
 676{
 677        unsigned i;
 678        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 679
 680        for (i = 0; i < adev->usec_timeout; i++)
 681                if (vce_v4_0_is_idle(handle))
 682                        return 0;
 683
 684        return -ETIMEDOUT;
 685}
 686
 687#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
 688#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
 689#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
 690#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
 691                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
 692
 693static bool vce_v4_0_check_soft_reset(void *handle)
 694{
 695        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 696        u32 srbm_soft_reset = 0;
 697
  698        /* According to the VCE team, we should use VCE_STATUS instead of the
  699         * SRBM_STATUS.VCE_BUSY bit for busy status checking.
  700         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
  701         * instance's registers are accessed
  702         * (0 for the 1st instance, 0x10 for the 2nd instance).
  703         *
  704         * VCE_STATUS
  705         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
  706         * |----+----+-----------+----+----+----+----------+---------+----|
  707         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
  708         *
  709         * The VCE team suggests using bits 3 to 6 for the busy status check.
  710         */
 711        mutex_lock(&adev->grbm_idx_mutex);
 712        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
  713        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
 714                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
 715                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
 716        }
 717        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
  718        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
 719                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
 720                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
 721        }
 722        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
 723        mutex_unlock(&adev->grbm_idx_mutex);
 724
 725        if (srbm_soft_reset) {
 726                adev->vce.srbm_soft_reset = srbm_soft_reset;
 727                return true;
 728        } else {
 729                adev->vce.srbm_soft_reset = 0;
 730                return false;
 731        }
 732}
 733
 734static int vce_v4_0_soft_reset(void *handle)
 735{
 736        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 737        u32 srbm_soft_reset;
 738
 739        if (!adev->vce.srbm_soft_reset)
 740                return 0;
 741        srbm_soft_reset = adev->vce.srbm_soft_reset;
 742
 743        if (srbm_soft_reset) {
 744                u32 tmp;
 745
 746                tmp = RREG32(mmSRBM_SOFT_RESET);
 747                tmp |= srbm_soft_reset;
 748                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
 749                WREG32(mmSRBM_SOFT_RESET, tmp);
 750                tmp = RREG32(mmSRBM_SOFT_RESET);
 751
 752                udelay(50);
 753
 754                tmp &= ~srbm_soft_reset;
 755                WREG32(mmSRBM_SOFT_RESET, tmp);
 756                tmp = RREG32(mmSRBM_SOFT_RESET);
 757
 758                /* Wait a little for things to settle down */
 759                udelay(50);
 760        }
 761
 762        return 0;
 763}
 764
 765static int vce_v4_0_pre_soft_reset(void *handle)
 766{
 767        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 768
 769        if (!adev->vce.srbm_soft_reset)
 770                return 0;
 771
 772        mdelay(5);
 773
 774        return vce_v4_0_suspend(adev);
 775}
 776
 777
 778static int vce_v4_0_post_soft_reset(void *handle)
 779{
 780        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 781
 782        if (!adev->vce.srbm_soft_reset)
 783                return 0;
 784
 785        mdelay(5);
 786
 787        return vce_v4_0_resume(adev);
 788}
 789
 790static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
 791{
 792        u32 tmp, data;
 793
 794        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
 795        if (override)
 796                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
 797        else
 798                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
 799
 800        if (tmp != data)
 801                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
 802}
 803
 804static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
 805                                             bool gated)
 806{
 807        u32 data;
 808
 809        /* Set Override to disable Clock Gating */
 810        vce_v4_0_override_vce_clock_gating(adev, true);
 811
 812        /* This function enables MGCG which is controlled by firmware.
 813           With the clocks in the gated state the core is still
 814           accessible but the firmware will throttle the clocks on the
 815           fly as necessary.
 816        */
 817        if (gated) {
 818                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
 819                data |= 0x1ff;
 820                data &= ~0xef0000;
 821                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
 822
 823                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
 824                data |= 0x3ff000;
 825                data &= ~0xffc00000;
 826                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 827
 828                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
 829                data |= 0x2;
 830                data &= ~0x00010000;
 831                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
 832
 833                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
 834                data |= 0x37f;
 835                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
 836
 837                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
 838                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
 839                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
 840                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
 841                        0x8;
 842                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
 843        } else {
 844                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
 845                data &= ~0x80010;
 846                data |= 0xe70008;
 847                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
 848
 849                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
 850                data |= 0xffc00000;
 851                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 852
 853                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
 854                data |= 0x10000;
 855                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
 856
 857                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
 858                data &= ~0xffc00000;
 859                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
 860
 861                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
 862                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
 863                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
 864                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
 865                          0x8);
 866                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
 867        }
 868        vce_v4_0_override_vce_clock_gating(adev, false);
 869}
 870
 871static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
 872{
 873        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
 874
 875        if (enable)
 876                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
 877        else
 878                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
 879
 880        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
 881}
 882
 883static int vce_v4_0_set_clockgating_state(void *handle,
 884                                          enum amd_clockgating_state state)
 885{
 886        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 887        bool enable = (state == AMD_CG_STATE_GATE);
 888        int i;
 889
 890        if ((adev->asic_type == CHIP_POLARIS10) ||
 891                (adev->asic_type == CHIP_TONGA) ||
 892                (adev->asic_type == CHIP_FIJI))
 893                vce_v4_0_set_bypass_mode(adev, enable);
 894
 895        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
 896                return 0;
 897
 898        mutex_lock(&adev->grbm_idx_mutex);
 899        for (i = 0; i < 2; i++) {
 900                /* Program VCE Instance 0 or 1 if not harvested */
 901                if (adev->vce.harvest_config & (1 << i))
 902                        continue;
 903
 904                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
 905
 906                if (enable) {
 907                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
  908                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
  909                        data &= ~(0xf | 0xff0);
  910                        data |= ((0x0 << 0) | (0x04 << 4));
  911                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
  912
  913                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
  914                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
  915                        data &= ~(0xf | 0xff0);
  916                        data |= ((0x0 << 0) | (0x04 << 4));
  917                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
 918                }
 919
 920                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
 921        }
 922
 923        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
 924        mutex_unlock(&adev->grbm_idx_mutex);
 925
 926        return 0;
 927}
 928#endif
 929
 930static int vce_v4_0_set_powergating_state(void *handle,
 931                                          enum amd_powergating_state state)
 932{
 933        /* This doesn't actually powergate the VCE block.
 934         * That's done in the dpm code via the SMC.  This
 935         * just re-inits the block as necessary.  The actual
 936         * gating still happens in the dpm code.  We should
 937         * revisit this when there is a cleaner line between
 938         * the smc and the hw blocks
 939         */
 940        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 941
 942        if (state == AMD_PG_STATE_GATE)
 943                return vce_v4_0_stop(adev);
 944        else
 945                return vce_v4_0_start(adev);
 946}
 947
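/*
 * VCE 4.0 uses a simple dword command stream; an indirect buffer is
 * submitted as a five-dword packet: VCE_CMD_IB_VM, the VMID, the IB GPU
 * address split into low/high dwords, and the IB length in dwords (this is
 * what .emit_ib_size = 5 in the ring funcs below accounts for).
 */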
 948static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
 949                                        struct amdgpu_ib *ib, uint32_t flags)
 950{
 951        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 952
 953        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
 954        amdgpu_ring_write(ring, vmid);
 955        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 956        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 957        amdgpu_ring_write(ring, ib->length_dw);
 958}
 959
 960static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 961                        u64 seq, unsigned flags)
 962{
 963        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 964
 965        amdgpu_ring_write(ring, VCE_CMD_FENCE);
 966        amdgpu_ring_write(ring, addr);
 967        amdgpu_ring_write(ring, upper_32_bits(addr));
 968        amdgpu_ring_write(ring, seq);
 969        amdgpu_ring_write(ring, VCE_CMD_TRAP);
 970}
 971
 972static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 973{
 974        amdgpu_ring_write(ring, VCE_CMD_END);
 975}
 976
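/*
 * Emit a register wait packet: the register's dword offset is shifted left
 * by two to form a byte offset, followed by the mask and the value to wait
 * for.  Used by vce_v4_0_emit_vm_flush() to wait for the flush writes to land.
 */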
 977static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 978                                   uint32_t val, uint32_t mask)
 979{
 980        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
 981        amdgpu_ring_write(ring, reg << 2);
 982        amdgpu_ring_write(ring, mask);
 983        amdgpu_ring_write(ring, val);
 984}
 985
 986static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 987                                   unsigned int vmid, uint64_t pd_addr)
 988{
 989        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 990
 991        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 992
 993        /* wait for reg writes */
 994        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
 995                               vmid * hub->ctx_addr_distance,
 996                               lower_32_bits(pd_addr), 0xffffffff);
 997}
 998
 999static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1000                               uint32_t reg, uint32_t val)
1001{
1002        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1003        amdgpu_ring_write(ring, reg << 2);
1004        amdgpu_ring_write(ring, val);
1005}
1006
1007static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1008                                        struct amdgpu_irq_src *source,
1009                                        unsigned type,
1010                                        enum amdgpu_interrupt_state state)
1011{
1012        uint32_t val = 0;
1013
1014        if (!amdgpu_sriov_vf(adev)) {
1015                if (state == AMDGPU_IRQ_STATE_ENABLE)
1016                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1017
1018                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1019                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1020        }
1021        return 0;
1022}
1023
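/* entry->src_data[0] carries the ring index (0-2); anything else is unexpected. */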
1024static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1025                                      struct amdgpu_irq_src *source,
1026                                      struct amdgpu_iv_entry *entry)
1027{
1028        DRM_DEBUG("IH: VCE\n");
1029
1030        switch (entry->src_data[0]) {
1031        case 0:
1032        case 1:
1033        case 2:
1034                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1035                break;
1036        default:
1037                DRM_ERROR("Unhandled interrupt: %d %d\n",
1038                          entry->src_id, entry->src_data[0]);
1039                break;
1040        }
1041
1042        return 0;
1043}
1044
1045const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1046        .name = "vce_v4_0",
1047        .early_init = vce_v4_0_early_init,
1048        .late_init = NULL,
1049        .sw_init = vce_v4_0_sw_init,
1050        .sw_fini = vce_v4_0_sw_fini,
1051        .hw_init = vce_v4_0_hw_init,
1052        .hw_fini = vce_v4_0_hw_fini,
1053        .suspend = vce_v4_0_suspend,
1054        .resume = vce_v4_0_resume,
1055        .is_idle = NULL /* vce_v4_0_is_idle */,
1056        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1057        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1058        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1059        .soft_reset = NULL /* vce_v4_0_soft_reset */,
1060        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1061        .set_clockgating_state = vce_v4_0_set_clockgating_state,
1062        .set_powergating_state = vce_v4_0_set_powergating_state,
1063};
1064
1065static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1066        .type = AMDGPU_RING_TYPE_VCE,
1067        .align_mask = 0x3f,
1068        .nop = VCE_CMD_NO_OP,
1069        .support_64bit_ptrs = false,
1070        .no_user_fence = true,
1071        .vmhub = AMDGPU_MMHUB_0,
1072        .get_rptr = vce_v4_0_ring_get_rptr,
1073        .get_wptr = vce_v4_0_ring_get_wptr,
1074        .set_wptr = vce_v4_0_ring_set_wptr,
1075        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1076        .emit_frame_size =
1077                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1078                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1079                4 + /* vce_v4_0_emit_vm_flush */
1080                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1081                1, /* vce_v4_0_ring_insert_end */
1082        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1083        .emit_ib = vce_v4_0_ring_emit_ib,
1084        .emit_vm_flush = vce_v4_0_emit_vm_flush,
1085        .emit_fence = vce_v4_0_ring_emit_fence,
1086        .test_ring = amdgpu_vce_ring_test_ring,
1087        .test_ib = amdgpu_vce_ring_test_ib,
1088        .insert_nop = amdgpu_ring_insert_nop,
1089        .insert_end = vce_v4_0_ring_insert_end,
1090        .pad_ib = amdgpu_ring_generic_pad_ib,
1091        .begin_use = amdgpu_vce_ring_begin_use,
1092        .end_use = amdgpu_vce_ring_end_use,
1093        .emit_wreg = vce_v4_0_emit_wreg,
1094        .emit_reg_wait = vce_v4_0_emit_reg_wait,
1095        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1096};
1097
1098static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1099{
1100        int i;
1101
1102        for (i = 0; i < adev->vce.num_rings; i++) {
1103                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1104                adev->vce.ring[i].me = i;
1105        }
1106        DRM_INFO("VCE enabled in VM mode\n");
1107}
1108
1109static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1110        .set = vce_v4_0_set_interrupt_state,
1111        .process = vce_v4_0_process_interrupt,
1112};
1113
1114static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1115{
1116        adev->vce.irq.num_types = 1;
1117        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
 1118}
1119
1120const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1121{
1122        .type = AMD_IP_BLOCK_TYPE_VCE,
1123        .major = 4,
1124        .minor = 0,
1125        .rev = 0,
1126        .funcs = &vce_v4_0_ip_funcs,
1127};
1128