linux/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
<<
>>
Prefs
   1/*
   2 * Copyright 2018 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/firmware.h>
  25
  26#include "amdgpu.h"
  27#include "amdgpu_vcn.h"
  28#include "soc15.h"
  29#include "soc15d.h"
  30#include "amdgpu_pm.h"
  31#include "amdgpu_psp.h"
  32#include "mmsch_v2_0.h"
  33#include "vcn_v2_0.h"
  34
  35#include "vcn/vcn_2_0_0_offset.h"
  36#include "vcn/vcn_2_0_0_sh_mask.h"
  37#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
  38
  39#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET                        0x1fd
  40#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET                    0x503
  41#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET                  0x504
  42#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET                  0x505
  43#define mmUVD_NO_OP_INTERNAL_OFFSET                             0x53f
  44#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET                       0x54a
  45#define mmUVD_SCRATCH9_INTERNAL_OFFSET                          0xc01d
  46
  47#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET                   0x1e1
  48#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET         0x5a6
  49#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET          0x5a7
  50#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET                       0x1e2
  51
  52static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
  53static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
  54static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
  55static int vcn_v2_0_set_powergating_state(void *handle,
  56                                enum amd_powergating_state state);
  57static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
  58                                int inst_idx, struct dpg_pause_state *new_state);
  59static int vcn_v2_0_start_sriov(struct amdgpu_device *adev);
  60/**
  61 * vcn_v2_0_early_init - set function pointers
  62 *
  63 * @handle: amdgpu_device pointer
  64 *
  65 * Set ring and irq function pointers
  66 */
  67static int vcn_v2_0_early_init(void *handle)
  68{
  69        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  70
  71        adev->vcn.num_vcn_inst = 1;
  72        if (amdgpu_sriov_vf(adev))
  73                adev->vcn.num_enc_rings = 1;
  74        else
  75                adev->vcn.num_enc_rings = 2;
  76
  77        vcn_v2_0_set_dec_ring_funcs(adev);
  78        vcn_v2_0_set_enc_ring_funcs(adev);
  79        vcn_v2_0_set_irq_funcs(adev);
  80
  81        return 0;
  82}
  83
  84/**
  85 * vcn_v2_0_sw_init - sw init for VCN block
  86 *
  87 * @handle: amdgpu_device pointer
  88 *
  89 * Load firmware and sw initialization
  90 */
  91static int vcn_v2_0_sw_init(void *handle)
  92{
  93        struct amdgpu_ring *ring;
  94        int i, r;
  95        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  96        volatile struct amdgpu_fw_shared *fw_shared;
  97
  98        /* VCN DEC TRAP */
  99        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
 100                              VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT,
 101                              &adev->vcn.inst->irq);
 102        if (r)
 103                return r;
 104
 105        /* VCN ENC TRAP */
 106        for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 107                r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
 108                                      i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
 109                                      &adev->vcn.inst->irq);
 110                if (r)
 111                        return r;
 112        }
 113
 114        r = amdgpu_vcn_sw_init(adev);
 115        if (r)
 116                return r;
 117
 118        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 119                const struct common_firmware_header *hdr;
 120                hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
 121                adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
 122                adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
 123                adev->firmware.fw_size +=
 124                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
 125                DRM_INFO("PSP loading VCN firmware\n");
 126        }
 127
 128        r = amdgpu_vcn_resume(adev);
 129        if (r)
 130                return r;
 131
 132        ring = &adev->vcn.inst->ring_dec;
 133
 134        ring->use_doorbell = true;
 135        ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
 136
 137        sprintf(ring->name, "vcn_dec");
 138        r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
 139                             AMDGPU_RING_PRIO_DEFAULT, NULL);
 140        if (r)
 141                return r;
 142
 143        adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
 144        adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
 145        adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
 146        adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
 147        adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
 148        adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
 149
 150        adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
 151        adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
 152        adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
 153        adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
 154        adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
 155        adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
 156        adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
 157        adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
 158        adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
 159        adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
 160
 161        for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 162                ring = &adev->vcn.inst->ring_enc[i];
 163                ring->use_doorbell = true;
 164                if (!amdgpu_sriov_vf(adev))
 165                        ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
 166                else
 167                        ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i;
 168                sprintf(ring->name, "vcn_enc%d", i);
 169                r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
 170                                     AMDGPU_RING_PRIO_DEFAULT, NULL);
 171                if (r)
 172                        return r;
 173        }
 174
 175        adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
 176
 177        r = amdgpu_virt_alloc_mm_table(adev);
 178        if (r)
 179                return r;
 180
 181        fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
 182        fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
 183        return 0;
 184}
 185
 186/**
 187 * vcn_v2_0_sw_fini - sw fini for VCN block
 188 *
 189 * @handle: amdgpu_device pointer
 190 *
 191 * VCN suspend and free up sw allocation
 192 */
 193static int vcn_v2_0_sw_fini(void *handle)
 194{
 195        int r;
 196        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 197        volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
 198
 199        fw_shared->present_flag_0 = 0;
 200
 201        amdgpu_virt_free_mm_table(adev);
 202
 203        r = amdgpu_vcn_suspend(adev);
 204        if (r)
 205                return r;
 206
 207        r = amdgpu_vcn_sw_fini(adev);
 208
 209        return r;
 210}
 211
 212/**
 213 * vcn_v2_0_hw_init - start and test VCN block
 214 *
 215 * @handle: amdgpu_device pointer
 216 *
 217 * Initialize the hardware, boot up the VCPU and do some testing
 218 */
 219static int vcn_v2_0_hw_init(void *handle)
 220{
 221        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 222        struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
 223        int i, r;
 224
 225        adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
 226                                             ring->doorbell_index, 0);
 227
 228        if (amdgpu_sriov_vf(adev))
 229                vcn_v2_0_start_sriov(adev);
 230
 231        r = amdgpu_ring_test_helper(ring);
 232        if (r)
 233                goto done;
 234
 235        //Disable vcn decode for sriov
 236        if (amdgpu_sriov_vf(adev))
 237                ring->sched.ready = false;
 238
 239        for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 240                ring = &adev->vcn.inst->ring_enc[i];
 241                r = amdgpu_ring_test_helper(ring);
 242                if (r)
 243                        goto done;
 244        }
 245
 246done:
 247        if (!r)
 248                DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
 249                        (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
 250
 251        return r;
 252}
 253
 254/**
 255 * vcn_v2_0_hw_fini - stop the hardware block
 256 *
 257 * @handle: amdgpu_device pointer
 258 *
 259 * Stop the VCN block, mark ring as not ready any more
 260 */
 261static int vcn_v2_0_hw_fini(void *handle)
 262{
 263        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 264
 265        cancel_delayed_work_sync(&adev->vcn.idle_work);
 266
 267        if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
 268            (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
 269              RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
 270                vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 271
 272        return 0;
 273}
 274
 275/**
 276 * vcn_v2_0_suspend - suspend VCN block
 277 *
 278 * @handle: amdgpu_device pointer
 279 *
 280 * HW fini and suspend VCN block
 281 */
 282static int vcn_v2_0_suspend(void *handle)
 283{
 284        int r;
 285        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 286
 287        r = vcn_v2_0_hw_fini(adev);
 288        if (r)
 289                return r;
 290
 291        r = amdgpu_vcn_suspend(adev);
 292
 293        return r;
 294}
 295
 296/**
 297 * vcn_v2_0_resume - resume VCN block
 298 *
 299 * @handle: amdgpu_device pointer
 300 *
 301 * Resume firmware and hw init VCN block
 302 */
 303static int vcn_v2_0_resume(void *handle)
 304{
 305        int r;
 306        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 307
 308        r = amdgpu_vcn_resume(adev);
 309        if (r)
 310                return r;
 311
 312        r = vcn_v2_0_hw_init(adev);
 313
 314        return r;
 315}
 316
 317/**
 318 * vcn_v2_0_mc_resume - memory controller programming
 319 *
 320 * @adev: amdgpu_device pointer
 321 *
 322 * Let the VCN memory controller know it's offsets
 323 */
 324static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
 325{
 326        uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
 327        uint32_t offset;
 328
 329        if (amdgpu_sriov_vf(adev))
 330                return;
 331
 332        /* cache window 0: fw */
 333        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 334                WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
 335                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
 336                WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
 337                        (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
 338                WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
 339                offset = 0;
 340        } else {
 341                WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
 342                        lower_32_bits(adev->vcn.inst->gpu_addr));
 343                WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
 344                        upper_32_bits(adev->vcn.inst->gpu_addr));
 345                offset = size;
 346                WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
 347                        AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
 348        }
 349
 350        WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
 351
 352        /* cache window 1: stack */
 353        WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
 354                lower_32_bits(adev->vcn.inst->gpu_addr + offset));
 355        WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
 356                upper_32_bits(adev->vcn.inst->gpu_addr + offset));
 357        WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
 358        WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
 359
 360        /* cache window 2: context */
 361        WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
 362                lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
 363        WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
 364                upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
 365        WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
 366        WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
 367
 368        /* non-cache window */
 369        WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
 370                lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
 371        WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
 372                upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
 373        WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
 374        WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0,
 375                AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
 376
 377        WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
 378}
 379
 380static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
 381{
 382        uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
 383        uint32_t offset;
 384
 385        /* cache window 0: fw */
 386        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 387                if (!indirect) {
 388                        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 389                                UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
 390                                (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
 391                        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 392                                UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 393                                (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
 394                        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 395                                UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
 396                } else {
 397                        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 398                                UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
 399                        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 400                                UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
 401                        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 402                                UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
 403                }
 404                offset = 0;
 405        } else {
 406                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 407                        UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
 408                        lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
 409                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 410                        UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 411                        upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
 412                offset = size;
 413                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 414                        UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
 415                        AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
 416        }
 417
 418        if (!indirect)
 419                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 420                        UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
 421        else
 422                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 423                        UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
 424
 425        /* cache window 1: stack */
 426        if (!indirect) {
 427                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 428                        UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
 429                        lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
 430                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 431                        UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
 432                        upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
 433                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 434                        UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
 435        } else {
 436                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 437                        UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
 438                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 439                        UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
 440                WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 441                        UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
 442        }
 443        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 444                UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
 445
 446        /* cache window 2: context */
 447        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 448                UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
 449                lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
 450        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 451                UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
 452                upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
 453        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 454                UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
 455        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 456                UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
 457
 458        /* non-cache window */
 459        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 460                UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
 461                lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
 462        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 463                UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
 464                upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
 465        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 466                UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
 467        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 468                UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0),
 469                AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
 470
 471        /* VCN global tiling registers */
 472        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 473                UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
 474}
 475
 476/**
 477 * vcn_v2_0_disable_clock_gating - disable VCN clock gating
 478 *
 479 * @adev: amdgpu_device pointer
 480 *
 481 * Disable clock gating for VCN block
 482 */
 483static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
 484{
 485        uint32_t data;
 486
 487        if (amdgpu_sriov_vf(adev))
 488                return;
 489
 490        /* UVD disable CGC */
 491        data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
 492        if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
 493                data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 494        else
 495                data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
 496        data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
 497        data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
 498        WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
 499
 500        data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE);
 501        data &= ~(UVD_CGC_GATE__SYS_MASK
 502                | UVD_CGC_GATE__UDEC_MASK
 503                | UVD_CGC_GATE__MPEG2_MASK
 504                | UVD_CGC_GATE__REGS_MASK
 505                | UVD_CGC_GATE__RBC_MASK
 506                | UVD_CGC_GATE__LMI_MC_MASK
 507                | UVD_CGC_GATE__LMI_UMC_MASK
 508                | UVD_CGC_GATE__IDCT_MASK
 509                | UVD_CGC_GATE__MPRD_MASK
 510                | UVD_CGC_GATE__MPC_MASK
 511                | UVD_CGC_GATE__LBSI_MASK
 512                | UVD_CGC_GATE__LRBBM_MASK
 513                | UVD_CGC_GATE__UDEC_RE_MASK
 514                | UVD_CGC_GATE__UDEC_CM_MASK
 515                | UVD_CGC_GATE__UDEC_IT_MASK
 516                | UVD_CGC_GATE__UDEC_DB_MASK
 517                | UVD_CGC_GATE__UDEC_MP_MASK
 518                | UVD_CGC_GATE__WCB_MASK
 519                | UVD_CGC_GATE__VCPU_MASK
 520                | UVD_CGC_GATE__SCPU_MASK);
 521        WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data);
 522
 523        data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
 524        data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
 525                | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
 526                | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
 527                | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
 528                | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
 529                | UVD_CGC_CTRL__SYS_MODE_MASK
 530                | UVD_CGC_CTRL__UDEC_MODE_MASK
 531                | UVD_CGC_CTRL__MPEG2_MODE_MASK
 532                | UVD_CGC_CTRL__REGS_MODE_MASK
 533                | UVD_CGC_CTRL__RBC_MODE_MASK
 534                | UVD_CGC_CTRL__LMI_MC_MODE_MASK
 535                | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
 536                | UVD_CGC_CTRL__IDCT_MODE_MASK
 537                | UVD_CGC_CTRL__MPRD_MODE_MASK
 538                | UVD_CGC_CTRL__MPC_MODE_MASK
 539                | UVD_CGC_CTRL__LBSI_MODE_MASK
 540                | UVD_CGC_CTRL__LRBBM_MODE_MASK
 541                | UVD_CGC_CTRL__WCB_MODE_MASK
 542                | UVD_CGC_CTRL__VCPU_MODE_MASK
 543                | UVD_CGC_CTRL__SCPU_MODE_MASK);
 544        WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
 545
 546        /* turn on */
 547        data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE);
 548        data |= (UVD_SUVD_CGC_GATE__SRE_MASK
 549                | UVD_SUVD_CGC_GATE__SIT_MASK
 550                | UVD_SUVD_CGC_GATE__SMP_MASK
 551                | UVD_SUVD_CGC_GATE__SCM_MASK
 552                | UVD_SUVD_CGC_GATE__SDB_MASK
 553                | UVD_SUVD_CGC_GATE__SRE_H264_MASK
 554                | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
 555                | UVD_SUVD_CGC_GATE__SIT_H264_MASK
 556                | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
 557                | UVD_SUVD_CGC_GATE__SCM_H264_MASK
 558                | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
 559                | UVD_SUVD_CGC_GATE__SDB_H264_MASK
 560                | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
 561                | UVD_SUVD_CGC_GATE__SCLR_MASK
 562                | UVD_SUVD_CGC_GATE__UVD_SC_MASK
 563                | UVD_SUVD_CGC_GATE__ENT_MASK
 564                | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
 565                | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
 566                | UVD_SUVD_CGC_GATE__SITE_MASK
 567                | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
 568                | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
 569                | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
 570                | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
 571                | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
 572        WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data);
 573
 574        data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
 575        data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
 576                | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
 577                | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
 578                | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
 579                | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
 580                | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
 581                | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
 582                | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
 583                | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
 584                | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
 585        WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
 586}
 587
 588static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
 589                uint8_t sram_sel, uint8_t indirect)
 590{
 591        uint32_t reg_data = 0;
 592
 593        /* enable sw clock gating control */
 594        if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
 595                reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 596        else
 597                reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 598        reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
 599        reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
 600        reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
 601                 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
 602                 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
 603                 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
 604                 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
 605                 UVD_CGC_CTRL__SYS_MODE_MASK |
 606                 UVD_CGC_CTRL__UDEC_MODE_MASK |
 607                 UVD_CGC_CTRL__MPEG2_MODE_MASK |
 608                 UVD_CGC_CTRL__REGS_MODE_MASK |
 609                 UVD_CGC_CTRL__RBC_MODE_MASK |
 610                 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
 611                 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
 612                 UVD_CGC_CTRL__IDCT_MODE_MASK |
 613                 UVD_CGC_CTRL__MPRD_MODE_MASK |
 614                 UVD_CGC_CTRL__MPC_MODE_MASK |
 615                 UVD_CGC_CTRL__LBSI_MODE_MASK |
 616                 UVD_CGC_CTRL__LRBBM_MODE_MASK |
 617                 UVD_CGC_CTRL__WCB_MODE_MASK |
 618                 UVD_CGC_CTRL__VCPU_MODE_MASK |
 619                 UVD_CGC_CTRL__SCPU_MODE_MASK);
 620        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 621                UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
 622
 623        /* turn off clock gating */
 624        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 625                UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
 626
 627        /* turn on SUVD clock gating */
 628        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 629                UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
 630
 631        /* turn on sw mode in UVD_SUVD_CGC_CTRL */
 632        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
 633                UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
 634}
 635
 636/**
 637 * vcn_v2_0_enable_clock_gating - enable VCN clock gating
 638 *
 639 * @adev: amdgpu_device pointer
 640 *
 641 * Enable clock gating for VCN block
 642 */
 643static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
 644{
 645        uint32_t data = 0;
 646
 647        if (amdgpu_sriov_vf(adev))
 648                return;
 649
 650        /* enable UVD CGC */
 651        data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
 652        if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
 653                data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 654        else
 655                data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 656        data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
 657        data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
 658        WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
 659
 660        data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
 661        data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
 662                | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
 663                | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
 664                | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
 665                | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
 666                | UVD_CGC_CTRL__SYS_MODE_MASK
 667                | UVD_CGC_CTRL__UDEC_MODE_MASK
 668                | UVD_CGC_CTRL__MPEG2_MODE_MASK
 669                | UVD_CGC_CTRL__REGS_MODE_MASK
 670                | UVD_CGC_CTRL__RBC_MODE_MASK
 671                | UVD_CGC_CTRL__LMI_MC_MODE_MASK
 672                | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
 673                | UVD_CGC_CTRL__IDCT_MODE_MASK
 674                | UVD_CGC_CTRL__MPRD_MODE_MASK
 675                | UVD_CGC_CTRL__MPC_MODE_MASK
 676                | UVD_CGC_CTRL__LBSI_MODE_MASK
 677                | UVD_CGC_CTRL__LRBBM_MODE_MASK
 678                | UVD_CGC_CTRL__WCB_MODE_MASK
 679                | UVD_CGC_CTRL__VCPU_MODE_MASK
 680                | UVD_CGC_CTRL__SCPU_MODE_MASK);
 681        WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
 682
 683        data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
 684        data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
 685                | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
 686                | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
 687                | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
 688                | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
 689                | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
 690                | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
 691                | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
 692                | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
 693                | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
 694        WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
 695}
 696
 697static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
 698{
 699        uint32_t data = 0;
 700
 701        if (amdgpu_sriov_vf(adev))
 702                return;
 703
 704        if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
 705                data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
 706                        | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
 707                        | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
 708                        | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
 709                        | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
 710                        | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
 711                        | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
 712                        | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
 713                        | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
 714                        | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
 715
 716                WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
 717                SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
 718                        UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF);
 719        } else {
 720                data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
 721                        | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
 722                        | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
 723                        | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
 724                        | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
 725                        | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
 726                        | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
 727                        | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
 728                        | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
 729                        | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
 730                WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
 731                SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0,  0xFFFFF);
 732        }
 733
 734        /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS,
 735         * UVDU_PWR_STATUS are 0 (power on) */
 736
 737        data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
 738        data &= ~0x103;
 739        if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
 740                data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
 741                        UVD_POWER_STATUS__UVD_PG_EN_MASK;
 742
 743        WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
 744}
 745
 746static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
 747{
 748        uint32_t data = 0;
 749
 750        if (amdgpu_sriov_vf(adev))
 751                return;
 752
 753        if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
 754                /* Before power off, this indicator has to be turned on */
 755                data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
 756                data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
 757                data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
 758                WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
 759
 760
 761                data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
 762                        | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
 763                        | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
 764                        | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
 765                        | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
 766                        | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
 767                        | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
 768                        | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
 769                        | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
 770                        | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
 771
 772                WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
 773
 774                data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
 775                        | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
 776                        | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
 777                        | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
 778                        | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
 779                        | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
 780                        | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
 781                        | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
 782                        | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
 783                        | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT);
 784                SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF);
 785        }
 786}
 787
/**
 * vcn_v2_0_start_dpg_mode - start VCN with dynamic power gating (DPG)
 *
 * @adev: amdgpu_device pointer
 * @indirect: forwarded to every WREG32_SOC15_DPG_MODE() call; when set,
 *            the DPG SRAM cursor is rewound first and the bytes
 *            accumulated between the SRAM base and the cursor are handed
 *            to psp_update_vcn_sram() once the sequence is complete
 *
 * Enables static power gating, switches UVD_POWER_STATUS into DPG mode,
 * programs the VCPU/LMI/MPC setup through the DPG write path and then
 * initializes the decode ring registers while DPG power-up is stalled.
 *
 * Returns 0; this function has no failure path.
 */
static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
{
        volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
        struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
        uint32_t rb_bufsz, tmp;

        vcn_v2_0_enable_static_power_gating(adev);

        /* enable dynamic power gating mode */
        tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
        tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
        tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
        WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);

        /* rewind the DPG SRAM cursor to the start of the buffer */
        if (indirect)
                adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr;

        /* enable clock gating */
        vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);

        /* enable VCPU clock */
        tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
        tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
        tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);

        /* disable master interrupt */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);

        /* setup mmUVD_LMI_CTRL */
        tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
                UVD_LMI_CTRL__REQ_MODE_MASK |
                UVD_LMI_CTRL__CRC_RESET_MASK |
                UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
                UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
                UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
                (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
                0x00100000L);
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);

        /* setup mmUVD_MPC_CNTL: replacement mode 2 */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_MPC_CNTL),
                0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);

        /* setup mmUVD_MPC_SET_MUXA0 */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_MPC_SET_MUXA0),
                ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
                 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
                 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
                 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);

        /* setup mmUVD_MPC_SET_MUXB0 */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_MPC_SET_MUXB0),
                ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
                 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
                 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
                 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);

        /* setup mmUVD_MPC_SET_MUX */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_MPC_SET_MUX),
                ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
                 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
                 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);

        vcn_v2_0_mc_resume_dpg_mode(adev, indirect);

        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);

        /* release VCPU reset to boot */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);

        /* enable LMI MC and UMC channels */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_LMI_CTRL2),
                0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);

        /* enable master interrupt */
        WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
                UVD, 0, mmUVD_MASTINT_EN),
                UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);

        /* hand the SRAM range [base, cursor) to PSP; the size is the byte
         * distance between the cursor and the start of the buffer */
        if (indirect)
                psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
                                    (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
                                               (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));

        /* force RBC into idle state */
        rb_bufsz = order_base_2(ring->ring_size);
        tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);

        /* Stall DPG before WPTR/RPTR reset */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
                UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
                ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
        /* flag the decode queue as being reset in the firmware shared area */
        fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;

        /* set the write pointer delay */
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);

        /* set the wb address */
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
                (upper_32_bits(ring->gpu_addr) >> 2));

        /* program the RB_BASE for ring buffer */
        WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
                lower_32_bits(ring->gpu_addr));
        WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
                upper_32_bits(ring->gpu_addr));

        /* Initialize the ring buffer's read and write pointers */
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);

        /* SCRATCH2 holds the decode write pointer copy stored by
         * vcn_v2_0_dec_ring_set_wptr() in DPG mode; clear it as well */
        WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0);

        /* software write pointer follows whatever RPTR reads back */
        ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
                lower_32_bits(ring->wptr));

        fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
        /* Unstall DPG */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
                0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
        return 0;
}
 924
/**
 * vcn_v2_0_start - start the VCN block
 *
 * @adev: amdgpu_device pointer
 *
 * Enables UVD DPM when DPM is active.  With AMD_PG_SUPPORT_VCN_DPG the
 * whole start is delegated to vcn_v2_0_start_dpg_mode().  Otherwise the
 * block is powered up, the VCPU is booted (with up to ten soft-reset
 * retries) and the decode ring plus both encode rings are programmed.
 *
 * Returns 0 on success, -1 when the VCPU never reports ready, or the
 * return value of vcn_v2_0_start_dpg_mode() in DPG mode.
 */
static int vcn_v2_0_start(struct amdgpu_device *adev)
{
        volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
        struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
        uint32_t rb_bufsz, tmp;
        uint32_t lmi_swap_cntl;
        int i, j, r;

        if (adev->pm.dpm_enabled)
                amdgpu_dpm_enable_uvd(adev, true);

        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
                return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);

        vcn_v2_0_disable_static_power_gating(adev);

        /* set uvd status busy */
        tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
        WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);

        /*SW clock gating */
        vcn_v2_0_disable_clock_gating(adev);

        /* enable VCPU clock */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
                UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);

        /* disable master interrupt */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
                ~UVD_MASTINT_EN__VCPU_EN_MASK);

        /* setup mmUVD_LMI_CTRL */
        tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL);
        WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp |
                UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
                UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
                UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
                UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);

        /* setup mmUVD_MPC_CNTL */
        tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL);
        tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
        tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
        WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp);

        /* setup UVD_MPC_SET_MUXA0 */
        WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0,
                ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
                (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
                (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
                (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));

        /* setup UVD_MPC_SET_MUXB0 */
        WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0,
                ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
                (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
                (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
                (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));

        /* setup mmUVD_MPC_SET_MUX */
        WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX,
                ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
                (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
                (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));

        vcn_v2_0_mc_resume(adev);

        /* release VCPU reset to boot */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
                ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);

        /* enable LMI MC and UMC channels */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
                ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

        /* take LMI and LMI UMC out of soft reset */
        tmp = RREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET);
        tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
        tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
        WREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET, tmp);

        /* disable byte swapping */
        lmi_swap_cntl = 0;
#ifdef __BIG_ENDIAN
        /* swap (8 in 32) RB and IB */
        lmi_swap_cntl = 0xa;
#endif
        WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);

        /* Wait for the VCPU to come up: up to 10 attempts, each polling
         * bit 1 of UVD_STATUS up to 100 times at 10 ms intervals.  A failed
         * attempt pulses the VCPU soft reset before the next one. */
        for (i = 0; i < 10; ++i) {
                uint32_t status;

                for (j = 0; j < 100; ++j) {
                        status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
                        if (status & 2)
                                break;
                        mdelay(10);
                }
                r = 0;
                if (status & 2)
                        break;

                DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
                        UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
                        ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
                        ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
                mdelay(10);
                /* stays -1 only if this was the last attempt */
                r = -1;
        }

        if (r) {
                DRM_ERROR("VCN decode not responding, giving up!!!\n");
                return r;
        }

        /* enable master interrupt */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
                UVD_MASTINT_EN__VCPU_EN_MASK,
                ~UVD_MASTINT_EN__VCPU_EN_MASK);

        /* clear the busy bit of VCN_STATUS */
        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
                ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));

        WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0);

        /* force RBC into idle state */
        rb_bufsz = order_base_2(ring->ring_size);
        tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);

        /* decode ring: FW_QUEUE_RING_RESET is held in the firmware shared
         * area for the duration of the pointer/base programming */
        fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
        /* program the RB_BASE for ring buffer */
        WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
                lower_32_bits(ring->gpu_addr));
        WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
                upper_32_bits(ring->gpu_addr));

        /* Initialize the ring buffer's read and write pointers */
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);

        ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
        WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
                        lower_32_bits(ring->wptr));
        fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;

        /* first encode ring (general purpose queue): RPTR and WPTR both
         * start at the ring's current software write pointer */
        fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
        ring = &adev->vcn.inst->ring_enc[0];
        WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
        WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
        WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
        WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
        WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
        fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;

        /* second encode ring (low latency queue), same programming */
        fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
        ring = &adev->vcn.inst->ring_enc[1];
        WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
        WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
        WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
        WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
        WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
        fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;

        return 0;
}
1097
1098static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
1099{
1100        uint32_t tmp;
1101
1102        /* Wait for power status to be 1 */
1103        SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
1104                UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1105
1106        /* wait for read ptr to be equal to write ptr */
1107        tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
1108        SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1109
1110        tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
1111        SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
1112
1113        tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
1114        SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
1115
1116        SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
1117                UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1118
1119        /* disable dynamic power gating mode */
1120        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
1121                        ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1122
1123        return 0;
1124}
1125
1126static int vcn_v2_0_stop(struct amdgpu_device *adev)
1127{
1128        uint32_t tmp;
1129        int r;
1130
1131        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1132                r = vcn_v2_0_stop_dpg_mode(adev);
1133                if (r)
1134                        return r;
1135                goto power_off;
1136        }
1137
1138        /* wait for uvd idle */
1139        r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1140        if (r)
1141                return r;
1142
1143        tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1144                UVD_LMI_STATUS__READ_CLEAN_MASK |
1145                UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1146                UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1147        r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
1148        if (r)
1149                return r;
1150
1151        /* stall UMC channel */
1152        tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2);
1153        tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1154        WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp);
1155
1156        tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
1157                UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1158        r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
1159        if (r)
1160                return r;
1161
1162        /* disable VCPU clock */
1163        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
1164                ~(UVD_VCPU_CNTL__CLK_EN_MASK));
1165
1166        /* reset LMI UMC */
1167        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
1168                UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK,
1169                ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
1170
1171        /* reset LMI */
1172        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
1173                UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
1174                ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
1175
1176        /* reset VCPU */
1177        WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
1178                UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
1179                ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
1180
1181        /* clear status */
1182        WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
1183
1184        vcn_v2_0_enable_clock_gating(adev);
1185        vcn_v2_0_enable_static_power_gating(adev);
1186
1187power_off:
1188        if (adev->pm.dpm_enabled)
1189                amdgpu_dpm_enable_uvd(adev, false);
1190
1191        return 0;
1192}
1193
/**
 * vcn_v2_0_pause_dpg_mode - pause or unpause DPG
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance whose cached pause state is compared and updated;
 *            register accesses and ring lookups below always use
 *            instance 0 / adev->vcn.inst
 * @new_state: requested pause state (only fw_based is consulted)
 *
 * Acts only when the requested fw_based state differs from the cached
 * one.  On pause, DPG is asked to pause, and once acknowledged both encode
 * rings are re-programmed from scratch and the decode write pointer is
 * restored from UVD_SCRATCH2, all while DPG power-up is stalled.  On
 * unpause the request bit is simply cleared.
 *
 * Returns 0 in all cases.
 */
static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
                                int inst_idx, struct dpg_pause_state *new_state)
{
        struct amdgpu_ring *ring;
        uint32_t reg_data = 0;
        int ret_code;

        /* pause/unpause if state is changed */
        if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
                DRM_DEBUG("dpg pause state changed %d -> %d",
                        adev->vcn.inst[inst_idx].pause_state.fw_based,  new_state->fw_based);
                /* current pause register with the ACK bit masked out */
                reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
                        (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

                if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
                        /* power status field must read 1 before pausing */
                        ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1,
                                UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

                        /* NOTE(review): when this wait fails the pause request is
                         * never written, yet the cached state below is still
                         * updated - confirm this is intended */
                        if (!ret_code) {
                                volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
                                /* pause DPG */
                                reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
                                WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);

                                /* wait for ACK */
                                SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
                                           UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
                                           UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

                                /* Stall DPG before WPTR/RPTR reset */
                                WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
                                           UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
                                           ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
                                /* Restore */
                                /* first encode ring: base, size and zeroed pointers,
                                 * bracketed by FW_QUEUE_RING_RESET in the shared area */
                                fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
                                ring = &adev->vcn.inst->ring_enc[0];
                                ring->wptr = 0;
                                WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
                                WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
                                WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
                                WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
                                WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
                                fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;

                                /* second encode ring, same treatment */
                                fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
                                ring = &adev->vcn.inst->ring_enc[1];
                                ring->wptr = 0;
                                WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
                                WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
                                WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
                                WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
                                WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
                                fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;

                                /* decode ring: write pointer comes back from
                                 * UVD_SCRATCH2 with bit 31 stripped */
                                fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
                                WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
                                           RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
                                fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
                                /* Unstall DPG */
                                WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
                                           0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

                                /* wait for the power status field to report the
                                 * UVDM/UVDU powered-on value */
                                SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
                                           UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
                                           UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
                        }
                } else {
                        /* unpause dpg, no need to wait */
                        reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
                        WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
                }
                /* remember the requested state for the next comparison */
                adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
        }

        return 0;
}
1270
1271static bool vcn_v2_0_is_idle(void *handle)
1272{
1273        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1274
1275        return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
1276}
1277
1278static int vcn_v2_0_wait_for_idle(void *handle)
1279{
1280        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1281        int ret;
1282
1283        ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
1284                UVD_STATUS__IDLE);
1285
1286        return ret;
1287}
1288
1289static int vcn_v2_0_set_clockgating_state(void *handle,
1290                                          enum amd_clockgating_state state)
1291{
1292        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1293        bool enable = (state == AMD_CG_STATE_GATE);
1294
1295        if (amdgpu_sriov_vf(adev))
1296                return 0;
1297
1298        if (enable) {
1299                /* wait for STATUS to clear */
1300                if (!vcn_v2_0_is_idle(handle))
1301                        return -EBUSY;
1302                vcn_v2_0_enable_clock_gating(adev);
1303        } else {
1304                /* disable HW gating and enable Sw gating */
1305                vcn_v2_0_disable_clock_gating(adev);
1306        }
1307        return 0;
1308}
1309
1310/**
1311 * vcn_v2_0_dec_ring_get_rptr - get read pointer
1312 *
1313 * @ring: amdgpu_ring pointer
1314 *
1315 * Returns the current hardware read pointer
1316 */
1317static uint64_t vcn_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
1318{
1319        struct amdgpu_device *adev = ring->adev;
1320
1321        return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
1322}
1323
1324/**
1325 * vcn_v2_0_dec_ring_get_wptr - get write pointer
1326 *
1327 * @ring: amdgpu_ring pointer
1328 *
1329 * Returns the current hardware write pointer
1330 */
1331static uint64_t vcn_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
1332{
1333        struct amdgpu_device *adev = ring->adev;
1334
1335        if (ring->use_doorbell)
1336                return adev->wb.wb[ring->wptr_offs];
1337        else
1338                return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
1339}
1340
1341/**
1342 * vcn_v2_0_dec_ring_set_wptr - set write pointer
1343 *
1344 * @ring: amdgpu_ring pointer
1345 *
1346 * Commits the write pointer to the hardware
1347 */
1348static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
1349{
1350        struct amdgpu_device *adev = ring->adev;
1351
1352        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
1353                WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2,
1354                        lower_32_bits(ring->wptr) | 0x80000000);
1355
1356        if (ring->use_doorbell) {
1357                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1358                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1359        } else {
1360                WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
1361        }
1362}
1363
1364/**
1365 * vcn_v2_0_dec_ring_insert_start - insert a start command
1366 *
1367 * @ring: amdgpu_ring pointer
1368 *
1369 * Write a start command to the ring.
1370 */
1371void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
1372{
1373        struct amdgpu_device *adev = ring->adev;
1374
1375        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
1376        amdgpu_ring_write(ring, 0);
1377        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
1378        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
1379}
1380
1381/**
1382 * vcn_v2_0_dec_ring_insert_end - insert a end command
1383 *
1384 * @ring: amdgpu_ring pointer
1385 *
1386 * Write a end command to the ring.
1387 */
1388void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
1389{
1390        struct amdgpu_device *adev = ring->adev;
1391
1392        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
1393        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
1394}
1395
1396/**
1397 * vcn_v2_0_dec_ring_insert_nop - insert a nop command
1398 *
1399 * @ring: amdgpu_ring pointer
1400 * @count: the number of NOP packets to insert
1401 *
1402 * Write a nop command to the ring.
1403 */
1404void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
1405{
1406        struct amdgpu_device *adev = ring->adev;
1407        int i;
1408
1409        WARN_ON(ring->wptr % 2 || count % 2);
1410
1411        for (i = 0; i < count / 2; i++) {
1412                amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
1413                amdgpu_ring_write(ring, 0);
1414        }
1415}
1416
1417/**
1418 * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command
1419 *
1420 * @ring: amdgpu_ring pointer
1421 * @addr: address
1422 * @seq: sequence number
1423 * @flags: fence related flags
1424 *
1425 * Write a fence and a trap command to the ring.
1426 */
1427void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
1428                                unsigned flags)
1429{
1430        struct amdgpu_device *adev = ring->adev;
1431
1432        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1433        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
1434        amdgpu_ring_write(ring, seq);
1435
1436        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
1437        amdgpu_ring_write(ring, addr & 0xffffffff);
1438
1439        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
1440        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
1441
1442        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
1443        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
1444
1445        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
1446        amdgpu_ring_write(ring, 0);
1447
1448        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
1449        amdgpu_ring_write(ring, 0);
1450
1451        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
1452
1453        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
1454}
1455
1456/**
1457 * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer
1458 *
1459 * @ring: amdgpu_ring pointer
1460 * @job: job to retrieve vmid from
1461 * @ib: indirect buffer to execute
1462 * @flags: unused
1463 *
1464 * Write ring commands to execute the indirect buffer
1465 */
1466void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
1467                               struct amdgpu_job *job,
1468                               struct amdgpu_ib *ib,
1469                               uint32_t flags)
1470{
1471        struct amdgpu_device *adev = ring->adev;
1472        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
1473
1474        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
1475        amdgpu_ring_write(ring, vmid);
1476
1477        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
1478        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1479        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
1480        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1481        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
1482        amdgpu_ring_write(ring, ib->length_dw);
1483}
1484
1485void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1486                                uint32_t val, uint32_t mask)
1487{
1488        struct amdgpu_device *adev = ring->adev;
1489
1490        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
1491        amdgpu_ring_write(ring, reg << 2);
1492
1493        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
1494        amdgpu_ring_write(ring, val);
1495
1496        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));
1497        amdgpu_ring_write(ring, mask);
1498
1499        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
1500
1501        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
1502}
1503
1504void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
1505                                unsigned vmid, uint64_t pd_addr)
1506{
1507        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1508        uint32_t data0, data1, mask;
1509
1510        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1511
1512        /* wait for register write */
1513        data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
1514        data1 = lower_32_bits(pd_addr);
1515        mask = 0xffffffff;
1516        vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
1517}
1518
1519void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
1520                                uint32_t reg, uint32_t val)
1521{
1522        struct amdgpu_device *adev = ring->adev;
1523
1524        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
1525        amdgpu_ring_write(ring, reg << 2);
1526
1527        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
1528        amdgpu_ring_write(ring, val);
1529
1530        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
1531
1532        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
1533}
1534
1535/**
1536 * vcn_v2_0_enc_ring_get_rptr - get enc read pointer
1537 *
1538 * @ring: amdgpu_ring pointer
1539 *
1540 * Returns the current hardware enc read pointer
1541 */
1542static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
1543{
1544        struct amdgpu_device *adev = ring->adev;
1545
1546        if (ring == &adev->vcn.inst->ring_enc[0])
1547                return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
1548        else
1549                return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
1550}
1551
1552 /**
1553 * vcn_v2_0_enc_ring_get_wptr - get enc write pointer
1554 *
1555 * @ring: amdgpu_ring pointer
1556 *
1557 * Returns the current hardware enc write pointer
1558 */
1559static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
1560{
1561        struct amdgpu_device *adev = ring->adev;
1562
1563        if (ring == &adev->vcn.inst->ring_enc[0]) {
1564                if (ring->use_doorbell)
1565                        return adev->wb.wb[ring->wptr_offs];
1566                else
1567                        return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
1568        } else {
1569                if (ring->use_doorbell)
1570                        return adev->wb.wb[ring->wptr_offs];
1571                else
1572                        return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
1573        }
1574}
1575
1576 /**
1577 * vcn_v2_0_enc_ring_set_wptr - set enc write pointer
1578 *
1579 * @ring: amdgpu_ring pointer
1580 *
1581 * Commits the enc write pointer to the hardware
1582 */
1583static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
1584{
1585        struct amdgpu_device *adev = ring->adev;
1586
1587        if (ring == &adev->vcn.inst->ring_enc[0]) {
1588                if (ring->use_doorbell) {
1589                        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1590                        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1591                } else {
1592                        WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1593                }
1594        } else {
1595                if (ring->use_doorbell) {
1596                        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1597                        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1598                } else {
1599                        WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1600                }
1601        }
1602}
1603
1604/**
1605 * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command
1606 *
1607 * @ring: amdgpu_ring pointer
1608 * @addr: address
1609 * @seq: sequence number
1610 * @flags: fence related flags
1611 *
1612 * Write enc a fence and a trap command to the ring.
1613 */
1614void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1615                                u64 seq, unsigned flags)
1616{
1617        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1618
1619        amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE);
1620        amdgpu_ring_write(ring, addr);
1621        amdgpu_ring_write(ring, upper_32_bits(addr));
1622        amdgpu_ring_write(ring, seq);
1623        amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
1624}
1625
1626void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
1627{
1628        amdgpu_ring_write(ring, VCN_ENC_CMD_END);
1629}
1630
1631/**
1632 * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer
1633 *
1634 * @ring: amdgpu_ring pointer
1635 * @job: job to retrive vmid from
1636 * @ib: indirect buffer to execute
1637 * @flags: unused
1638 *
1639 * Write enc ring commands to execute the indirect buffer
1640 */
1641void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
1642                               struct amdgpu_job *job,
1643                               struct amdgpu_ib *ib,
1644                               uint32_t flags)
1645{
1646        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
1647
1648        amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
1649        amdgpu_ring_write(ring, vmid);
1650        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1651        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1652        amdgpu_ring_write(ring, ib->length_dw);
1653}
1654
1655void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1656                                uint32_t val, uint32_t mask)
1657{
1658        amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
1659        amdgpu_ring_write(ring, reg << 2);
1660        amdgpu_ring_write(ring, mask);
1661        amdgpu_ring_write(ring, val);
1662}
1663
1664void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
1665                                unsigned int vmid, uint64_t pd_addr)
1666{
1667        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1668
1669        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1670
1671        /* wait for reg writes */
1672        vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1673                                        vmid * hub->ctx_addr_distance,
1674                                        lower_32_bits(pd_addr), 0xffffffff);
1675}
1676
1677void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
1678{
1679        amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
1680        amdgpu_ring_write(ring, reg << 2);
1681        amdgpu_ring_write(ring, val);
1682}
1683
1684static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev,
1685                                        struct amdgpu_irq_src *source,
1686                                        unsigned type,
1687                                        enum amdgpu_interrupt_state state)
1688{
1689        return 0;
1690}
1691
1692static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
1693                                      struct amdgpu_irq_src *source,
1694                                      struct amdgpu_iv_entry *entry)
1695{
1696        DRM_DEBUG("IH: VCN TRAP\n");
1697
1698        switch (entry->src_id) {
1699        case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
1700                amdgpu_fence_process(&adev->vcn.inst->ring_dec);
1701                break;
1702        case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
1703                amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);
1704                break;
1705        case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
1706                amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
1707                break;
1708        default:
1709                DRM_ERROR("Unhandled interrupt: %d %d\n",
1710                          entry->src_id, entry->src_data[0]);
1711                break;
1712        }
1713
1714        return 0;
1715}
1716
1717int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
1718{
1719        struct amdgpu_device *adev = ring->adev;
1720        uint32_t tmp = 0;
1721        unsigned i;
1722        int r;
1723
1724        if (amdgpu_sriov_vf(adev))
1725                return 0;
1726
1727        WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
1728        r = amdgpu_ring_alloc(ring, 4);
1729        if (r)
1730                return r;
1731        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
1732        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
1733        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
1734        amdgpu_ring_write(ring, 0xDEADBEEF);
1735        amdgpu_ring_commit(ring);
1736        for (i = 0; i < adev->usec_timeout; i++) {
1737                tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
1738                if (tmp == 0xDEADBEEF)
1739                        break;
1740                udelay(1);
1741        }
1742
1743        if (i >= adev->usec_timeout)
1744                r = -ETIMEDOUT;
1745
1746        return r;
1747}
1748
1749
1750static int vcn_v2_0_set_powergating_state(void *handle,
1751                                          enum amd_powergating_state state)
1752{
1753        /* This doesn't actually powergate the VCN block.
1754         * That's done in the dpm code via the SMC.  This
1755         * just re-inits the block as necessary.  The actual
1756         * gating still happens in the dpm code.  We should
1757         * revisit this when there is a cleaner line between
1758         * the smc and the hw blocks
1759         */
1760        int ret;
1761        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1762
1763        if (amdgpu_sriov_vf(adev)) {
1764                adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
1765                return 0;
1766        }
1767
1768        if (state == adev->vcn.cur_state)
1769                return 0;
1770
1771        if (state == AMD_PG_STATE_GATE)
1772                ret = vcn_v2_0_stop(adev);
1773        else
1774                ret = vcn_v2_0_start(adev);
1775
1776        if (!ret)
1777                adev->vcn.cur_state = state;
1778        return ret;
1779}
1780
1781static int vcn_v2_0_start_mmsch(struct amdgpu_device *adev,
1782                                struct amdgpu_mm_table *table)
1783{
1784        uint32_t data = 0, loop;
1785        uint64_t addr = table->gpu_addr;
1786        struct mmsch_v2_0_init_header *header;
1787        uint32_t size;
1788        int i;
1789
1790        header = (struct mmsch_v2_0_init_header *)table->cpu_addr;
1791        size = header->header_size + header->vcn_table_size;
1792
1793        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
1794         * of memory descriptor location
1795         */
1796        WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
1797        WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
1798
1799        /* 2, update vmid of descriptor */
1800        data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
1801        data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
1802        /* use domain0 for MM scheduler */
1803        data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
1804        WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
1805
1806        /* 3, notify mmsch about the size of this descriptor */
1807        WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
1808
1809        /* 4, set resp to zero */
1810        WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
1811
1812        adev->vcn.inst->ring_dec.wptr = 0;
1813        adev->vcn.inst->ring_dec.wptr_old = 0;
1814        vcn_v2_0_dec_ring_set_wptr(&adev->vcn.inst->ring_dec);
1815
1816        for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
1817                adev->vcn.inst->ring_enc[i].wptr = 0;
1818                adev->vcn.inst->ring_enc[i].wptr_old = 0;
1819                vcn_v2_0_enc_ring_set_wptr(&adev->vcn.inst->ring_enc[i]);
1820        }
1821
1822        /* 5, kick off the initialization and wait until
1823         * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
1824         */
1825        WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
1826
1827        data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
1828        loop = 1000;
1829        while ((data & 0x10000002) != 0x10000002) {
1830                udelay(10);
1831                data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
1832                loop--;
1833                if (!loop)
1834                        break;
1835        }
1836
1837        if (!loop) {
1838                DRM_ERROR("failed to init MMSCH, " \
1839                        "mmMMSCH_VF_MAILBOX_RESP = 0x%08x\n", data);
1840                return -EBUSY;
1841        }
1842
1843        return 0;
1844}
1845
1846static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
1847{
1848        int r;
1849        uint32_t tmp;
1850        struct amdgpu_ring *ring;
1851        uint32_t offset, size;
1852        uint32_t table_size = 0;
1853        struct mmsch_v2_0_cmd_direct_write direct_wt = { {0} };
1854        struct mmsch_v2_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
1855        struct mmsch_v2_0_cmd_end end = { {0} };
1856        struct mmsch_v2_0_init_header *header;
1857        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
1858        uint8_t i = 0;
1859
1860        header = (struct mmsch_v2_0_init_header *)init_table;
1861        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
1862        direct_rd_mod_wt.cmd_header.command_type =
1863                MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
1864        end.cmd_header.command_type = MMSCH_COMMAND__END;
1865
1866        if (header->vcn_table_offset == 0 && header->vcn_table_size == 0) {
1867                header->version = MMSCH_VERSION;
1868                header->header_size = sizeof(struct mmsch_v2_0_init_header) >> 2;
1869
1870                header->vcn_table_offset = header->header_size;
1871
1872                init_table += header->vcn_table_offset;
1873
1874                size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
1875
1876                MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
1877                        SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
1878                        0xFFFFFFFF, 0x00000004);
1879
1880                /* mc resume*/
1881                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1882                        tmp = AMDGPU_UCODE_ID_VCN;
1883                        MMSCH_V2_0_INSERT_DIRECT_WT(
1884                                SOC15_REG_OFFSET(UVD, i,
1885                                        mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1886                                adev->firmware.ucode[tmp].tmr_mc_addr_lo);
1887                        MMSCH_V2_0_INSERT_DIRECT_WT(
1888                                SOC15_REG_OFFSET(UVD, i,
1889                                        mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1890                                adev->firmware.ucode[tmp].tmr_mc_addr_hi);
1891                        offset = 0;
1892                } else {
1893                        MMSCH_V2_0_INSERT_DIRECT_WT(
1894                                SOC15_REG_OFFSET(UVD, i,
1895                                        mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1896                                lower_32_bits(adev->vcn.inst->gpu_addr));
1897                        MMSCH_V2_0_INSERT_DIRECT_WT(
1898                                SOC15_REG_OFFSET(UVD, i,
1899                                        mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1900                                upper_32_bits(adev->vcn.inst->gpu_addr));
1901                        offset = size;
1902                }
1903
1904                MMSCH_V2_0_INSERT_DIRECT_WT(
1905                        SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
1906                        0);
1907                MMSCH_V2_0_INSERT_DIRECT_WT(
1908                        SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
1909                        size);
1910
1911                MMSCH_V2_0_INSERT_DIRECT_WT(
1912                        SOC15_REG_OFFSET(UVD, i,
1913                                mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
1914                        lower_32_bits(adev->vcn.inst->gpu_addr + offset));
1915                MMSCH_V2_0_INSERT_DIRECT_WT(
1916                        SOC15_REG_OFFSET(UVD, i,
1917                                mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
1918                        upper_32_bits(adev->vcn.inst->gpu_addr + offset));
1919                MMSCH_V2_0_INSERT_DIRECT_WT(
1920                        SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
1921                        0);
1922                MMSCH_V2_0_INSERT_DIRECT_WT(
1923                        SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
1924                        AMDGPU_VCN_STACK_SIZE);
1925
1926                MMSCH_V2_0_INSERT_DIRECT_WT(
1927                        SOC15_REG_OFFSET(UVD, i,
1928                                mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
1929                        lower_32_bits(adev->vcn.inst->gpu_addr + offset +
1930                                AMDGPU_VCN_STACK_SIZE));
1931                MMSCH_V2_0_INSERT_DIRECT_WT(
1932                        SOC15_REG_OFFSET(UVD, i,
1933                                mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
1934                        upper_32_bits(adev->vcn.inst->gpu_addr + offset +
1935                                AMDGPU_VCN_STACK_SIZE));
1936                MMSCH_V2_0_INSERT_DIRECT_WT(
1937                        SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
1938                        0);
1939                MMSCH_V2_0_INSERT_DIRECT_WT(
1940                        SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
1941                        AMDGPU_VCN_CONTEXT_SIZE);
1942
1943                for (r = 0; r < adev->vcn.num_enc_rings; ++r) {
1944                        ring = &adev->vcn.inst->ring_enc[r];
1945                        ring->wptr = 0;
1946                        MMSCH_V2_0_INSERT_DIRECT_WT(
1947                                SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
1948                                lower_32_bits(ring->gpu_addr));
1949                        MMSCH_V2_0_INSERT_DIRECT_WT(
1950                                SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
1951                                upper_32_bits(ring->gpu_addr));
1952                        MMSCH_V2_0_INSERT_DIRECT_WT(
1953                                SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
1954                                ring->ring_size / 4);
1955                }
1956
1957                ring = &adev->vcn.inst->ring_dec;
1958                ring->wptr = 0;
1959                MMSCH_V2_0_INSERT_DIRECT_WT(
1960                        SOC15_REG_OFFSET(UVD, i,
1961                                mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
1962                        lower_32_bits(ring->gpu_addr));
1963                MMSCH_V2_0_INSERT_DIRECT_WT(
1964                        SOC15_REG_OFFSET(UVD, i,
1965                                mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
1966                        upper_32_bits(ring->gpu_addr));
1967                /* force RBC into idle state */
1968                tmp = order_base_2(ring->ring_size);
1969                tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
1970                tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1971                tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1972                tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1973                tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1974                MMSCH_V2_0_INSERT_DIRECT_WT(
1975                        SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
1976
1977                /* add end packet */
1978                tmp = sizeof(struct mmsch_v2_0_cmd_end);
1979                memcpy((void *)init_table, &end, tmp);
1980                table_size += (tmp / 4);
1981                header->vcn_table_size = table_size;
1982
1983        }
1984        return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table);
1985}
1986
1987static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
1988        .name = "vcn_v2_0",
1989        .early_init = vcn_v2_0_early_init,
1990        .late_init = NULL,
1991        .sw_init = vcn_v2_0_sw_init,
1992        .sw_fini = vcn_v2_0_sw_fini,
1993        .hw_init = vcn_v2_0_hw_init,
1994        .hw_fini = vcn_v2_0_hw_fini,
1995        .suspend = vcn_v2_0_suspend,
1996        .resume = vcn_v2_0_resume,
1997        .is_idle = vcn_v2_0_is_idle,
1998        .wait_for_idle = vcn_v2_0_wait_for_idle,
1999        .check_soft_reset = NULL,
2000        .pre_soft_reset = NULL,
2001        .soft_reset = NULL,
2002        .post_soft_reset = NULL,
2003        .set_clockgating_state = vcn_v2_0_set_clockgating_state,
2004        .set_powergating_state = vcn_v2_0_set_powergating_state,
2005};
2006
2007static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
2008        .type = AMDGPU_RING_TYPE_VCN_DEC,
2009        .align_mask = 0xf,
2010        .vmhub = AMDGPU_MMHUB_0,
2011        .get_rptr = vcn_v2_0_dec_ring_get_rptr,
2012        .get_wptr = vcn_v2_0_dec_ring_get_wptr,
2013        .set_wptr = vcn_v2_0_dec_ring_set_wptr,
2014        .emit_frame_size =
2015                SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
2016                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
2017                8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
2018                14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
2019                6,
2020        .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
2021        .emit_ib = vcn_v2_0_dec_ring_emit_ib,
2022        .emit_fence = vcn_v2_0_dec_ring_emit_fence,
2023        .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
2024        .test_ring = vcn_v2_0_dec_ring_test_ring,
2025        .test_ib = amdgpu_vcn_dec_ring_test_ib,
2026        .insert_nop = vcn_v2_0_dec_ring_insert_nop,
2027        .insert_start = vcn_v2_0_dec_ring_insert_start,
2028        .insert_end = vcn_v2_0_dec_ring_insert_end,
2029        .pad_ib = amdgpu_ring_generic_pad_ib,
2030        .begin_use = amdgpu_vcn_ring_begin_use,
2031        .end_use = amdgpu_vcn_ring_end_use,
2032        .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
2033        .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
2034        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2035};
2036
2037static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
2038        .type = AMDGPU_RING_TYPE_VCN_ENC,
2039        .align_mask = 0x3f,
2040        .nop = VCN_ENC_CMD_NO_OP,
2041        .vmhub = AMDGPU_MMHUB_0,
2042        .get_rptr = vcn_v2_0_enc_ring_get_rptr,
2043        .get_wptr = vcn_v2_0_enc_ring_get_wptr,
2044        .set_wptr = vcn_v2_0_enc_ring_set_wptr,
2045        .emit_frame_size =
2046                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2047                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
2048                4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
2049                5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
2050                1, /* vcn_v2_0_enc_ring_insert_end */
2051        .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
2052        .emit_ib = vcn_v2_0_enc_ring_emit_ib,
2053        .emit_fence = vcn_v2_0_enc_ring_emit_fence,
2054        .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
2055        .test_ring = amdgpu_vcn_enc_ring_test_ring,
2056        .test_ib = amdgpu_vcn_enc_ring_test_ib,
2057        .insert_nop = amdgpu_ring_insert_nop,
2058        .insert_end = vcn_v2_0_enc_ring_insert_end,
2059        .pad_ib = amdgpu_ring_generic_pad_ib,
2060        .begin_use = amdgpu_vcn_ring_begin_use,
2061        .end_use = amdgpu_vcn_ring_end_use,
2062        .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
2063        .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
2064        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2065};
2066
2067static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
2068{
2069        adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
2070        DRM_INFO("VCN decode is enabled in VM mode\n");
2071}
2072
2073static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
2074{
2075        int i;
2076
2077        for (i = 0; i < adev->vcn.num_enc_rings; ++i)
2078                adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
2079
2080        DRM_INFO("VCN encode is enabled in VM mode\n");
2081}
2082
2083static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
2084        .set = vcn_v2_0_set_interrupt_state,
2085        .process = vcn_v2_0_process_interrupt,
2086};
2087
2088static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
2089{
2090        adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
2091        adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
2092}
2093
2094const struct amdgpu_ip_block_version vcn_v2_0_ip_block =
2095{
2096                .type = AMD_IP_BLOCK_TYPE_VCN,
2097                .major = 2,
2098                .minor = 0,
2099                .rev = 0,
2100                .funcs = &vcn_v2_0_ip_funcs,
2101};
2102