linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT     0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK       0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE            0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

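/* Per-instance VCPU memory layout: a 384KB firmware image, a 64KB stack,
 * and a data segment sized at 16KB per supported session handle
 * (AMDGPU_MAX_VCE_HANDLES) plus a fixed 52KB region.
 */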
#define VCE_V3_0_FW_SIZE        (384 * 1024)
#define VCE_V3_0_STACK_SIZE     (64 * 1024)
#define VCE_V3_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

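/* VCE firmware versions are packed as (major << 24) | (minor << 16) |
 * (revision << 8), the same encoding adev->vce.fw_version is compared
 * against in sw_init below; 52.8.3 is the first version with three rings.
 */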
#define FW_52_8_3       ((52 << 24) | (8 << 16) | (3 << 8))

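/* Build a GRBM_GFX_INDEX value that routes register access to VCE
 * instance i, addressing all of its pipes.
 */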
#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
                                        | GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                v = RREG32(mmVCE_RB_RPTR);
        else if (ring->me == 1)
                v = RREG32(mmVCE_RB_RPTR2);
        else
                v = RREG32(mmVCE_RB_RPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                v = RREG32(mmVCE_RB_WPTR);
        else if (ring->me == 1)
                v = RREG32(mmVCE_RB_WPTR2);
        else
                v = RREG32(mmVCE_RB_WPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
        else if (ring->me == 1)
                WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
        else
                WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);
}

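/* Force VCE clocks on (override clock gating) while VCE registers are
 * being programmed; callers clear the override again when done.
 */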
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v3_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
           With the clocks in the gated state the core is still
           accessible but the firmware will throttle the clocks on the
           fly as necessary.
        */
        if (!gated) {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data |= 0x37f;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        } else {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x10000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data &= ~0x3ff;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        }
        vce_v3_0_override_vce_clock_gating(adev, false);
}

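/* Poll VCE_STATUS for the firmware-loaded flag: up to 10 attempts of
 * 100 * 10ms each, soft-resetting the ECPU between attempts, i.e.
 * roughly 10 seconds before giving up.
 */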
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status = RREG32(mmVCE_STATUS);

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
                mdelay(10);
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int idx, r;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                /* Program the ring registers through instance 0's register
                 * space when instance 0 is available; use instance 1's
                 * register space only when instance 0 is harvested.
                 */
                if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
                        ring = &adev->vce.ring[0];
                        WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

                        ring = &adev->vce.ring[1];
                        WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

                        ring = &adev->vce.ring[2];
                        WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
                }

                vce_v3_0_mc_resume(adev, idx);
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(100);

                r = vce_v3_0_firmware_loaded(adev);

                /* clear BUSY flag */
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

                if (r) {
                        DRM_ERROR("VCE not responding, giving up!!!\n");
                        mutex_unlock(&adev->grbm_idx_mutex);
                        return r;
                }
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

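/**
 * vce_v3_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Halt the VCPU on each non-harvested instance and clear VCE_STATUS.
 */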
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
        int idx;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

                /* hold on ECPU */
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

                /* clear VCE STATUS */
                WREG32(mmVCE_STATUS, 0);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

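/* VCE harvest fuses: a two-bit field where 1 means instance 0 is
 * harvested, 2 means instance 1 is harvested and 3 means both are.
 */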
#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
        u32 tmp;

        if ((adev->asic_type == CHIP_FIJI) ||
            (adev->asic_type == CHIP_STONEY))
                return AMDGPU_VCE_HARVEST_VCE1;

        if (adev->flags & AMD_IS_APU)
                tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
                       VCE_HARVEST_FUSE_MACRO__MASK) >>
                        VCE_HARVEST_FUSE_MACRO__SHIFT;
        else
                tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
                       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
                        CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

        switch (tmp) {
        case 1:
                return AMDGPU_VCE_HARVEST_VCE0;
        case 2:
                return AMDGPU_VCE_HARVEST_VCE1;
        case 3:
                return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
        default:
                if ((adev->asic_type == CHIP_POLARIS10) ||
                    (adev->asic_type == CHIP_POLARIS11) ||
                    (adev->asic_type == CHIP_POLARIS12) ||
                    (adev->asic_type == CHIP_VEGAM))
                        return AMDGPU_VCE_HARVEST_VCE1;

                return 0;
        }
}

static int vce_v3_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

        if ((adev->vce.harvest_config &
             (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
            (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
                return -ENOENT;

        adev->vce.num_rings = 3;

        vce_v3_0_set_ring_funcs(adev);
        vce_v3_0_set_irq_funcs(adev);

        return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        int r, i;

        /* VCE */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
        if (r)
                return r;

        r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
                (VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
        if (r)
                return r;

        /* 52.8.3 required for 3 ring support */
        if (adev->vce.fw_version < FW_52_8_3)
                adev->vce.num_rings = 2;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);

                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
                                     hw_prio, NULL);
                if (r)
                        return r;
        }

        r = amdgpu_vce_entity_init(adev);

        return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        vce_v3_0_override_vce_clock_gating(adev, true);

        amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
                if (r)
                        return r;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        cancel_delayed_work_sync(&adev->vce.idle_work);

        r = vce_v3_0_wait_for_idle(handle);
        if (r)
                return r;

        vce_v3_0_stop(adev);
        return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /*
         * Proper cleanups before halting the HW engine:
         *   - cancel the delayed idle work
         *   - enable powergating
         *   - enable clockgating
         *   - disable dpm
         *
         * TODO: to align with the VCN implementation, move the
         * jobs for clockgating/powergating/dpm setting to
         * ->set_powergating_state().
         */
        cancel_delayed_work_sync(&adev->vce.idle_work);

        if (adev->pm.dpm_enabled) {
                amdgpu_dpm_enable_vce(adev, false);
        } else {
                amdgpu_asic_set_vce_clocks(adev, 0, 0);
                amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                                       AMD_PG_STATE_GATE);
                amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
                                                       AMD_CG_STATE_GATE);
        }

        r = vce_v3_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        return vce_v3_0_hw_init(adev);
}

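/**
 * vce_v3_0_mc_resume - program memory controller registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Point the VCPU cache windows at the firmware, stack and data segments
 * of the VCE BO; instance 1's stack and data are laid out directly after
 * instance 0's.
 */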
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
        uint32_t offset, size;

        WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
        WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
        WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
        WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

        WREG32(mmVCE_LMI_CTRL, 0x00398000);
        WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
        WREG32(mmVCE_LMI_SWAP_CNTL, 0);
        WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
        WREG32(mmVCE_LMI_VM_CTRL, 0);
        WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

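        /* Stoney and newer program a 40-bit BAR per cache window (the
         * BAR0/1/2 offsets defined at the top of this file); older parts
         * have a single 40-bit BAR.
         */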
        if (adev->asic_type >= CHIP_STONEY) {
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
        } else
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V3_0_FW_SIZE;
        WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
        WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

        if (idx == 0) {
                offset += size;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        } else {
                offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        }

        WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
        WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v3_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for the 1st instance, 0x10 for the 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v3_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (state == AMDGPU_IRQ_STATE_ENABLE)
                val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

                if (!enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_CLOCK_GATING_A, data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(mmVCE_UENC_CLOCK_GATING);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_UENC_CLOCK_GATING, data);
                }

                vce_v3_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret = 0;

        if (state == AMD_PG_STATE_GATE) {
                ret = vce_v3_0_stop(adev);
                if (ret)
                        goto out;
        } else {
                ret = vce_v3_0_start(adev);
                if (ret)
                        goto out;
        }

out:
        return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        mutex_lock(&adev->pm.mutex);

        if (adev->flags & AMD_IS_APU)
                data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
        else
                data = RREG32_SMC(ixCURRENT_PG_STATUS);

        if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
                DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
                goto out;
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

        /* AMD_CG_SUPPORT_VCE_MGCG */
        data = RREG32(mmVCE_CLOCK_GATING_A);
        if (data & (0x04 << 4))
                *flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
        mutex_unlock(&adev->pm.mutex);
}

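/**
 * vce_v3_0_ring_emit_ib - execute indirect buffer with VM
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Emits one VCE_CMD_IB_VM packet: command, vmid, IB address (lo/hi) and
 * length, five dwords in total, matching .emit_ib_size below.
 */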
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
                                  struct amdgpu_job *job,
                                  struct amdgpu_ib *ib,
                                  uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

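/* Point the VCPU at the new page table base (pd_addr >> 12) and flush the
 * TLB for this vmid; six dwords, matching the vm_flush share of
 * .emit_frame_size below.
 */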
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, pd_addr >> 12);

        amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, VCE_CMD_END);
}

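/* Emit a VCE_CMD_WAIT_GE packet that waits until the fence value at the
 * ring's fence address reaches the last synced sequence number; four
 * dwords, matching the pipeline_sync share of .emit_frame_size below.
 */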
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
        .name = "vce_v3_0",
        .early_init = vce_v3_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v3_0_sw_init,
        .sw_fini = vce_v3_0_sw_fini,
        .hw_init = vce_v3_0_hw_init,
        .hw_fini = vce_v3_0_hw_fini,
        .suspend = vce_v3_0_suspend,
        .resume = vce_v3_0_resume,
        .is_idle = vce_v3_0_is_idle,
        .wait_for_idle = vce_v3_0_wait_for_idle,
        .check_soft_reset = vce_v3_0_check_soft_reset,
        .pre_soft_reset = vce_v3_0_pre_soft_reset,
        .soft_reset = vce_v3_0_soft_reset,
        .post_soft_reset = vce_v3_0_post_soft_reset,
        .set_clockgating_state = vce_v3_0_set_clockgating_state,
        .set_powergating_state = vce_v3_0_set_powergating_state,
        .get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs,
        .emit_frame_size =
                4 + /* vce_v3_0_emit_pipeline_sync */
                6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
        .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
        .emit_ib = amdgpu_vce_ring_emit_ib,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                6 + /* vce_v3_0_emit_vm_flush */
                4 + /* vce_v3_0_emit_pipeline_sync */
                6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
        .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
        .emit_ib = vce_v3_0_ring_emit_ib,
        .emit_vm_flush = vce_v3_0_emit_vm_flush,
        .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        if (adev->asic_type >= CHIP_STONEY) {
                for (i = 0; i < adev->vce.num_rings; i++) {
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
                        adev->vce.ring[i].me = i;
                }
                DRM_INFO("VCE enabled in VM mode\n");
        } else {
                for (i = 0; i < adev->vce.num_rings; i++) {
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
                        adev->vce.ring[i].me = i;
                }
                DRM_INFO("VCE enabled in physical mode\n");
        }
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
        .set = vce_v3_0_set_interrupt_state,
        .process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 1,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 4,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};