linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"


#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT     0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK       0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE            0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V3_0_FW_SIZE        (384 * 1024)
#define VCE_V3_0_STACK_SIZE     (64 * 1024)
#define VCE_V3_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3       ((52 << 24) | (8 << 16) | (3 << 8))

#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
                                        | GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                v = RREG32(mmVCE_RB_RPTR);
        else if (ring->me == 1)
                v = RREG32(mmVCE_RB_RPTR2);
        else
                v = RREG32(mmVCE_RB_RPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                v = RREG32(mmVCE_RB_WPTR);
        else if (ring->me == 1)
                v = RREG32(mmVCE_RB_WPTR2);
        else
                v = RREG32(mmVCE_RB_WPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
        else if (ring->me == 1)
                WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
        else
                WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);
}

static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

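/**
 * vce_v3_0_set_vce_sw_clock_gating - program software clock gating
 *
 * @adev: amdgpu_device pointer
 * @gated: true to program the gated state, false for the ungated state
 *
 * Programs the clock gating registers of the currently selected VCE
 * instance for either the gated or the ungated state.
 */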
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v3_0_override_vce_clock_gating(adev, true);

        /*
         * This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (!gated) {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data |= 0x37f;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        } else {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x10000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data &= ~0x3ff;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        }
        vce_v3_0_override_vce_clock_gating(adev, false);
}

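/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report as loaded
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the firmware-loaded flag, resetting the ECPU and
 * retrying a few times if the flag does not show up.
 *
 * Returns 0 on success, -ETIMEDOUT if the firmware never reports as loaded.
 */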
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status = RREG32(mmVCE_STATUS);

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
                mdelay(10);
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int idx, r;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                /* Program the instance 0 register space when both instances
                 * are present or only instance 0 is available; program the
                 * instance 1 register space when only instance 1 is available.
                 */
                if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
                        ring = &adev->vce.ring[0];
                        WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

                        ring = &adev->vce.ring[1];
                        WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

                        ring = &adev->vce.ring[2];
                        WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
                }

                vce_v3_0_mc_resume(adev, idx);
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(100);

                r = vce_v3_0_firmware_loaded(adev);

                /* clear BUSY flag */
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

                if (r) {
                        DRM_ERROR("VCE not responding, giving up!!!\n");
                        mutex_unlock(&adev->grbm_idx_mutex);
                        return r;
                }
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

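/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in reset and clears VCE_STATUS
 * for every VCE instance that is not harvested.
 */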
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
        int idx;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

                /* hold on ECPU */
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

                /* clear VCE STATUS */
                WREG32(mmVCE_STATUS, 0);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

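/**
 * vce_v3_0_get_harvest_config - determine which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Reads the harvest fuses (or applies per-ASIC policy) and returns a mask
 * of AMDGPU_VCE_HARVEST_VCE0/VCE1 bits for the disabled instances.
 */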
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
        u32 tmp;

        if ((adev->asic_type == CHIP_FIJI) ||
            (adev->asic_type == CHIP_STONEY))
                return AMDGPU_VCE_HARVEST_VCE1;

        if (adev->flags & AMD_IS_APU)
                tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
                       VCE_HARVEST_FUSE_MACRO__MASK) >>
                        VCE_HARVEST_FUSE_MACRO__SHIFT;
        else
                tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
                       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
                        CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

        switch (tmp) {
        case 1:
                return AMDGPU_VCE_HARVEST_VCE0;
        case 2:
                return AMDGPU_VCE_HARVEST_VCE1;
        case 3:
                return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
        default:
                if ((adev->asic_type == CHIP_POLARIS10) ||
                    (adev->asic_type == CHIP_POLARIS11) ||
                    (adev->asic_type == CHIP_POLARIS12) ||
                    (adev->asic_type == CHIP_VEGAM))
                        return AMDGPU_VCE_HARVEST_VCE1;

                return 0;
        }
}

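/**
 * vce_v3_0_early_init - early hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Determines the harvest configuration, sets the number of rings and
 * installs the ring and irq callbacks. Returns -ENOENT if both VCE
 * instances are harvested.
 */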
static int vce_v3_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

        if ((adev->vce.harvest_config &
             (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
            (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
                return -ENOENT;

        adev->vce.num_rings = 3;

        vce_v3_0_set_ring_funcs(adev);
        vce_v3_0_set_irq_funcs(adev);

        return 0;
}

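/**
 * vce_v3_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, sets up the VCE firmware and
 * initializes the VCE rings.
 */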
static int vce_v3_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        int r, i;

        /* VCE */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
        if (r)
                return r;

        r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
                (VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
        if (r)
                return r;

        /* 52.8.3 required for 3 ring support */
        if (adev->vce.fw_version < FW_52_8_3)
                adev->vce.num_rings = 2;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
                                     AMDGPU_RING_PRIO_DEFAULT, NULL);
                if (r)
                        return r;
        }

        r = amdgpu_vce_entity_init(adev);

        return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        vce_v3_0_override_vce_clock_gating(adev, true);

        amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
                if (r)
                        return r;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v3_0_wait_for_idle(handle);
        if (r)
                return r;

        vce_v3_0_stop(adev);
        return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v3_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        return vce_v3_0_hw_init(adev);
}

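/**
 * vce_v3_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Programs the LMI and VCPU cache registers of the selected instance so
 * that the firmware, stack and data regions are mapped correctly.
 */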
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
        uint32_t offset, size;

        WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
        WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
        WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
        WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

        WREG32(mmVCE_LMI_CTRL, 0x00398000);
        WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
        WREG32(mmVCE_LMI_SWAP_CNTL, 0);
        WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
        WREG32(mmVCE_LMI_VM_CTRL, 0);
        WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

        if (adev->asic_type >= CHIP_STONEY) {
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
        } else
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V3_0_FW_SIZE;
        WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
        WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

        if (idx == 0) {
                offset += size;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        } else {
                offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        }

        WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
        WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v3_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

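/**
 * vce_v3_0_check_soft_reset - check whether VCE needs a soft reset
 *
 * @handle: amdgpu_device pointer
 *
 * Checks the busy bits in VCE_STATUS of both instances and records the
 * required SRBM_SOFT_RESET value if either instance is stuck busy.
 */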
static bool vce_v3_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of the
         * SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for the 1st instance, 0x10 for the 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v3_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (state == AMDGPU_IRQ_STATE_ENABLE)
                val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

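/**
 * vce_v3_0_set_clockgating_state - enable or disable VCE clock gating
 *
 * @handle: amdgpu_device pointer
 * @state: AMD_CG_STATE_GATE to enable gating, AMD_CG_STATE_UNGATE to disable
 *
 * Programs the clock gating registers of every non-harvested VCE instance.
 * Does nothing if VCE MGCG is not supported on this ASIC.
 */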
static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

                if (!enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_CLOCK_GATING_A, data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(mmVCE_UENC_CLOCK_GATING);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_UENC_CLOCK_GATING, data);
                }

                vce_v3_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret = 0;

        if (state == AMD_PG_STATE_GATE) {
                ret = vce_v3_0_stop(adev);
                if (ret)
                        goto out;
        } else {
                ret = vce_v3_0_start(adev);
                if (ret)
                        goto out;
        }

out:
        return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        mutex_lock(&adev->pm.mutex);

        if (adev->flags & AMD_IS_APU)
                data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
        else
                data = RREG32_SMC(ixCURRENT_PG_STATUS);

        if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
                DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
                goto out;
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

        /* AMD_CG_SUPPORT_VCE_MGCG */
        data = RREG32(mmVCE_CLOCK_GATING_A);
        if (data & (0x04 << 4))
                *flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
        mutex_unlock(&adev->pm.mutex);
}

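/**
 * vce_v3_0_ring_emit_ib - emit an indirect buffer on the VM ring
 *
 * @ring: amdgpu_ring pointer
 * @job: job that owns the indirect buffer
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Writes a VCE_CMD_IB_VM packet with the VMID and the IB address to the ring.
 */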
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
                                  struct amdgpu_job *job,
                                  struct amdgpu_ib *ib,
                                  uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

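/**
 * vce_v3_0_emit_vm_flush - emit a VM flush on the ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VMID to flush
 * @pd_addr: page directory address
 *
 * Updates the page table base for @vmid and flushes the TLB.
 */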
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, pd_addr >> 12);

        amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, VCE_CMD_END);
}

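/**
 * vce_v3_0_emit_pipeline_sync - emit a pipeline synchronization
 *
 * @ring: amdgpu_ring pointer
 *
 * Emits a VCE_CMD_WAIT_GE packet so the engine waits until the fence memory
 * reaches the last synced sequence number before processing further commands.
 */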
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
        .name = "vce_v3_0",
        .early_init = vce_v3_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v3_0_sw_init,
        .sw_fini = vce_v3_0_sw_fini,
        .hw_init = vce_v3_0_hw_init,
        .hw_fini = vce_v3_0_hw_fini,
        .suspend = vce_v3_0_suspend,
        .resume = vce_v3_0_resume,
        .is_idle = vce_v3_0_is_idle,
        .wait_for_idle = vce_v3_0_wait_for_idle,
        .check_soft_reset = vce_v3_0_check_soft_reset,
        .pre_soft_reset = vce_v3_0_pre_soft_reset,
        .soft_reset = vce_v3_0_soft_reset,
        .post_soft_reset = vce_v3_0_post_soft_reset,
        .set_clockgating_state = vce_v3_0_set_clockgating_state,
        .set_powergating_state = vce_v3_0_set_powergating_state,
        .get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs,
        .emit_frame_size =
                4 + /* vce_v3_0_emit_pipeline_sync */
                6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
        .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
        .emit_ib = amdgpu_vce_ring_emit_ib,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                6 + /* vce_v3_0_emit_vm_flush */
                4 + /* vce_v3_0_emit_pipeline_sync */
                6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
        .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
        .emit_ib = vce_v3_0_ring_emit_ib,
        .emit_vm_flush = vce_v3_0_emit_vm_flush,
        .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        if (adev->asic_type >= CHIP_STONEY) {
                for (i = 0; i < adev->vce.num_rings; i++) {
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
                        adev->vce.ring[i].me = i;
                }
                DRM_INFO("VCE enabled in VM mode\n");
        } else {
                for (i = 0; i < adev->vce.num_rings; i++) {
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
                        adev->vce.ring[i].me = i;
                }
                DRM_INFO("VCE enabled in physical mode\n");
        }
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
        .set = vce_v3_0_set_interrupt_state,
        .process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
};

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 1,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 4,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};