linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT     0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK       0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE            0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V3_0_FW_SIZE        (384 * 1024)
#define VCE_V3_0_STACK_SIZE     (64 * 1024)
#define VCE_V3_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3       ((52 << 24) | (8 << 16) | (3 << 8))

#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
                                        | GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state);
/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring == &adev->vce.ring[0])
                v = RREG32(mmVCE_RB_RPTR);
        else if (ring == &adev->vce.ring[1])
                v = RREG32(mmVCE_RB_RPTR2);
        else
                v = RREG32(mmVCE_RB_RPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring == &adev->vce.ring[0])
                v = RREG32(mmVCE_RB_WPTR);
        else if (ring == &adev->vce.ring[1])
                v = RREG32(mmVCE_RB_WPTR2);
        else
                v = RREG32(mmVCE_RB_WPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring == &adev->vce.ring[0])
                WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
        else if (ring == &adev->vce.ring[1])
                WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
        else
                WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);
}

static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v3_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (!gated) {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data |= 0x37f;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        } else {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x10000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data &= ~0x3ff;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        }
        vce_v3_0_override_vce_clock_gating(adev, false);
}

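/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the VCPU_REPORT_FW_LOADED flag, toggling the ECPU
 * soft reset between polling rounds.  Returns 0 once the firmware reports
 * as loaded, or -ETIMEDOUT if it never does.
 */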
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status = RREG32(mmVCE_STATUS);

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
                mdelay(10);
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int idx, r;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                /* Program the instance 0 register space when both instances
                 * are present or only instance 0 is; program the instance 1
                 * register space when only instance 1 is available.
                 */
                if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
                        ring = &adev->vce.ring[0];
                        WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

                        ring = &adev->vce.ring[1];
                        WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

                        ring = &adev->vce.ring[2];
                        WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
                }

                vce_v3_0_mc_resume(adev, idx);
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(100);

                r = vce_v3_0_firmware_loaded(adev);

                /* clear BUSY flag */
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

                if (r) {
                        DRM_ERROR("VCE not responding, giving up!!!\n");
                        mutex_unlock(&adev->grbm_idx_mutex);
                        return r;
                }
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

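/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * For each non-harvested instance, disable the VCPU clock, hold the ECPU
 * in soft reset and clear VCE_STATUS.
 */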
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
        int idx;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

                /* hold on ECPU */
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

                /* clear VCE STATUS */
                WREG32(mmVCE_STATUS, 0);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

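/**
 * vce_v3_0_get_harvest_config - query which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Single-pipe parts always report instance 1 as harvested; for the other
 * ASICs the harvest fuses are read to build the AMDGPU_VCE_HARVEST_* mask.
 */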
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
        u32 tmp;

        /* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
        if ((adev->asic_type == CHIP_FIJI) ||
            (adev->asic_type == CHIP_STONEY) ||
            (adev->asic_type == CHIP_POLARIS10) ||
            (adev->asic_type == CHIP_POLARIS11) ||
            (adev->asic_type == CHIP_POLARIS12))
                return AMDGPU_VCE_HARVEST_VCE1;

        /* Tonga and CZ are dual or single pipe */
        if (adev->flags & AMD_IS_APU)
                tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
                       VCE_HARVEST_FUSE_MACRO__MASK) >>
                        VCE_HARVEST_FUSE_MACRO__SHIFT;
        else
                tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
                       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
                        CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

        switch (tmp) {
        case 1:
                return AMDGPU_VCE_HARVEST_VCE0;
        case 2:
                return AMDGPU_VCE_HARVEST_VCE1;
        case 3:
                return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
        default:
                return 0;
        }
}

static int vce_v3_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

        if ((adev->vce.harvest_config &
             (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
            (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
                return -ENOENT;

        adev->vce.num_rings = 3;

        vce_v3_0_set_ring_funcs(adev);
        vce_v3_0_set_irq_funcs(adev);

        return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        int r, i;

        /* VCE */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq);
        if (r)
                return r;

        r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
                (VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
        if (r)
                return r;

        /* 52.8.3 required for 3 ring support */
        if (adev->vce.fw_version < FW_52_8_3)
                adev->vce.num_rings = 2;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        vce_v3_0_override_vce_clock_gating(adev, true);
        if (!(adev->flags & AMD_IS_APU))
                amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
                if (r)
                        return r;
                else
                        adev->vce.ring[i].ready = true;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v3_0_wait_for_idle(handle);
        if (r)
                return r;

        vce_v3_0_stop(adev);
        return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v3_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        return vce_v3_0_hw_init(adev);
}

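/**
 * vce_v3_0_mc_resume - program the VCE memory controller
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Programs clock gating, LMI and VCPU cache registers so that the selected
 * instance fetches firmware, stack and data from the right offsets within
 * the VCE BO.
 */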
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
        uint32_t offset, size;

        WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
        WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
        WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
        WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

        WREG32(mmVCE_LMI_CTRL, 0x00398000);
        WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
        WREG32(mmVCE_LMI_SWAP_CNTL, 0);
        WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
        WREG32(mmVCE_LMI_VM_CTRL, 0);
        WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

        if (adev->asic_type >= CHIP_STONEY) {
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
        } else
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V3_0_FW_SIZE;
        WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
        WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

        if (idx == 0) {
                offset += size;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        } else {
                offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        }

        WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
        WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v3_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for 1st instance, 10 for 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v3_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_resume(adev);
}

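/**
 * vce_v3_0_set_interrupt_state - toggle the VCE system trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: enable or disable the interrupt
 */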
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (state == AMDGPU_IRQ_STATE_ENABLE)
                val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

                if (!enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_CLOCK_GATING_A, data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(mmVCE_UENC_CLOCK_GATING);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_UENC_CLOCK_GATING, data);
                }

                vce_v3_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret = 0;

        if (state == AMD_PG_STATE_GATE) {
                ret = vce_v3_0_stop(adev);
                if (ret)
                        goto out;
        } else {
                ret = vce_v3_0_start(adev);
                if (ret)
                        goto out;
        }

out:
        return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        mutex_lock(&adev->pm.mutex);

        if (adev->flags & AMD_IS_APU)
                data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
        else
                data = RREG32_SMC(ixCURRENT_PG_STATUS);

        if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
                DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
                goto out;
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

        /* AMD_CG_SUPPORT_VCE_MGCG */
        data = RREG32(mmVCE_CLOCK_GATING_A);
        if (data & (0x04 << 4))
                *flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
        mutex_unlock(&adev->pm.mutex);
}

static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vm_id);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

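/**
 * vce_v3_0_emit_vm_flush - flush the VM TLB from the VCE ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush for
 * @pd_addr: page directory base address
 *
 * Emits UPDATE_PTB and FLUSH_TLB commands on the ring.
 */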
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
                         unsigned int vm_id, uint64_t pd_addr)
{
        amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
        amdgpu_ring_write(ring, vm_id);
        amdgpu_ring_write(ring, pd_addr >> 12);

        amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
        amdgpu_ring_write(ring, vm_id);
        amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
        .name = "vce_v3_0",
        .early_init = vce_v3_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v3_0_sw_init,
        .sw_fini = vce_v3_0_sw_fini,
        .hw_init = vce_v3_0_hw_init,
        .hw_fini = vce_v3_0_hw_fini,
        .suspend = vce_v3_0_suspend,
        .resume = vce_v3_0_resume,
        .is_idle = vce_v3_0_is_idle,
        .wait_for_idle = vce_v3_0_wait_for_idle,
        .check_soft_reset = vce_v3_0_check_soft_reset,
        .pre_soft_reset = vce_v3_0_pre_soft_reset,
        .soft_reset = vce_v3_0_soft_reset,
        .post_soft_reset = vce_v3_0_post_soft_reset,
        .set_clockgating_state = vce_v3_0_set_clockgating_state,
        .set_powergating_state = vce_v3_0_set_powergating_state,
        .get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs,
        .emit_frame_size =
                4 + /* vce_v3_0_emit_pipeline_sync */
                6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
        .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
        .emit_ib = amdgpu_vce_ring_emit_ib,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                6 + /* vce_v3_0_emit_vm_flush */
                4 + /* vce_v3_0_emit_pipeline_sync */
                6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
        .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
        .emit_ib = vce_v3_0_ring_emit_ib,
        .emit_vm_flush = vce_v3_0_emit_vm_flush,
        .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        if (adev->asic_type >= CHIP_STONEY) {
                for (i = 0; i < adev->vce.num_rings; i++)
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
                DRM_INFO("VCE enabled in VM mode\n");
        } else {
                for (i = 0; i < adev->vce.num_rings; i++)
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
                DRM_INFO("VCE enabled in physical mode\n");
        }
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
        .set = vce_v3_0_set_interrupt_state,
        .process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 1,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 4,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};