linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT     0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK       0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE            0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V3_0_FW_SIZE        (384 * 1024)
#define VCE_V3_0_STACK_SIZE     (64 * 1024)
#define VCE_V3_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3       ((52 << 24) | (8 << 16) | (3 << 8))

#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
                                        | GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state);
/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                v = RREG32(mmVCE_RB_RPTR);
        else if (ring->me == 1)
                v = RREG32(mmVCE_RB_RPTR2);
        else
                v = RREG32(mmVCE_RB_RPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 v;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                v = RREG32(mmVCE_RB_WPTR);
        else if (ring->me == 1)
                v = RREG32(mmVCE_RB_WPTR2);
        else
                v = RREG32(mmVCE_RB_WPTR3);

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        mutex_lock(&adev->grbm_idx_mutex);
        if (adev->vce.harvest_config == 0 ||
                adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

        if (ring->me == 0)
                WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
        else if (ring->me == 1)
                WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
        else
                WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);
}

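/**
 * vce_v3_0_override_vce_clock_gating - toggle the clock gating override
 *
 * @adev: amdgpu_device pointer
 * @override: true to override (disable) clock gating, false to release it
 *
 * Sets or clears the VCE_CGTT_OVERRIDE field of VCE_RB_ARB_CTRL so the
 * clock gating registers can be reprogrammed without the hardware gating
 * logic interfering.
 */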
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

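/**
 * vce_v3_0_set_vce_sw_clock_gating - program VCE medium grain clock gating
 *
 * @adev: amdgpu_device pointer
 * @gated: true to put the clocks into the gated state, false to force them on
 *
 * Programs the VCE clock gating registers of the currently selected
 * instance while the CGTT override is asserted, then releases the override.
 */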
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v3_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
           With the clocks in the gated state the core is still
           accessible but the firmware will throttle the clocks on the
           fly as necessary.
        */
        if (!gated) {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data |= 0x37f;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        } else {
                data = RREG32(mmVCE_CLOCK_GATING_B);
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(mmVCE_CLOCK_GATING_B, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING);
                data |= 0xffc00000;
                WREG32(mmVCE_UENC_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
                data |= 0x10000;
                WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

                data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
                data &= ~0x3ff;
                WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

                data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
        }
        vce_v3_0_override_vce_clock_gating(adev, false);
}

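/**
 * vce_v3_0_firmware_loaded - poll for firmware start-up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the firmware reports that it has loaded,
 * resetting the ECPU and retrying when it does not respond.
 * Returns 0 on success or -ETIMEDOUT if the firmware never comes up.
 */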
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status = RREG32(mmVCE_STATUS);

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
                mdelay(10);
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(10);
        }

        return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int idx, r;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                /* Program the instance 0 register space when both instances are
                 * present or only instance 0 is available; program the instance 1
                 * register space when only instance 1 is available.
                 */
                if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
                        ring = &adev->vce.ring[0];
                        WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

                        ring = &adev->vce.ring[1];
                        WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

                        ring = &adev->vce.ring[2];
                        WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
                        WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
                        WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
                        WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
                }

                vce_v3_0_mc_resume(adev, idx);
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
                mdelay(100);

                r = vce_v3_0_firmware_loaded(adev);

                /* clear BUSY flag */
                WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

                if (r) {
                        DRM_ERROR("VCE not responding, giving up!!!\n");
                        mutex_unlock(&adev->grbm_idx_mutex);
                        return r;
                }
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

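/**
 * vce_v3_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Halt the VCE block: disable the VCPU clock, hold the ECPU in soft reset
 * and clear VCE_STATUS on every instance that is not harvested.
 */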
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
        int idx;

        mutex_lock(&adev->grbm_idx_mutex);
        for (idx = 0; idx < 2; ++idx) {
                if (adev->vce.harvest_config & (1 << idx))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

                if (adev->asic_type >= CHIP_STONEY)
                        WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
                else
                        WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

                /* hold on ECPU */
                WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

                /* clear VCE STATUS */
                WREG32(mmVCE_STATUS, 0);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

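/**
 * vce_v3_0_get_harvest_config - query which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Reads the harvest fuses (or applies per-ASIC policy) and returns a mask
 * of AMDGPU_VCE_HARVEST_VCE0/VCE1 bits describing the disabled instances.
 */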
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
        u32 tmp;

        if ((adev->asic_type == CHIP_FIJI) ||
            (adev->asic_type == CHIP_STONEY))
                return AMDGPU_VCE_HARVEST_VCE1;

        if (adev->flags & AMD_IS_APU)
                tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
                       VCE_HARVEST_FUSE_MACRO__MASK) >>
                        VCE_HARVEST_FUSE_MACRO__SHIFT;
        else
                tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
                       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
                        CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

        switch (tmp) {
        case 1:
                return AMDGPU_VCE_HARVEST_VCE0;
        case 2:
                return AMDGPU_VCE_HARVEST_VCE1;
        case 3:
                return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
        default:
                if ((adev->asic_type == CHIP_POLARIS10) ||
                    (adev->asic_type == CHIP_POLARIS11) ||
                    (adev->asic_type == CHIP_POLARIS12) ||
                    (adev->asic_type == CHIP_VEGAM))
                        return AMDGPU_VCE_HARVEST_VCE1;

                return 0;
        }
}

static int vce_v3_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

        if ((adev->vce.harvest_config &
             (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
            (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
                return -ENOENT;

        adev->vce.num_rings = 3;

        vce_v3_0_set_ring_funcs(adev);
        vce_v3_0_set_irq_funcs(adev);

        return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        int r, i;

        /* VCE */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
        if (r)
                return r;

        r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
                (VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
        if (r)
                return r;

        /* 52.8.3 required for 3 ring support */
        if (adev->vce.fw_version < FW_52_8_3)
                adev->vce.num_rings = 2;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        r = amdgpu_vce_entity_init(adev);

        return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        vce_v3_0_override_vce_clock_gating(adev, true);

        amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
                if (r)
                        return r;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v3_0_wait_for_idle(handle);
        if (r)
                return r;

        vce_v3_0_stop(adev);
        return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v3_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        return vce_v3_0_hw_init(adev);
}

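/**
 * vce_v3_0_mc_resume - program memory controller related registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Programs the LMI and VCPU cache registers so the selected instance can
 * fetch its firmware, stack and data segments from the VCE BO, and enables
 * the VCE system interrupt.
 */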
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
        uint32_t offset, size;

        WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
        WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
        WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
        WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

        WREG32(mmVCE_LMI_CTRL, 0x00398000);
        WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
        WREG32(mmVCE_LMI_SWAP_CNTL, 0);
        WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
        WREG32(mmVCE_LMI_VM_CTRL, 0);
        WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

        if (adev->asic_type >= CHIP_STONEY) {
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
        } else
                WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V3_0_FW_SIZE;
        WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
        WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

        if (idx == 0) {
                offset += size;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        } else {
                offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
                size = VCE_V3_0_STACK_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
                offset += size;
                size = VCE_V3_0_DATA_SIZE;
                WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
                WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
        }

        WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
        WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

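/**
 * vce_v3_0_is_idle - check VCE idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true when none of the non-harvested VCE instances report busy
 * in SRBM_STATUS2.
 */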
static bool vce_v3_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v3_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

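/**
 * vce_v3_0_check_soft_reset - determine whether VCE needs a soft reset
 *
 * @handle: amdgpu_device pointer
 *
 * Checks VCE_STATUS on both instances and, if either reports busy, records
 * the SRBM soft reset bits to be applied later.  Returns true when a soft
 * reset is required.
 */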
static bool vce_v3_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for 1st instance, 10 for 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3 to 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
        if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v3_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v3_0_resume(adev);
}

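/**
 * vce_v3_0_set_interrupt_state - enable/disable the VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Enables or disables the VCE system trap interrupt in VCE_SYS_INT_EN.
 */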
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (state == AMDGPU_IRQ_STATE_ENABLE)
                val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        return 0;
}

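/**
 * vce_v3_0_process_interrupt - handle a VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledges the interrupt and processes the fences on the ring that
 * generated it.
 */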
static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

                if (!enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_CLOCK_GATING_A, data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(mmVCE_UENC_CLOCK_GATING);
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(mmVCE_UENC_CLOCK_GATING, data);
                }

                vce_v3_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret = 0;

        if (state == AMD_PG_STATE_GATE) {
                ret = vce_v3_0_stop(adev);
                if (ret)
                        goto out;
        } else {
                ret = vce_v3_0_start(adev);
                if (ret)
                        goto out;
        }

out:
        return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        mutex_lock(&adev->pm.mutex);

        if (adev->flags & AMD_IS_APU)
                data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
        else
                data = RREG32_SMC(ixCURRENT_PG_STATUS);

        if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
                DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
                goto out;
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

        /* AMD_CG_SUPPORT_VCE_MGCG */
        data = RREG32(mmVCE_CLOCK_GATING_A);
        if (data & (0x04 << 4))
                *flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
        mutex_unlock(&adev->pm.mutex);
}

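/**
 * vce_v3_0_ring_emit_ib - emit an indirect buffer (VM mode)
 *
 * @ring: amdgpu_ring pointer
 * @job: job the IB belongs to
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Writes a VCE_CMD_IB_VM packet referencing the IB's VM id, address and size.
 */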
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
                                  struct amdgpu_job *job,
                                  struct amdgpu_ib *ib,
                                  uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

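/**
 * vce_v3_0_emit_vm_flush - emit a page table update and TLB flush
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM id the flush is for
 * @pd_addr: page directory address
 *
 * Emits VCE_CMD_UPDATE_PTB with the new page directory base followed by
 * VCE_CMD_FLUSH_TLB for the given VM id.
 */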
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, pd_addr >> 12);

        amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, VCE_CMD_END);
}

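/**
 * vce_v3_0_emit_pipeline_sync - emit a wait on the previous fence
 *
 * @ring: amdgpu_ring pointer
 *
 * Emits a VCE_CMD_WAIT_GE packet that stalls the ring until the fence for
 * the last synchronized sequence number has signaled.
 */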
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
        .name = "vce_v3_0",
        .early_init = vce_v3_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v3_0_sw_init,
        .sw_fini = vce_v3_0_sw_fini,
        .hw_init = vce_v3_0_hw_init,
        .hw_fini = vce_v3_0_hw_fini,
        .suspend = vce_v3_0_suspend,
        .resume = vce_v3_0_resume,
        .is_idle = vce_v3_0_is_idle,
        .wait_for_idle = vce_v3_0_wait_for_idle,
        .check_soft_reset = vce_v3_0_check_soft_reset,
        .pre_soft_reset = vce_v3_0_pre_soft_reset,
        .soft_reset = vce_v3_0_soft_reset,
        .post_soft_reset = vce_v3_0_post_soft_reset,
        .set_clockgating_state = vce_v3_0_set_clockgating_state,
        .set_powergating_state = vce_v3_0_set_powergating_state,
        .get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs,
        .emit_frame_size =
                4 + /* vce_v3_0_emit_pipeline_sync */
                6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
        .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
        .emit_ib = amdgpu_vce_ring_emit_ib,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0xf,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .get_rptr = vce_v3_0_ring_get_rptr,
        .get_wptr = vce_v3_0_ring_get_wptr,
        .set_wptr = vce_v3_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                6 + /* vce_v3_0_emit_vm_flush */
                4 + /* vce_v3_0_emit_pipeline_sync */
                6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
        .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
        .emit_ib = vce_v3_0_ring_emit_ib,
        .emit_vm_flush = vce_v3_0_emit_vm_flush,
        .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
        .emit_fence = amdgpu_vce_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        if (adev->asic_type >= CHIP_STONEY) {
                for (i = 0; i < adev->vce.num_rings; i++) {
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
                        adev->vce.ring[i].me = i;
                }
                DRM_INFO("VCE enabled in VM mode\n");
        } else {
                for (i = 0; i < adev->vce.num_rings; i++) {
                        adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
                        adev->vce.ring[i].me = i;
                }
                DRM_INFO("VCE enabled in physical mode\n");
        }
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
        .set = vce_v3_0_set_interrupt_state,
        .process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 1,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 3,
        .minor = 4,
        .rev = 0,
        .funcs = &vce_v3_0_ip_funcs,
};