linux/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
   1/*
   2 * Copyright 2011 Advanced Micro Devices, Inc.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26/*
  27 * Authors:
  28 *    Christian König <deathsimple@vodafone.de>
  29 */
  30
  31#include <linux/firmware.h>
  32#include <linux/module.h>
  33
  34#include <drm/drm.h>
  35#include <drm/drm_drv.h>
  36
  37#include "amdgpu.h"
  38#include "amdgpu_pm.h"
  39#include "amdgpu_uvd.h"
  40#include "cikd.h"
  41#include "uvd/uvd_4_2_d.h"
  42
  43#include "amdgpu_ras.h"
  44
  45/* 1 second timeout */
  46#define UVD_IDLE_TIMEOUT        msecs_to_jiffies(1000)
  47
  48/* Firmware versions for VI */
  49#define FW_1_65_10      ((1 << 24) | (65 << 16) | (10 << 8))
  50#define FW_1_87_11      ((1 << 24) | (87 << 16) | (11 << 8))
  51#define FW_1_87_12      ((1 << 24) | (87 << 16) | (12 << 8))
  52#define FW_1_37_15      ((1 << 24) | (37 << 16) | (15 << 8))
  53
  54/* Polaris10/11 firmware version */
  55#define FW_1_66_16      ((1 << 24) | (66 << 16) | (16 << 8))
  56
  57/* Firmware Names */
  58#ifdef CONFIG_DRM_AMDGPU_SI
  59#define FIRMWARE_TAHITI         "amdgpu/tahiti_uvd.bin"
  60#define FIRMWARE_VERDE          "amdgpu/verde_uvd.bin"
  61#define FIRMWARE_PITCAIRN       "amdgpu/pitcairn_uvd.bin"
  62#define FIRMWARE_OLAND          "amdgpu/oland_uvd.bin"
  63#endif
  64#ifdef CONFIG_DRM_AMDGPU_CIK
  65#define FIRMWARE_BONAIRE        "amdgpu/bonaire_uvd.bin"
  66#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
  67#define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin"
  68#define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin"
  69#define FIRMWARE_MULLINS        "amdgpu/mullins_uvd.bin"
  70#endif
  71#define FIRMWARE_TONGA          "amdgpu/tonga_uvd.bin"
  72#define FIRMWARE_CARRIZO        "amdgpu/carrizo_uvd.bin"
  73#define FIRMWARE_FIJI           "amdgpu/fiji_uvd.bin"
  74#define FIRMWARE_STONEY         "amdgpu/stoney_uvd.bin"
  75#define FIRMWARE_POLARIS10      "amdgpu/polaris10_uvd.bin"
  76#define FIRMWARE_POLARIS11      "amdgpu/polaris11_uvd.bin"
  77#define FIRMWARE_POLARIS12      "amdgpu/polaris12_uvd.bin"
  78#define FIRMWARE_VEGAM          "amdgpu/vegam_uvd.bin"
  79
  80#define FIRMWARE_VEGA10         "amdgpu/vega10_uvd.bin"
  81#define FIRMWARE_VEGA12         "amdgpu/vega12_uvd.bin"
  82#define FIRMWARE_VEGA20         "amdgpu/vega20_uvd.bin"
  83
   84/* These are common relative offsets for all ASICs, from uvd_7_0_offset.h */
  85#define UVD_GPCOM_VCPU_CMD              0x03c3
  86#define UVD_GPCOM_VCPU_DATA0    0x03c4
  87#define UVD_GPCOM_VCPU_DATA1    0x03c5
  88#define UVD_NO_OP                               0x03ff
  89#define UVD_BASE_SI                             0x3800
  90
  91/*
  92 * amdgpu_uvd_cs_ctx - Command submission parser context
  93 *
  94 * Used for emulating virtual memory support on UVD 4.2.
  95 */
  96struct amdgpu_uvd_cs_ctx {
  97        struct amdgpu_cs_parser *parser;
  98        unsigned reg, count;
  99        unsigned data0, data1;
 100        unsigned idx;
 101        unsigned ib_idx;
 102
  103        /* does the IB have a msg command */
 104        bool has_msg_cmd;
 105
 106        /* minimum buffer sizes */
 107        unsigned *buf_sizes;
 108};
 109
 110#ifdef CONFIG_DRM_AMDGPU_SI
 111MODULE_FIRMWARE(FIRMWARE_TAHITI);
 112MODULE_FIRMWARE(FIRMWARE_VERDE);
 113MODULE_FIRMWARE(FIRMWARE_PITCAIRN);
 114MODULE_FIRMWARE(FIRMWARE_OLAND);
 115#endif
 116#ifdef CONFIG_DRM_AMDGPU_CIK
 117MODULE_FIRMWARE(FIRMWARE_BONAIRE);
 118MODULE_FIRMWARE(FIRMWARE_KABINI);
 119MODULE_FIRMWARE(FIRMWARE_KAVERI);
 120MODULE_FIRMWARE(FIRMWARE_HAWAII);
 121MODULE_FIRMWARE(FIRMWARE_MULLINS);
 122#endif
 123MODULE_FIRMWARE(FIRMWARE_TONGA);
 124MODULE_FIRMWARE(FIRMWARE_CARRIZO);
 125MODULE_FIRMWARE(FIRMWARE_FIJI);
 126MODULE_FIRMWARE(FIRMWARE_STONEY);
 127MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 128MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 129MODULE_FIRMWARE(FIRMWARE_POLARIS12);
 130MODULE_FIRMWARE(FIRMWARE_VEGAM);
 131
 132MODULE_FIRMWARE(FIRMWARE_VEGA10);
 133MODULE_FIRMWARE(FIRMWARE_VEGA12);
 134MODULE_FIRMWARE(FIRMWARE_VEGA20);
 135
 136static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 137static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
 138
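     /**
      * amdgpu_uvd_create_msg_bo_helper - allocate a BO for UVD messages
      *
      * @adev: amdgpu_device pointer
      * @size: size of the message BO in bytes
      * @bo_ptr: resulting message BO
      *
      * Create a kernel BO in GTT; on chips without 64 bit UVD addressing
      * the BO is moved to VRAM, forced into the UVD segment and pinned.
      */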
 139static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
 140                                           uint32_t size,
 141                                           struct amdgpu_bo **bo_ptr)
 142{
 143        struct ttm_operation_ctx ctx = { true, false };
 144        struct amdgpu_bo *bo = NULL;
 145        void *addr;
 146        int r;
 147
 148        r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
 149                                      AMDGPU_GEM_DOMAIN_GTT,
 150                                      &bo, NULL, &addr);
 151        if (r)
 152                return r;
 153
 154        if (adev->uvd.address_64_bit)
 155                goto succ;
 156
 157        amdgpu_bo_kunmap(bo);
 158        amdgpu_bo_unpin(bo);
 159        amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
 160        amdgpu_uvd_force_into_uvd_segment(bo);
 161        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 162        if (r)
 163                goto err;
 164        r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
 165        if (r)
 166                goto err_pin;
 167        r = amdgpu_bo_kmap(bo, &addr);
 168        if (r)
 169                goto err_kmap;
 170succ:
 171        amdgpu_bo_unreserve(bo);
 172        *bo_ptr = bo;
 173        return 0;
 174err_kmap:
 175        amdgpu_bo_unpin(bo);
 176err_pin:
 177err:
 178        amdgpu_bo_unreserve(bo);
 179        amdgpu_bo_unref(&bo);
 180        return r;
 181}
 182
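     /**
      * amdgpu_uvd_sw_init - UVD software init
      *
      * @adev: amdgpu_device pointer
      *
      * Request and validate the UVD firmware, determine the firmware version
      * and the number of supported handles, allocate the VCPU BO for each
      * UVD instance and create the shared message BO.
      */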
 183int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 184{
 185        unsigned long bo_size;
 186        const char *fw_name;
 187        const struct common_firmware_header *hdr;
 188        unsigned family_id;
 189        int i, j, r;
 190
 191        INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
 192
 193        switch (adev->asic_type) {
 194#ifdef CONFIG_DRM_AMDGPU_SI
 195        case CHIP_TAHITI:
 196                fw_name = FIRMWARE_TAHITI;
 197                break;
 198        case CHIP_VERDE:
 199                fw_name = FIRMWARE_VERDE;
 200                break;
 201        case CHIP_PITCAIRN:
 202                fw_name = FIRMWARE_PITCAIRN;
 203                break;
 204        case CHIP_OLAND:
 205                fw_name = FIRMWARE_OLAND;
 206                break;
 207#endif
 208#ifdef CONFIG_DRM_AMDGPU_CIK
 209        case CHIP_BONAIRE:
 210                fw_name = FIRMWARE_BONAIRE;
 211                break;
 212        case CHIP_KABINI:
 213                fw_name = FIRMWARE_KABINI;
 214                break;
 215        case CHIP_KAVERI:
 216                fw_name = FIRMWARE_KAVERI;
 217                break;
 218        case CHIP_HAWAII:
 219                fw_name = FIRMWARE_HAWAII;
 220                break;
 221        case CHIP_MULLINS:
 222                fw_name = FIRMWARE_MULLINS;
 223                break;
 224#endif
 225        case CHIP_TONGA:
 226                fw_name = FIRMWARE_TONGA;
 227                break;
 228        case CHIP_FIJI:
 229                fw_name = FIRMWARE_FIJI;
 230                break;
 231        case CHIP_CARRIZO:
 232                fw_name = FIRMWARE_CARRIZO;
 233                break;
 234        case CHIP_STONEY:
 235                fw_name = FIRMWARE_STONEY;
 236                break;
 237        case CHIP_POLARIS10:
 238                fw_name = FIRMWARE_POLARIS10;
 239                break;
 240        case CHIP_POLARIS11:
 241                fw_name = FIRMWARE_POLARIS11;
 242                break;
 243        case CHIP_POLARIS12:
 244                fw_name = FIRMWARE_POLARIS12;
 245                break;
 246        case CHIP_VEGA10:
 247                fw_name = FIRMWARE_VEGA10;
 248                break;
 249        case CHIP_VEGA12:
 250                fw_name = FIRMWARE_VEGA12;
 251                break;
 252        case CHIP_VEGAM:
 253                fw_name = FIRMWARE_VEGAM;
 254                break;
 255        case CHIP_VEGA20:
 256                fw_name = FIRMWARE_VEGA20;
 257                break;
 258        default:
 259                return -EINVAL;
 260        }
 261
 262        r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
 263        if (r) {
 264                dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
 265                        fw_name);
 266                return r;
 267        }
 268
 269        r = amdgpu_ucode_validate(adev->uvd.fw);
 270        if (r) {
 271                dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
 272                        fw_name);
 273                release_firmware(adev->uvd.fw);
 274                adev->uvd.fw = NULL;
 275                return r;
 276        }
 277
 278        /* Set the default UVD handles that the firmware can handle */
 279        adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES;
 280
 281        hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
 282        family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
 283
 284        if (adev->asic_type < CHIP_VEGA20) {
 285                unsigned version_major, version_minor;
 286
 287                version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
 288                version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
 289                DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
 290                        version_major, version_minor, family_id);
 291
 292                /*
  293                 * Limit the number of UVD handles depending on microcode major
  294                 * and minor versions. The first firmware version with support for
  295                 * 40 UVD handles is 1.80, so all subsequent versions should have
  296                 * the same support.
 297                 */
 298                if ((version_major > 0x01) ||
 299                    ((version_major == 0x01) && (version_minor >= 0x50)))
 300                        adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
 301
 302                adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
 303                                        (family_id << 8));
 304
 305                if ((adev->asic_type == CHIP_POLARIS10 ||
 306                     adev->asic_type == CHIP_POLARIS11) &&
 307                    (adev->uvd.fw_version < FW_1_66_16))
 308                        DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n",
 309                                  version_major, version_minor);
 310        } else {
 311                unsigned int enc_major, enc_minor, dec_minor;
 312
 313                dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
 314                enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
 315                enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
 316                DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",
 317                        enc_major, enc_minor, dec_minor, family_id);
 318
 319                adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
 320
 321                adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
 322        }
 323
 324        bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
 325                  +  AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
 326        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 327                bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
 328
 329        for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
 330                if (adev->uvd.harvest_config & (1 << j))
 331                        continue;
 332                r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
 333                                            AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
 334                                            &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
 335                if (r) {
 336                        dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
 337                        return r;
 338                }
 339        }
 340
 341        for (i = 0; i < adev->uvd.max_handles; ++i) {
 342                atomic_set(&adev->uvd.handles[i], 0);
 343                adev->uvd.filp[i] = NULL;
 344        }
 345
  346        /* from UVD v5.0, HW addressing capacity increased to 64 bits */
 347        if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
 348                adev->uvd.address_64_bit = true;
 349
 350        r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
 351        if (r)
 352                return r;
 353
 354        switch (adev->asic_type) {
 355        case CHIP_TONGA:
 356                adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
 357                break;
 358        case CHIP_CARRIZO:
 359                adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
 360                break;
 361        case CHIP_FIJI:
 362                adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
 363                break;
 364        case CHIP_STONEY:
 365                adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
 366                break;
 367        default:
 368                adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
 369        }
 370
 371        return 0;
 372}
 373
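     /**
      * amdgpu_uvd_sw_fini - UVD software fini
      *
      * @adev: amdgpu_device pointer
      *
      * Destroy the kernel entity, free the VCPU and message BOs, tear down
      * the rings and release the firmware.
      */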
 374int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 375{
 376        void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo);
 377        int i, j;
 378
 379        drm_sched_entity_destroy(&adev->uvd.entity);
 380
 381        for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
 382                if (adev->uvd.harvest_config & (1 << j))
 383                        continue;
 384                kvfree(adev->uvd.inst[j].saved_bo);
 385
 386                amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
 387                                      &adev->uvd.inst[j].gpu_addr,
 388                                      (void **)&adev->uvd.inst[j].cpu_addr);
 389
 390                amdgpu_ring_fini(&adev->uvd.inst[j].ring);
 391
 392                for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
 393                        amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
 394        }
 395        amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr);
 396        release_firmware(adev->uvd.fw);
 397
 398        return 0;
 399}
 400
 401/**
 402 * amdgpu_uvd_entity_init - init entity
 403 *
 404 * @adev: amdgpu_device pointer
  405 * Initialize the scheduler entity used to submit kernel UVD messages.
 406 */
 407int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
 408{
 409        struct amdgpu_ring *ring;
 410        struct drm_gpu_scheduler *sched;
 411        int r;
 412
 413        ring = &adev->uvd.inst[0].ring;
 414        sched = &ring->sched;
 415        r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
 416                                  &sched, 1, NULL);
 417        if (r) {
 418                DRM_ERROR("Failed setting up UVD kernel entity.\n");
 419                return r;
 420        }
 421
 422        return 0;
 423}
 424
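     /**
      * amdgpu_uvd_suspend - save the UVD VCPU state
      *
      * @adev: amdgpu_device pointer
      *
      * Save the VCPU BO contents of each UVD instance to system memory so
      * they can be restored on resume. On a RAS ATHUB interrupt the saved
      * state is zeroed instead, since the VCPU buffer may be corrupted.
      */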
 425int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 426{
 427        unsigned size;
 428        void *ptr;
 429        int i, j, idx;
 430        bool in_ras_intr = amdgpu_ras_intr_triggered();
 431
 432        cancel_delayed_work_sync(&adev->uvd.idle_work);
 433
 434        /* only valid for physical mode */
 435        if (adev->asic_type < CHIP_POLARIS10) {
 436                for (i = 0; i < adev->uvd.max_handles; ++i)
 437                        if (atomic_read(&adev->uvd.handles[i]))
 438                                break;
 439
 440                if (i == adev->uvd.max_handles)
 441                        return 0;
 442        }
 443
 444        for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
 445                if (adev->uvd.harvest_config & (1 << j))
 446                        continue;
 447                if (adev->uvd.inst[j].vcpu_bo == NULL)
 448                        continue;
 449
 450                size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
 451                ptr = adev->uvd.inst[j].cpu_addr;
 452
 453                adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
 454                if (!adev->uvd.inst[j].saved_bo)
 455                        return -ENOMEM;
 456
 457                if (drm_dev_enter(adev_to_drm(adev), &idx)) {
 458                        /* re-write 0 since err_event_athub will corrupt VCPU buffer */
 459                        if (in_ras_intr)
 460                                memset(adev->uvd.inst[j].saved_bo, 0, size);
 461                        else
 462                                memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
 463
 464                        drm_dev_exit(idx);
 465                }
 466        }
 467
 468        if (in_ras_intr)
  469                DRM_WARN("UVD VCPU state may be lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
 470
 471        return 0;
 472}
 473
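     /**
      * amdgpu_uvd_resume - restore the UVD VCPU state
      *
      * @adev: amdgpu_device pointer
      *
      * Restore the previously saved VCPU BO contents, or re-upload the
      * firmware image and clear the rest of the buffer when no saved
      * state exists.
      */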
 474int amdgpu_uvd_resume(struct amdgpu_device *adev)
 475{
 476        unsigned size;
 477        void *ptr;
 478        int i, idx;
 479
 480        for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
 481                if (adev->uvd.harvest_config & (1 << i))
 482                        continue;
 483                if (adev->uvd.inst[i].vcpu_bo == NULL)
 484                        return -EINVAL;
 485
 486                size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
 487                ptr = adev->uvd.inst[i].cpu_addr;
 488
 489                if (adev->uvd.inst[i].saved_bo != NULL) {
 490                        if (drm_dev_enter(adev_to_drm(adev), &idx)) {
 491                                memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
 492                                drm_dev_exit(idx);
 493                        }
 494                        kvfree(adev->uvd.inst[i].saved_bo);
 495                        adev->uvd.inst[i].saved_bo = NULL;
 496                } else {
 497                        const struct common_firmware_header *hdr;
 498                        unsigned offset;
 499
 500                        hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
 501                        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
 502                                offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
 503                                if (drm_dev_enter(adev_to_drm(adev), &idx)) {
 504                                        memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
 505                                                    le32_to_cpu(hdr->ucode_size_bytes));
 506                                        drm_dev_exit(idx);
 507                                }
 508                                size -= le32_to_cpu(hdr->ucode_size_bytes);
 509                                ptr += le32_to_cpu(hdr->ucode_size_bytes);
 510                        }
 511                        memset_io(ptr, 0, size);
 512                        /* to restore uvd fence seq */
 513                        amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
 514                }
 515        }
 516        return 0;
 517}
 518
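     /**
      * amdgpu_uvd_free_handles - free any handles still owned by a file
      *
      * @adev: amdgpu_device pointer
      * @filp: DRM file the handles belong to
      *
      * Send a destroy message for every open handle owned by @filp, wait
      * for it to finish and mark the handle as free again.
      */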
 519void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 520{
 521        struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
 522        int i, r;
 523
 524        for (i = 0; i < adev->uvd.max_handles; ++i) {
 525                uint32_t handle = atomic_read(&adev->uvd.handles[i]);
 526
 527                if (handle != 0 && adev->uvd.filp[i] == filp) {
 528                        struct dma_fence *fence;
 529
 530                        r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
 531                                                       &fence);
 532                        if (r) {
 533                                DRM_ERROR("Error destroying UVD %d!\n", r);
 534                                continue;
 535                        }
 536
 537                        dma_fence_wait(fence, false);
 538                        dma_fence_put(fence);
 539
 540                        adev->uvd.filp[i] = NULL;
 541                        atomic_set(&adev->uvd.handles[i], 0);
 542                }
 543        }
 544}
 545
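     /* restrict all placements of the BO to the first 256MB UVD segment */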
 546static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
 547{
 548        int i;
 549        for (i = 0; i < abo->placement.num_placement; ++i) {
 550                abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
 551                abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
 552        }
 553}
 554
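     /* assemble the 64 bit address from the data0/data1 values of the current command */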
 555static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
 556{
 557        uint32_t lo, hi;
 558        uint64_t addr;
 559
 560        lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
 561        hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
 562        addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
 563
 564        return addr;
 565}
 566
 567/**
 568 * amdgpu_uvd_cs_pass1 - first parsing round
 569 *
 570 * @ctx: UVD parser context
 571 *
 572 * Make sure UVD message and feedback buffers are in VRAM and
  573 * nobody is violating a 256MB boundary.
 574 */
 575static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
 576{
 577        struct ttm_operation_ctx tctx = { false, false };
 578        struct amdgpu_bo_va_mapping *mapping;
 579        struct amdgpu_bo *bo;
 580        uint32_t cmd;
 581        uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
 582        int r = 0;
 583
 584        r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
 585        if (r) {
 586                DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
 587                return r;
 588        }
 589
 590        if (!ctx->parser->adev->uvd.address_64_bit) {
 591                /* check if it's a message or feedback command */
 592                cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
 593                if (cmd == 0x0 || cmd == 0x3) {
 594                        /* yes, force it into VRAM */
 595                        uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
 596                        amdgpu_bo_placement_from_domain(bo, domain);
 597                }
 598                amdgpu_uvd_force_into_uvd_segment(bo);
 599
 600                r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx);
 601        }
 602
 603        return r;
 604}
 605
 606/**
 607 * amdgpu_uvd_cs_msg_decode - handle UVD decode message
 608 *
 609 * @adev: amdgpu_device pointer
 610 * @msg: pointer to message structure
 611 * @buf_sizes: placeholder to put the different buffer lengths
 612 *
 613 * Peek into the decode message and calculate the necessary buffer sizes.
 614 */
 615static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 616        unsigned buf_sizes[])
 617{
 618        unsigned stream_type = msg[4];
 619        unsigned width = msg[6];
 620        unsigned height = msg[7];
 621        unsigned dpb_size = msg[9];
 622        unsigned pitch = msg[28];
 623        unsigned level = msg[57];
 624
 625        unsigned width_in_mb = width / 16;
 626        unsigned height_in_mb = ALIGN(height / 16, 2);
 627        unsigned fs_in_mb = width_in_mb * height_in_mb;
 628
 629        unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
 630        unsigned min_ctx_size = ~0;
 631
 632        image_size = width * height;
 633        image_size += image_size / 2;
 634        image_size = ALIGN(image_size, 1024);
 635
 636        switch (stream_type) {
 637        case 0: /* H264 */
  638                switch (level) {
 639                case 30:
 640                        num_dpb_buffer = 8100 / fs_in_mb;
 641                        break;
 642                case 31:
 643                        num_dpb_buffer = 18000 / fs_in_mb;
 644                        break;
 645                case 32:
 646                        num_dpb_buffer = 20480 / fs_in_mb;
 647                        break;
 648                case 41:
 649                        num_dpb_buffer = 32768 / fs_in_mb;
 650                        break;
 651                case 42:
 652                        num_dpb_buffer = 34816 / fs_in_mb;
 653                        break;
 654                case 50:
 655                        num_dpb_buffer = 110400 / fs_in_mb;
 656                        break;
 657                case 51:
 658                        num_dpb_buffer = 184320 / fs_in_mb;
 659                        break;
 660                default:
 661                        num_dpb_buffer = 184320 / fs_in_mb;
 662                        break;
 663                }
 664                num_dpb_buffer++;
 665                if (num_dpb_buffer > 17)
 666                        num_dpb_buffer = 17;
 667
 668                /* reference picture buffer */
 669                min_dpb_size = image_size * num_dpb_buffer;
 670
 671                /* macroblock context buffer */
 672                min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;
 673
 674                /* IT surface buffer */
 675                min_dpb_size += width_in_mb * height_in_mb * 32;
 676                break;
 677
 678        case 1: /* VC1 */
 679
 680                /* reference picture buffer */
 681                min_dpb_size = image_size * 3;
 682
 683                /* CONTEXT_BUFFER */
 684                min_dpb_size += width_in_mb * height_in_mb * 128;
 685
 686                /* IT surface buffer */
 687                min_dpb_size += width_in_mb * 64;
 688
 689                /* DB surface buffer */
 690                min_dpb_size += width_in_mb * 128;
 691
 692                /* BP */
 693                tmp = max(width_in_mb, height_in_mb);
 694                min_dpb_size += ALIGN(tmp * 7 * 16, 64);
 695                break;
 696
 697        case 3: /* MPEG2 */
 698
 699                /* reference picture buffer */
 700                min_dpb_size = image_size * 3;
 701                break;
 702
 703        case 4: /* MPEG4 */
 704
 705                /* reference picture buffer */
 706                min_dpb_size = image_size * 3;
 707
 708                /* CM */
 709                min_dpb_size += width_in_mb * height_in_mb * 64;
 710
 711                /* IT surface buffer */
 712                min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
 713                break;
 714
 715        case 7: /* H264 Perf */
  716                switch (level) {
 717                case 30:
 718                        num_dpb_buffer = 8100 / fs_in_mb;
 719                        break;
 720                case 31:
 721                        num_dpb_buffer = 18000 / fs_in_mb;
 722                        break;
 723                case 32:
 724                        num_dpb_buffer = 20480 / fs_in_mb;
 725                        break;
 726                case 41:
 727                        num_dpb_buffer = 32768 / fs_in_mb;
 728                        break;
 729                case 42:
 730                        num_dpb_buffer = 34816 / fs_in_mb;
 731                        break;
 732                case 50:
 733                        num_dpb_buffer = 110400 / fs_in_mb;
 734                        break;
 735                case 51:
 736                        num_dpb_buffer = 184320 / fs_in_mb;
 737                        break;
 738                default:
 739                        num_dpb_buffer = 184320 / fs_in_mb;
 740                        break;
 741                }
 742                num_dpb_buffer++;
 743                if (num_dpb_buffer > 17)
 744                        num_dpb_buffer = 17;
 745
 746                /* reference picture buffer */
 747                min_dpb_size = image_size * num_dpb_buffer;
 748
  749                if (!adev->uvd.use_ctx_buf) {
 750                        /* macroblock context buffer */
 751                        min_dpb_size +=
 752                                width_in_mb * height_in_mb * num_dpb_buffer * 192;
 753
 754                        /* IT surface buffer */
 755                        min_dpb_size += width_in_mb * height_in_mb * 32;
 756                } else {
 757                        /* macroblock context buffer */
 758                        min_ctx_size =
 759                                width_in_mb * height_in_mb * num_dpb_buffer * 192;
 760                }
 761                break;
 762
 763        case 8: /* MJPEG */
 764                min_dpb_size = 0;
 765                break;
 766
 767        case 16: /* H265 */
 768                image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
 769                image_size = ALIGN(image_size, 256);
 770
 771                num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
 772                min_dpb_size = image_size * num_dpb_buffer;
 773                min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
 774                                           * 16 * num_dpb_buffer + 52 * 1024;
 775                break;
 776
 777        default:
 778                DRM_ERROR("UVD codec not handled %d!\n", stream_type);
 779                return -EINVAL;
 780        }
 781
 782        if (width > pitch) {
 783                DRM_ERROR("Invalid UVD decoding target pitch!\n");
 784                return -EINVAL;
 785        }
 786
 787        if (dpb_size < min_dpb_size) {
 788                DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
 789                          dpb_size, min_dpb_size);
 790                return -EINVAL;
 791        }
 792
 793        buf_sizes[0x1] = dpb_size;
 794        buf_sizes[0x2] = image_size;
 795        buf_sizes[0x4] = min_ctx_size;
 796        /* store image width to adjust nb memory pstate */
 797        adev->uvd.decode_image_width = width;
 798        return 0;
 799}
 800
 801/**
 802 * amdgpu_uvd_cs_msg - handle UVD message
 803 *
 804 * @ctx: UVD parser context
 805 * @bo: buffer object containing the message
 806 * @offset: offset into the buffer object
 807 *
 808 * Peek into the UVD message and extract the session id.
  809 * Make sure that we don't open up too many sessions.
 810 */
 811static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 812                             struct amdgpu_bo *bo, unsigned offset)
 813{
 814        struct amdgpu_device *adev = ctx->parser->adev;
 815        int32_t *msg, msg_type, handle;
 816        void *ptr;
 817        long r;
 818        int i;
 819
 820        if (offset & 0x3F) {
 821                DRM_ERROR("UVD messages must be 64 byte aligned!\n");
 822                return -EINVAL;
 823        }
 824
 825        r = amdgpu_bo_kmap(bo, &ptr);
 826        if (r) {
  827                DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
 828                return r;
 829        }
 830
 831        msg = ptr + offset;
 832
 833        msg_type = msg[1];
 834        handle = msg[2];
 835
 836        if (handle == 0) {
 837                DRM_ERROR("Invalid UVD handle!\n");
 838                return -EINVAL;
 839        }
 840
 841        switch (msg_type) {
 842        case 0:
 843                /* it's a create msg, calc image size (width * height) */
 844                amdgpu_bo_kunmap(bo);
 845
 846                /* try to alloc a new handle */
 847                for (i = 0; i < adev->uvd.max_handles; ++i) {
 848                        if (atomic_read(&adev->uvd.handles[i]) == handle) {
  849                                DRM_ERROR("Handle 0x%x already in use!\n",
 850                                          handle);
 851                                return -EINVAL;
 852                        }
 853
 854                        if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
 855                                adev->uvd.filp[i] = ctx->parser->filp;
 856                                return 0;
 857                        }
 858                }
 859
 860                DRM_ERROR("No more free UVD handles!\n");
 861                return -ENOSPC;
 862
 863        case 1:
 864                /* it's a decode msg, calc buffer sizes */
 865                r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes);
 866                amdgpu_bo_kunmap(bo);
 867                if (r)
 868                        return r;
 869
 870                /* validate the handle */
 871                for (i = 0; i < adev->uvd.max_handles; ++i) {
 872                        if (atomic_read(&adev->uvd.handles[i]) == handle) {
 873                                if (adev->uvd.filp[i] != ctx->parser->filp) {
 874                                        DRM_ERROR("UVD handle collision detected!\n");
 875                                        return -EINVAL;
 876                                }
 877                                return 0;
 878                        }
 879                }
 880
 881                DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
 882                return -ENOENT;
 883
 884        case 2:
 885                /* it's a destroy msg, free the handle */
 886                for (i = 0; i < adev->uvd.max_handles; ++i)
 887                        atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
 888                amdgpu_bo_kunmap(bo);
 889                return 0;
 890
 891        default:
 892                DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
 893        }
 894
 895        return -EINVAL;
 896}
 897
 898/**
 899 * amdgpu_uvd_cs_pass2 - second parsing round
 900 *
 901 * @ctx: UVD parser context
 902 *
 903 * Patch buffer addresses, make sure buffer sizes are correct.
 904 */
 905static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 906{
 907        struct amdgpu_bo_va_mapping *mapping;
 908        struct amdgpu_bo *bo;
 909        uint32_t cmd;
 910        uint64_t start, end;
 911        uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
 912        int r;
 913
 914        r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
 915        if (r) {
 916                DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
 917                return r;
 918        }
 919
 920        start = amdgpu_bo_gpu_offset(bo);
 921
 922        end = (mapping->last + 1 - mapping->start);
 923        end = end * AMDGPU_GPU_PAGE_SIZE + start;
 924
 925        addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
 926        start += addr;
 927
 928        amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
 929                            lower_32_bits(start));
 930        amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
 931                            upper_32_bits(start));
 932
 933        cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
 934        if (cmd < 0x4) {
 935                if ((end - start) < ctx->buf_sizes[cmd]) {
  936                        DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
 937                                  (unsigned)(end - start),
 938                                  ctx->buf_sizes[cmd]);
 939                        return -EINVAL;
 940                }
 941
 942        } else if (cmd == 0x206) {
 943                if ((end - start) < ctx->buf_sizes[4]) {
  944                        DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
 945                                          (unsigned)(end - start),
 946                                          ctx->buf_sizes[4]);
 947                        return -EINVAL;
 948                }
 949        } else if ((cmd != 0x100) && (cmd != 0x204)) {
 950                DRM_ERROR("invalid UVD command %X!\n", cmd);
 951                return -EINVAL;
 952        }
 953
 954        if (!ctx->parser->adev->uvd.address_64_bit) {
 955                if ((start >> 28) != ((end - 1) >> 28)) {
 956                        DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
 957                                  start, end);
 958                        return -EINVAL;
 959                }
 960
 961                if ((cmd == 0 || cmd == 0x3) &&
 962                    (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
 963                        DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
 964                                  start, end);
 965                        return -EINVAL;
 966                }
 967        }
 968
 969        if (cmd == 0) {
 970                ctx->has_msg_cmd = true;
 971                r = amdgpu_uvd_cs_msg(ctx, bo, addr);
 972                if (r)
 973                        return r;
 974        } else if (!ctx->has_msg_cmd) {
  975                DRM_ERROR("Message needed before other commands are sent!\n");
 976                return -EINVAL;
 977        }
 978
 979        return 0;
 980}
 981
 982/**
 983 * amdgpu_uvd_cs_reg - parse register writes
 984 *
 985 * @ctx: UVD parser context
 986 * @cb: callback function
 987 *
 988 * Parse the register writes, call cb on each complete command.
 989 */
 990static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
 991                             int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
 992{
 993        struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
 994        int i, r;
 995
 996        ctx->idx++;
 997        for (i = 0; i <= ctx->count; ++i) {
 998                unsigned reg = ctx->reg + i;
 999
1000                if (ctx->idx >= ib->length_dw) {
1001                        DRM_ERROR("Register command after end of CS!\n");
1002                        return -EINVAL;
1003                }
1004
1005                switch (reg) {
1006                case mmUVD_GPCOM_VCPU_DATA0:
1007                        ctx->data0 = ctx->idx;
1008                        break;
1009                case mmUVD_GPCOM_VCPU_DATA1:
1010                        ctx->data1 = ctx->idx;
1011                        break;
1012                case mmUVD_GPCOM_VCPU_CMD:
1013                        r = cb(ctx);
1014                        if (r)
1015                                return r;
1016                        break;
1017                case mmUVD_ENGINE_CNTL:
1018                case mmUVD_NO_OP:
1019                        break;
1020                default:
1021                        DRM_ERROR("Invalid reg 0x%X!\n", reg);
1022                        return -EINVAL;
1023                }
1024                ctx->idx++;
1025        }
1026        return 0;
1027}
1028
1029/**
1030 * amdgpu_uvd_cs_packets - parse UVD packets
1031 *
1032 * @ctx: UVD parser context
1033 * @cb: callback function
1034 *
1035 * Parse the command stream packets.
1036 */
1037static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
1038                                 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
1039{
1040        struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
1041        int r;
1042
 1043        for (ctx->idx = 0; ctx->idx < ib->length_dw; ) {
1044                uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
1045                unsigned type = CP_PACKET_GET_TYPE(cmd);
1046                switch (type) {
1047                case PACKET_TYPE0:
1048                        ctx->reg = CP_PACKET0_GET_REG(cmd);
1049                        ctx->count = CP_PACKET_GET_COUNT(cmd);
1050                        r = amdgpu_uvd_cs_reg(ctx, cb);
1051                        if (r)
1052                                return r;
1053                        break;
1054                case PACKET_TYPE2:
1055                        ++ctx->idx;
1056                        break;
1057                default:
 1058                        DRM_ERROR("Unknown packet type %d!\n", type);
1059                        return -EINVAL;
1060                }
1061        }
1062        return 0;
1063}
1064
1065/**
1066 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
1067 *
1068 * @parser: Command submission parser context
1069 * @ib_idx: Which indirect buffer to use
1070 *
1071 * Parse the command stream, patch in addresses as necessary.
1072 */
1073int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
1074{
1075        struct amdgpu_uvd_cs_ctx ctx = {};
1076        unsigned buf_sizes[] = {
1077                [0x00000000]    =       2048,
1078                [0x00000001]    =       0xFFFFFFFF,
1079                [0x00000002]    =       0xFFFFFFFF,
1080                [0x00000003]    =       2048,
1081                [0x00000004]    =       0xFFFFFFFF,
1082        };
1083        struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
1084        int r;
1085
1086        parser->job->vm = NULL;
1087        ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
1088
1089        if (ib->length_dw % 16) {
1090                DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
1091                          ib->length_dw);
1092                return -EINVAL;
1093        }
1094
1095        ctx.parser = parser;
1096        ctx.buf_sizes = buf_sizes;
1097        ctx.ib_idx = ib_idx;
1098
1099        /* first round only required on chips without UVD 64 bit address support */
1100        if (!parser->adev->uvd.address_64_bit) {
1101                /* first round, make sure the buffers are actually in the UVD segment */
1102                r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
1103                if (r)
1104                        return r;
1105        }
1106
1107        /* second round, patch buffer addresses into the command stream */
1108        r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
1109        if (r)
1110                return r;
1111
1112        if (!ctx.has_msg_cmd) {
1113                DRM_ERROR("UVD-IBs need a msg command!\n");
1114                return -EINVAL;
1115        }
1116
1117        return 0;
1118}
1119
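     /**
      * amdgpu_uvd_send_msg - submit a UVD message to the ring
      *
      * @ring: UVD ring to use
      * @bo: BO containing the message
      * @direct: submit directly to the ring instead of the kernel entity
      * @fence: resulting fence, may be NULL
      *
      * Build a small IB pointing at the message BO and submit it, either
      * directly on the ring or through the UVD kernel entity.
      */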
1120static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
1121                               bool direct, struct dma_fence **fence)
1122{
1123        struct amdgpu_device *adev = ring->adev;
1124        struct dma_fence *f = NULL;
1125        struct amdgpu_job *job;
1126        struct amdgpu_ib *ib;
1127        uint32_t data[4];
1128        uint64_t addr;
1129        long r;
1130        int i;
1131        unsigned offset_idx = 0;
1132        unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
1133
1134        r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
1135                                     AMDGPU_IB_POOL_DELAYED, &job);
1136        if (r)
1137                return r;
1138
1139        if (adev->asic_type >= CHIP_VEGA10) {
1140                offset_idx = 1 + ring->me;
1141                offset[1] = adev->reg_offset[UVD_HWIP][0][1];
1142                offset[2] = adev->reg_offset[UVD_HWIP][1][1];
1143        }
1144
1145        data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
1146        data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
1147        data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
1148        data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
1149
1150        ib = &job->ibs[0];
1151        addr = amdgpu_bo_gpu_offset(bo);
1152        ib->ptr[0] = data[0];
1153        ib->ptr[1] = addr;
1154        ib->ptr[2] = data[1];
1155        ib->ptr[3] = addr >> 32;
1156        ib->ptr[4] = data[2];
1157        ib->ptr[5] = 0;
1158        for (i = 6; i < 16; i += 2) {
1159                ib->ptr[i] = data[3];
1160                ib->ptr[i+1] = 0;
1161        }
1162        ib->length_dw = 16;
1163
1164        if (direct) {
1165                r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
1166                                          msecs_to_jiffies(10));
1167                if (r == 0)
1168                        r = -ETIMEDOUT;
1169                if (r < 0)
1170                        goto err_free;
1171
1172                r = amdgpu_job_submit_direct(job, ring, &f);
1173                if (r)
1174                        goto err_free;
1175        } else {
1176                r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
1177                                     AMDGPU_SYNC_ALWAYS,
1178                                     AMDGPU_FENCE_OWNER_UNDEFINED);
1179                if (r)
1180                        goto err_free;
1181
1182                r = amdgpu_job_submit(job, &adev->uvd.entity,
1183                                      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
1184                if (r)
1185                        goto err_free;
1186        }
1187
1188        amdgpu_bo_reserve(bo, true);
1189        amdgpu_bo_fence(bo, f, false);
1190        amdgpu_bo_unreserve(bo);
1191
1192        if (fence)
1193                *fence = dma_fence_get(f);
1194        dma_fence_put(f);
1195
1196        return 0;
1197
1198err_free:
1199        amdgpu_job_free(job);
1200        return r;
1201}
1202
 1203/* Multiple fence commands without any stream commands in between can
 1204   crash the VCPU, so just try to emit a dummy create/destroy msg to
 1205   avoid this. */
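     /**
      * amdgpu_uvd_get_create_msg - generate a UVD create msg
      *
      * @ring: ring to submit the msg to
      * @handle: session handle to use
      * @fence: resulting fence, may be NULL
      *
      * Write a create message into the preallocated message BO and submit
      * it directly on the ring.
      */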
1206int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
1207                              struct dma_fence **fence)
1208{
1209        struct amdgpu_device *adev = ring->adev;
1210        struct amdgpu_bo *bo = adev->uvd.ib_bo;
1211        uint32_t *msg;
1212        int i;
1213
1214        msg = amdgpu_bo_kptr(bo);
 1215        /* stitch together a UVD create msg */
1216        msg[0] = cpu_to_le32(0x00000de4);
1217        msg[1] = cpu_to_le32(0x00000000);
1218        msg[2] = cpu_to_le32(handle);
1219        msg[3] = cpu_to_le32(0x00000000);
1220        msg[4] = cpu_to_le32(0x00000000);
1221        msg[5] = cpu_to_le32(0x00000000);
1222        msg[6] = cpu_to_le32(0x00000000);
1223        msg[7] = cpu_to_le32(0x00000780);
1224        msg[8] = cpu_to_le32(0x00000440);
1225        msg[9] = cpu_to_le32(0x00000000);
1226        msg[10] = cpu_to_le32(0x01b37000);
1227        for (i = 11; i < 1024; ++i)
1228                msg[i] = cpu_to_le32(0x0);
1229
1230        return amdgpu_uvd_send_msg(ring, bo, true, fence);
1231
1232}
1233
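     /**
      * amdgpu_uvd_get_destroy_msg - generate a UVD destroy msg
      *
      * @ring: ring to submit the msg to
      * @handle: session handle to close
      * @direct: submit directly or through the kernel entity
      * @fence: resulting fence, may be NULL
      *
      * Write a destroy message for @handle and submit it; for non-direct
      * submission a temporary message BO is allocated and freed again.
      */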
1234int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
1235                               bool direct, struct dma_fence **fence)
1236{
1237        struct amdgpu_device *adev = ring->adev;
1238        struct amdgpu_bo *bo = NULL;
1239        uint32_t *msg;
1240        int r, i;
1241
1242        if (direct) {
1243                bo = adev->uvd.ib_bo;
1244        } else {
1245                r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo);
1246                if (r)
1247                        return r;
1248        }
1249
1250        msg = amdgpu_bo_kptr(bo);
 1251        /* stitch together a UVD destroy msg */
1252        msg[0] = cpu_to_le32(0x00000de4);
1253        msg[1] = cpu_to_le32(0x00000002);
1254        msg[2] = cpu_to_le32(handle);
1255        msg[3] = cpu_to_le32(0x00000000);
1256        for (i = 4; i < 1024; ++i)
1257                msg[i] = cpu_to_le32(0x0);
1258
1259        r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
1260
1261        if (!direct)
1262                amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
1263
1264        return r;
1265}
1266
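     /**
      * amdgpu_uvd_idle_work_handler - power off UVD when idle
      *
      * @work: pointer to the delayed work item
      *
      * If no fences are outstanding on any UVD ring, gate the UVD clocks
      * and power; otherwise re-arm the idle work.
      */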
1267static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
1268{
1269        struct amdgpu_device *adev =
1270                container_of(work, struct amdgpu_device, uvd.idle_work.work);
1271        unsigned fences = 0, i, j;
1272
1273        for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
1274                if (adev->uvd.harvest_config & (1 << i))
1275                        continue;
1276                fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
1277                for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
1278                        fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
1279                }
1280        }
1281
1282        if (fences == 0) {
1283                if (adev->pm.dpm_enabled) {
1284                        amdgpu_dpm_enable_uvd(adev, false);
1285                } else {
1286                        amdgpu_asic_set_uvd_clocks(adev, 0, 0);
1287                        /* shutdown the UVD block */
1288                        amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1289                                                               AMD_PG_STATE_GATE);
1290                        amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1291                                                               AMD_CG_STATE_GATE);
1292                }
1293        } else {
1294                schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
1295        }
1296}
1297
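     /**
      * amdgpu_uvd_ring_begin_use - power up UVD before use
      *
      * @ring: UVD ring about to be used
      *
      * Cancel the pending idle work; if none was pending the block may
      * already be gated, so ungate the UVD clocks and power again.
      * Does nothing under SR-IOV.
      */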
1298void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
1299{
1300        struct amdgpu_device *adev = ring->adev;
1301        bool set_clocks;
1302
1303        if (amdgpu_sriov_vf(adev))
1304                return;
1305
1306        set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
1307        if (set_clocks) {
1308                if (adev->pm.dpm_enabled) {
1309                        amdgpu_dpm_enable_uvd(adev, true);
1310                } else {
1311                        amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
1312                        amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1313                                                               AMD_CG_STATE_UNGATE);
1314                        amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1315                                                               AMD_PG_STATE_UNGATE);
1316                }
1317        }
1318}
1319
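     /**
      * amdgpu_uvd_ring_end_use - schedule power down after use
      *
      * @ring: UVD ring that was used
      *
      * Re-arm the idle work so UVD is powered down again after the
      * idle timeout.
      */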
1320void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
1321{
1322        if (!amdgpu_sriov_vf(ring->adev))
1323                schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
1324}
1325
1326/**
1327 * amdgpu_uvd_ring_test_ib - test ib execution
1328 *
1329 * @ring: amdgpu_ring pointer
1330 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
1331 *
1332 * Test if we can successfully execute an IB
1333 */
1334int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1335{
1336        struct dma_fence *fence;
1337        long r;
1338
1339        r = amdgpu_uvd_get_create_msg(ring, 1, &fence);
1340        if (r)
1341                goto error;
1342
1343        r = dma_fence_wait_timeout(fence, false, timeout);
1344        dma_fence_put(fence);
1345        if (r == 0)
1346                r = -ETIMEDOUT;
1347        if (r < 0)
1348                goto error;
1349
1350        r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
1351        if (r)
1352                goto error;
1353
1354        r = dma_fence_wait_timeout(fence, false, timeout);
1355        if (r == 0)
1356                r = -ETIMEDOUT;
1357        else if (r > 0)
1358                r = 0;
1359
1360        dma_fence_put(fence);
1361
1362error:
1363        return r;
1364}
1365
1366/**
1367 * amdgpu_uvd_used_handles - returns used UVD handles
1368 *
1369 * @adev: amdgpu_device pointer
1370 *
1371 * Returns the number of UVD handles in use
1372 */
1373uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
1374{
1375        unsigned i;
1376        uint32_t used_handles = 0;
1377
1378        for (i = 0; i < adev->uvd.max_handles; ++i) {
1379                /*
1380                 * Handles can be freed in any order, and not
1381                 * necessarily linear. So we need to count
1382                 * all non-zero handles.
1383                 */
1384                if (atomic_read(&adev->uvd.handles[i]))
1385                        used_handles++;
1386        }
1387
1388        return used_handles;
1389}
1390