linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "cikd.h"

/* 1 second timeout */
#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI		"amdgpu/kabini_vce.bin"
#define FIRMWARE_KAVERI		"amdgpu/kaveri_vce.bin"
#define FIRMWARE_HAWAII		"amdgpu/hawaii_vce.bin"
#define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"

#define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);

MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);

static void amdgpu_vce_idle_work_handler(struct work_struct *work);
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence);
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence);

/**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size for the new BO
 *
 * First step to get VCE online, allocate memory and load the firmware
 */
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned ucode_version, version_major, version_minor, binary_id;
	int i, r;

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	case CHIP_POLARIS10:
		fw_name = FIRMWARE_POLARIS10;
		break;
	case CHIP_POLARIS11:
		fw_name = FIRMWARE_POLARIS11;
		break;
	case CHIP_POLARIS12:
		fw_name = FIRMWARE_POLARIS12;
		break;
	case CHIP_VEGAM:
		fw_name = FIRMWARE_VEGAM;
		break;
	case CHIP_VEGA10:
		fw_name = FIRMWARE_VEGA10;
		break;
	case CHIP_VEGA12:
		fw_name = FIRMWARE_VEGA12;
		break;
	case CHIP_VEGA20:
		fw_name = FIRMWARE_VEGA20;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vce.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vce.fw);
		adev->vce.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;

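	/*
	 * The ucode_version field packs major/minor/binary_id as
	 * 12/12/8 bits; fw_version repacks them as 8/8/8 bits starting at
	 * bit 24, so e.g. a 52.0.3 binary would yield fw_version
	 * 0x34000300 (illustrative values, not from a real binary).
	 */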
	ucode_version = le32_to_cpu(hdr->ucode_version);
	version_major = (ucode_version >> 20) & 0xfff;
	version_minor = (ucode_version >> 8) & 0xfff;
	binary_id = ucode_version & 0xff;
	DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
		version_major, version_minor, binary_id);
	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
				(binary_id << 8));

	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
		return r;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		atomic_set(&adev->vce.handles[i], 0);
		adev->vce.filp[i] = NULL;
	}

	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
	mutex_init(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_sw_fini - free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Last step on VCE teardown, free firmware memory
 */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	drm_sched_entity_destroy(&adev->vce.entity);

	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
		(void **)&adev->vce.cpu_addr);

	for (i = 0; i < adev->vce.num_rings; i++)
		amdgpu_ring_fini(&adev->vce.ring[i]);

	release_firmware(adev->vce.fw);
	mutex_destroy(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_entity_init - init entity
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct drm_gpu_scheduler *sched;
	int r;

	ring = &adev->vce.ring[0];
	sched = &ring->sched;
	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
				  &sched, 1, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCE run queue.\n");
		return r;
	}

	return 0;
}

/**
 * amdgpu_vce_suspend - unpin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
	int i;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (atomic_read(&adev->vce.handles[i]))
			break;

	if (i == AMDGPU_MAX_VCE_HANDLES)
		return 0;

	/* TODO: suspending running encoding sessions isn't supported */
	return -EINVAL;
}

/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	void *cpu_addr;
	const struct common_firmware_header *hdr;
	unsigned offset;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE map failed\n", r);
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);

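	/*
	 * drm_dev_enter() fails once the device has been unplugged; in that
	 * case the MMIO copy below would fault, so skip re-uploading the
	 * firmware image.
	 */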
	if (drm_dev_enter(&adev->ddev, &idx)) {
		memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
			    adev->vce.fw->size - offset);
		drm_dev_exit(idx);
	}

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);

	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}

/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * Power off VCE when it's not used anymore
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vce.idle_work.work);
	unsigned i, count = 0;

	for (i = 0; i < adev->vce.num_rings; i++)
		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);

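	/*
	 * Only power down when no fences are outstanding on any VCE ring;
	 * otherwise re-arm the idle work and check again after
	 * VCE_IDLE_TIMEOUT.
	 */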
	if (count == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, false);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 0, 0);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_GATE);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
	}
}

/**
 * amdgpu_vce_ring_begin_use - power up VCE
 *
 * @ring: amdgpu ring
 *
 * Make sure VCE is powered up when we want to use it
 */
void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks;

	if (amdgpu_sriov_vf(adev))
		return;

	mutex_lock(&adev->vce.idle_mutex);
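	/*
	 * If the idle work was not pending, VCE may already have been
	 * powered down in the meantime, so clocks and power gates have to
	 * be brought back up before submitting work.
	 */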
	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, true);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_UNGATE);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_UNGATE);
		}
	}
	mutex_unlock(&adev->vce.idle_mutex);
}

/**
 * amdgpu_vce_ring_end_use - power VCE down
 *
 * @ring: amdgpu ring
 *
 * Schedule work to power VCE down again
 */
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
}

/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	int i, r;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->vce.handles[i]);

		if (!handle || adev->vce.filp[i] != filp)
			continue;

		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
		if (r)
			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

		adev->vce.filp[i] = NULL;
		atomic_set(&adev->vce.handles[i], 0);
	}
}

/**
 * amdgpu_vce_get_create_msg - generate a VCE create msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @bo: amdgpu object for which we query the offset
 * @fence: optional fence to return
 *
 * Open up a stream for HW test
 */
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	addr = amdgpu_bo_gpu_offset(bo);

	/* stitch together a VCE create msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

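	/*
	 * fw_version stores the firmware major version in bits 24-31 (see
	 * amdgpu_vce_sw_init()); firmware 52 and newer expects a longer
	 * create command (0x40 bytes instead of 0x30), hence the four
	 * extra zero dwords below.
	 */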
	if ((ring->adev->vce.fw_version >> 24) >= 52)
		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
	else
		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000042;
	ib->ptr[ib->length_dw++] = 0x0000000a;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000080;
	ib->ptr[ib->length_dw++] = 0x00000060;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x0000000c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	if ((ring->adev->vce.fw_version >> 24) >= 52) {
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
	}

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000001;

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: direct or delayed pool
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	int i, r;

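	/*
	 * Direct submissions bypass the GPU scheduler and are used for ring
	 * tests; delayed submissions go through the VCE entity, e.g. when
	 * cleaning up handles a process left behind.
	 */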
	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     direct ? AMDGPU_IB_POOL_DIRECT :
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* stitch together a VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if none */
	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (direct)
		r = amdgpu_job_submit_direct(job, ring, &f);
	else
		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Make sure that no BO crosses a 4GB boundary.
 */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
				  int lo, int hi, unsigned size, int32_t index)
{
	int64_t offset = ((uint64_t)size) * ((int64_t)index);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i, fpfn, lpfn;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
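	/*
	 * Compute placement limits (in pages) that keep the accessed range
	 * from crossing a 4GB boundary: for a valid bs/fb slot the first
	 * allowed page is raised by the slot offset, otherwise the last
	 * allowed page is lowered accordingly.
	 */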
	if (index >= 0) {
		addr += offset;
		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
		lpfn = 0x100000000ULL >> PAGE_SHIFT;
	} else {
		fpfn = 0;
		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
	}

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	for (i = 0; i < bo->placement.num_placement; ++i) {
		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
		bo->placements[i].lpfn = bo->placements[i].lpfn ?
			min(bo->placements[i].lpfn, lpfn) : lpfn;
	}
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/**
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Patch relocation inside command stream with real buffer address
 */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
			       int lo, int hi, unsigned size, uint32_t index)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	if (index == 0xffffffff)
		index = 0;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	addr += ((uint64_t)size) * ((uint64_t)index);

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	if ((addr + (uint64_t)size) >
	    (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
			  addr, lo, hi);
		return -EINVAL;
	}

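	/*
	 * Translate the VM address into a physical GPU address: drop the
	 * mapping's VM start, add the BO's GPU offset, then remove the
	 * slot offset again so the patched value is the base of slot 0.
	 */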
	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
	addr += amdgpu_bo_gpu_offset(bo);
	addr -= ((uint64_t)size) * ((uint64_t)index);

	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));

	return 0;
}

/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: allocated a new handle?
 *
 * Validates the handle and returns the found session index, or -EINVAL
 * if we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
				      uint32_t handle, uint32_t *allocated)
{
	unsigned i;

	/* validate the handle */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
			if (p->adev->vce.filp[i] != p->filp) {
				DRM_ERROR("VCE handle collision detected!\n");
				return -EINVAL;
			}
			return i;
		}
	}

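	/*
	 * Handle not found, try to allocate a new one: atomic_cmpxchg()
	 * claims a slot whose current value is 0 without holding a lock,
	 * and the bit set in *allocated lets the caller roll freshly
	 * claimed slots back if parsing fails later.
	 */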
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
			p->adev->vce.filp[i] = p->filp;
			p->adev->vce.img_size[i] = 0;
			*allocated |= 1 << i;
			return i;
		}
	}

	DRM_ERROR("No more free VCE handles!\n");
	return -EINVAL;
}

/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	unsigned fb_idx = 0, bs_idx = 0;
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	uint32_t *size = &tmp;
	unsigned idx;
	int i, r = 0;

	p->job->vm = NULL;
	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);

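	/*
	 * First pass: validate buffer placement for every command that
	 * references a BO, so all buffers are pinned in ranges that cannot
	 * cross a 4GB boundary before any addresses are patched.
	 */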
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
						   idx + 9, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
						   idx + 11, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
						   idx + 2, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
						   idx + 2, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
						   idx + 7, 0, 0);
			if (r)
				goto out;
			break;
		}

		idx += len / 4;
	}

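	/*
	 * Second pass: validate session handles, enforce command ordering
	 * and patch the relocations with real GPU addresses.
	 */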
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			size = &p->adev->vce.img_size[session_idx];
			break;

		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: /* config extension */
		case 0x04000002: /* pic control */
		case 0x04000005: /* rate control */
		case 0x04000007: /* motion estimation */
		case 0x04000008: /* rdo */
		case 0x04000009: /* vui */
		case 0x05000002: /* auxiliary buffer */
		case 0x05000009: /* clock table */
			break;

		case 0x0500000c: /* hw config */
			switch (p->adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
			case CHIP_KAVERI:
			case CHIP_MULLINS:
#endif
			case CHIP_CARRIZO:
				break;
			default:
				r = -EINVAL;
				goto out;
			}
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
						*size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
						*size / 3, 0);
			if (r)
				goto out;
			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						*size * 2, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
						idx + 2, *size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
						idx + 7, *size / 12, 0);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 */
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	int i, r = 0, idx = 0;

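	/*
	 * In VM mode the IB contains virtual addresses, so no BO validation
	 * or relocation patching is needed; only the session handle
	 * bookkeeping is done here.
	 */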
	while (idx < ib->length_dw) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		default:
			break;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
		amdgpu_ib_free(p->adev, ib, NULL);
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @job: job to retrieve vmid from
 * @ib: the IB to execute
 * @flags: unused
 *
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	amdgpu_ring_write(ring, VCE_CMD_IB);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r, timeout = adev->usec_timeout;

	/* skip ring test for sriov */
	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_commit(ring);

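	/*
	 * The ring is considered alive once the hardware advances the read
	 * pointer past the VCE_CMD_END we just committed; poll for up to
	 * adev->usec_timeout microseconds.
	 */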
	for (i = 0; i < timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= timeout)
		r = -ETIMEDOUT;

	return r;
}

/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
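 * A hypothetical caller (for illustration only; in practice this test is
 * driven by the common amdgpu ring-test infrastructure) might do:
 *
 *	long r = amdgpu_vce_ring_test_ib(&adev->vce.ring[0],
 *					 msecs_to_jiffies(1000));
 *	if (r)
 *		DRM_ERROR("VCE IB test failed (%ld)\n", r);
 *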
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	/* skip vce ring1/2 ib test for now, since it's not reliable */
	if (ring != &ring->adev->vce.ring[0])
		return 0;

	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

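	/*
	 * Round-trip test: submit a create message followed by a destroy
	 * message and wait for the destroy fence; if the firmware answers
	 * within the timeout, IB submission works.
	 */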
	r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, NULL);
	return r;
}