linux/drivers/gpu/drm/radeon/radeon_uvd.c
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "radeon.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS     1000

/* Firmware Names */
#define FIRMWARE_R600           "radeon/R600_uvd.bin"
#define FIRMWARE_RS780          "radeon/RS780_uvd.bin"
#define FIRMWARE_RV770          "radeon/RV770_uvd.bin"
#define FIRMWARE_RV710          "radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS        "radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO           "radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI         "radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE        "radeon/BONAIRE_uvd.bin"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

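/**
 * radeon_uvd_init - load firmware and allocate the UVD VCPU buffer
 *
 * @rdev: radeon_device pointer
 *
 * Select and load the UVD firmware for the chip family, allocate and pin
 * a VRAM buffer large enough for the firmware image plus the UVD stack,
 * heap and one page for internal messages, and clear the session handle
 * bookkeeping. Returns 0 on success, negative error code on failure.
 */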
int radeon_uvd_init(struct radeon_device *rdev)
{
        unsigned long bo_size;
        const char *fw_name;
        int i, r;

        INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

        switch (rdev->family) {
        case CHIP_RV610:
        case CHIP_RV630:
        case CHIP_RV670:
        case CHIP_RV620:
        case CHIP_RV635:
                fw_name = FIRMWARE_R600;
                break;

        case CHIP_RS780:
        case CHIP_RS880:
                fw_name = FIRMWARE_RS780;
                break;

        case CHIP_RV770:
                fw_name = FIRMWARE_RV770;
                break;

        case CHIP_RV710:
        case CHIP_RV730:
        case CHIP_RV740:
                fw_name = FIRMWARE_RV710;
                break;

        case CHIP_CYPRESS:
        case CHIP_HEMLOCK:
        case CHIP_JUNIPER:
        case CHIP_REDWOOD:
        case CHIP_CEDAR:
                fw_name = FIRMWARE_CYPRESS;
                break;

        case CHIP_SUMO:
        case CHIP_SUMO2:
        case CHIP_PALM:
        case CHIP_CAYMAN:
        case CHIP_BARTS:
        case CHIP_TURKS:
        case CHIP_CAICOS:
                fw_name = FIRMWARE_SUMO;
                break;

        case CHIP_TAHITI:
        case CHIP_VERDE:
        case CHIP_PITCAIRN:
        case CHIP_ARUBA:
        case CHIP_OLAND:
                fw_name = FIRMWARE_TAHITI;
                break;

        case CHIP_BONAIRE:
        case CHIP_KABINI:
        case CHIP_KAVERI:
        case CHIP_HAWAII:
        case CHIP_MULLINS:
                fw_name = FIRMWARE_BONAIRE;
                break;

        default:
                return -EINVAL;
        }

        r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
        if (r) {
                dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
                        fw_name);
                return r;
        }

        bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
                  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
                  RADEON_GPU_PAGE_SIZE;
        r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
                             RADEON_GEM_DOMAIN_VRAM, 0, NULL,
                             NULL, &rdev->uvd.vcpu_bo);
        if (r) {
                dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
                return r;
        }

        r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
        if (r) {
                radeon_bo_unref(&rdev->uvd.vcpu_bo);
                dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
                return r;
        }

        r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
                          &rdev->uvd.gpu_addr);
        if (r) {
                radeon_bo_unreserve(rdev->uvd.vcpu_bo);
                radeon_bo_unref(&rdev->uvd.vcpu_bo);
                dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
                return r;
        }

        r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
        if (r) {
                radeon_bo_unpin(rdev->uvd.vcpu_bo);
                radeon_bo_unreserve(rdev->uvd.vcpu_bo);
                radeon_bo_unref(&rdev->uvd.vcpu_bo);
                dev_err(rdev->dev, "(%d) UVD map failed\n", r);
                return r;
        }

        radeon_bo_unreserve(rdev->uvd.vcpu_bo);

        for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
                atomic_set(&rdev->uvd.handles[i], 0);
                rdev->uvd.filp[i] = NULL;
                rdev->uvd.img_size[i] = 0;
        }

        return 0;
}

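/**
 * radeon_uvd_fini - tear down the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Unmap, unpin and free the VCPU buffer, tear down the UVD ring and
 * release the firmware image.
 */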
void radeon_uvd_fini(struct radeon_device *rdev)
{
        int r;

        if (rdev->uvd.vcpu_bo == NULL)
                return;

        r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
        if (!r) {
                radeon_bo_kunmap(rdev->uvd.vcpu_bo);
                radeon_bo_unpin(rdev->uvd.vcpu_bo);
                radeon_bo_unreserve(rdev->uvd.vcpu_bo);
        }

        radeon_bo_unref(&rdev->uvd.vcpu_bo);

        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

        release_firmware(rdev->uvd_fw);
}

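/**
 * radeon_uvd_suspend - close all open UVD sessions
 *
 * @rdev: radeon_device pointer
 *
 * Send a destroy message for every handle that is still open and wait
 * for the resulting fences, so the VCPU is idle before suspend.
 */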
int radeon_uvd_suspend(struct radeon_device *rdev)
{
        int i, r;

        if (rdev->uvd.vcpu_bo == NULL)
                return 0;

        for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
                uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
                if (handle != 0) {
                        struct radeon_fence *fence;

                        radeon_uvd_note_usage(rdev);

                        r = radeon_uvd_get_destroy_msg(rdev,
                                R600_RING_TYPE_UVD_INDEX, handle, &fence);
                        if (r) {
                                DRM_ERROR("Error destroying UVD (%d)!\n", r);
                                continue;
                        }

                        radeon_fence_wait(fence, false);
                        radeon_fence_unref(&fence);

                        rdev->uvd.filp[i] = NULL;
                        atomic_set(&rdev->uvd.handles[i], 0);
                }
        }

        return 0;
}

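/**
 * radeon_uvd_resume - reload the firmware image into the VCPU buffer
 *
 * @rdev: radeon_device pointer
 *
 * Copy the firmware image back to the start of the (still pinned) VCPU
 * buffer and clear the remaining stack and heap area behind it.
 */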
int radeon_uvd_resume(struct radeon_device *rdev)
{
        unsigned size;
        void *ptr;

        if (rdev->uvd.vcpu_bo == NULL)
                return -EINVAL;

        memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

        size = radeon_bo_size(rdev->uvd.vcpu_bo);
        size -= rdev->uvd_fw->size;

        ptr = rdev->uvd.cpu_addr;
        ptr += rdev->uvd_fw->size;

        memset(ptr, 0, size);

        return 0;
}

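/**
 * radeon_uvd_force_into_uvd_segment - limit BO placements to the UVD segments
 *
 * @rbo: buffer object
 * @allowed_domains: domains the buffer is allowed to be placed in
 *
 * UVD addresses buffers relative to 256MB segments, so restrict every
 * placement to the first 256MB. If the buffer is not VRAM-only and has
 * just a single placement, add the second 256MB segment as a fallback.
 */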
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
                                       uint32_t allowed_domains)
{
        int i;

        for (i = 0; i < rbo->placement.num_placement; ++i) {
                rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
                rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
        }

        /* If it must be in VRAM it must be in the first segment as well */
        if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
                return;

        /* abort if we already have more than one placement */
        if (rbo->placement.num_placement > 1)
                return;

        /* add another 256MB segment */
        rbo->placements[1] = rbo->placements[0];
        rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
        rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
        rbo->placement.num_placement++;
        rbo->placement.num_busy_placement++;
}

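/**
 * radeon_uvd_free_handles - free all sessions belonging to a file handle
 *
 * @rdev: radeon_device pointer
 * @filp: drm file pointer
 *
 * Send destroy messages for all handles still owned by @filp and wait
 * for the resulting fences, typically when the file is closed.
 */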
void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
        int i, r;

        for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
                uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
                if (handle != 0 && rdev->uvd.filp[i] == filp) {
                        struct radeon_fence *fence;

                        radeon_uvd_note_usage(rdev);

                        r = radeon_uvd_get_destroy_msg(rdev,
                                R600_RING_TYPE_UVD_INDEX, handle, &fence);
                        if (r) {
                                DRM_ERROR("Error destroying UVD (%d)!\n", r);
                                continue;
                        }

                        radeon_fence_wait(fence, false);
                        radeon_fence_unref(&fence);

                        rdev->uvd.filp[i] = NULL;
                        atomic_set(&rdev->uvd.handles[i], 0);
                }
        }
}

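/**
 * radeon_uvd_cs_msg_decode - calculate buffer sizes for a decode message
 *
 * @msg: pointer to the mapped UVD message
 * @buf_sizes: minimum buffer sizes to fill in
 *
 * Read stream type, dimensions and DPB size from the message, compute
 * the per-codec minimum DPB size (reference pictures plus the various
 * context/IT surfaces) and check it against the size the message claims,
 * recording the results for the later relocation checks.
 */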
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
        unsigned stream_type = msg[4];
        unsigned width = msg[6];
        unsigned height = msg[7];
        unsigned dpb_size = msg[9];
        unsigned pitch = msg[28];

        unsigned width_in_mb = width / 16;
        unsigned height_in_mb = ALIGN(height / 16, 2);

        unsigned image_size, tmp, min_dpb_size;

        image_size = width * height;
        image_size += image_size / 2;
        image_size = ALIGN(image_size, 1024);

        switch (stream_type) {
        case 0: /* H264 */

                /* reference picture buffer */
                min_dpb_size = image_size * 17;

                /* macroblock context buffer */
                min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

                /* IT surface buffer */
                min_dpb_size += width_in_mb * height_in_mb * 32;
                break;

        case 1: /* VC1 */

                /* reference picture buffer */
                min_dpb_size = image_size * 3;

                /* CONTEXT_BUFFER */
                min_dpb_size += width_in_mb * height_in_mb * 128;

                /* IT surface buffer */
                min_dpb_size += width_in_mb * 64;

                /* DB surface buffer */
                min_dpb_size += width_in_mb * 128;

                /* BP */
                tmp = max(width_in_mb, height_in_mb);
                min_dpb_size += ALIGN(tmp * 7 * 16, 64);
                break;

        case 3: /* MPEG2 */

                /* reference picture buffer */
                min_dpb_size = image_size * 3;
                break;

        case 4: /* MPEG4 */

                /* reference picture buffer */
                min_dpb_size = image_size * 3;

                /* CM */
                min_dpb_size += width_in_mb * height_in_mb * 64;

                /* IT surface buffer */
                min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
                break;

        default:
                DRM_ERROR("UVD codec not handled %d!\n", stream_type);
                return -EINVAL;
        }

        if (width > pitch) {
                DRM_ERROR("Invalid UVD decoding target pitch!\n");
                return -EINVAL;
        }

        if (dpb_size < min_dpb_size) {
                DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
                          dpb_size, min_dpb_size);
                return -EINVAL;
        }

        buf_sizes[0x1] = dpb_size;
        buf_sizes[0x2] = image_size;
        return 0;
}

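/**
 * radeon_uvd_validate_codec - check if the codec is supported
 *
 * @p: parser context
 * @stream_type: stream type from the UVD message
 *
 * H264 and VC1 are supported by all UVD blocks; MPEG2 and MPEG4 only
 * from UVD 3 (PALM and newer) onwards.
 */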
static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
                                     unsigned stream_type)
{
        switch (stream_type) {
        case 0: /* H264 */
        case 1: /* VC1 */
                /* always supported */
                return 0;

        case 3: /* MPEG2 */
        case 4: /* MPEG4 */
                /* only since UVD 3 */
                if (p->rdev->family >= CHIP_PALM)
                        return 0;

                /* fall through */
        default:
                DRM_ERROR("UVD codec not supported by hardware %d!\n",
                          stream_type);
                return -EINVAL;
        }
}

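/**
 * radeon_uvd_cs_msg - handle a UVD message in a command stream
 *
 * @p: parser context
 * @bo: buffer object containing the message
 * @offset: offset of the message inside the BO
 * @buf_sizes: minimum buffer sizes filled in for decode messages
 *
 * Wait for the BO to be idle, map it and dispatch on the message type:
 * allocate a session handle for create messages, validate codec and
 * buffer sizes for decode messages and free the handle again for
 * destroy messages.
 */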
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
                             unsigned offset, unsigned buf_sizes[])
{
        int32_t *msg, msg_type, handle;
        unsigned img_size = 0;
        struct fence *f;
        void *ptr;

        int i, r;

        if (offset & 0x3F) {
                DRM_ERROR("UVD messages must be 64 byte aligned!\n");
                return -EINVAL;
        }

        f = reservation_object_get_excl(bo->tbo.resv);
        if (f) {
                r = radeon_fence_wait((struct radeon_fence *)f, false);
                if (r) {
                        DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
                        return r;
                }
        }

        r = radeon_bo_kmap(bo, &ptr);
        if (r) {
                DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
                return r;
        }

        msg = ptr + offset;

        msg_type = msg[1];
        handle = msg[2];

        if (handle == 0) {
                DRM_ERROR("Invalid UVD handle!\n");
                radeon_bo_kunmap(bo);
                return -EINVAL;
        }

        switch (msg_type) {
        case 0:
                /* it's a create msg, calc image size (width * height) */
                img_size = msg[7] * msg[8];

                r = radeon_uvd_validate_codec(p, msg[4]);
                radeon_bo_kunmap(bo);
                if (r)
                        return r;

                /* try to alloc a new handle */
                for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
                        if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
                                DRM_ERROR("Handle 0x%x already in use!\n", handle);
                                return -EINVAL;
                        }

                        if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
                                p->rdev->uvd.filp[i] = p->filp;
                                p->rdev->uvd.img_size[i] = img_size;
                                return 0;
                        }
                }

                DRM_ERROR("No more free UVD handles!\n");
                return -EINVAL;

        case 1:
                /* it's a decode msg, validate codec and calc buffer sizes */
                r = radeon_uvd_validate_codec(p, msg[4]);
                if (!r)
                        r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
                radeon_bo_kunmap(bo);
                if (r)
                        return r;

                /* validate the handle */
                for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
                        if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
                                if (p->rdev->uvd.filp[i] != p->filp) {
                                        DRM_ERROR("UVD handle collision detected!\n");
                                        return -EINVAL;
                                }
                                return 0;
                        }
                }

                DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
                return -ENOENT;

        case 2:
                /* it's a destroy msg, free the handle */
                for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
                        atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
                radeon_bo_kunmap(bo);
                return 0;

        default:
                DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
                radeon_bo_kunmap(bo);
                return -EINVAL;
        }

        BUG();
        return -EINVAL;
}

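/**
 * radeon_uvd_cs_reloc - patch a buffer address into the command stream
 *
 * @p: parser context
 * @data0: IB index of the DATA0 value (lower 32 address bits)
 * @data1: IB index of the DATA1 value (upper 32 address bits)
 * @buf_sizes: minimum buffer sizes from the decode message
 * @has_msg_cmd: tracks whether the message command was already seen
 *
 * Resolve the relocation, write the resulting GPU address into the IB
 * and enforce the minimum buffer sizes, the 256MB segment restrictions
 * and the command ordering rules.
 */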
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
                               int data0, int data1,
                               unsigned buf_sizes[], bool *has_msg_cmd)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_bo_list *reloc;
        unsigned idx, cmd, offset;
        uint64_t start, end;
        int r;

        relocs_chunk = p->chunk_relocs;
        offset = radeon_get_ib_value(p, data0);
        idx = radeon_get_ib_value(p, data1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
                          idx, relocs_chunk->length_dw);
                return -EINVAL;
        }

        reloc = &p->relocs[(idx / 4)];
        start = reloc->gpu_offset;
        end = start + radeon_bo_size(reloc->robj);
        start += offset;

        p->ib.ptr[data0] = start & 0xFFFFFFFF;
        p->ib.ptr[data1] = start >> 32;

        cmd = radeon_get_ib_value(p, p->idx) >> 1;

        if (cmd < 0x4) {
                if (end <= start) {
                        DRM_ERROR("invalid reloc offset %X!\n", offset);
                        return -EINVAL;
                }
                if ((end - start) < buf_sizes[cmd]) {
                        DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
                                  (unsigned)(end - start), buf_sizes[cmd]);
                        return -EINVAL;
                }

        } else if (cmd != 0x100) {
                DRM_ERROR("invalid UVD command %X!\n", cmd);
                return -EINVAL;
        }

        if ((start >> 28) != ((end - 1) >> 28)) {
                DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
                          start, end);
                return -EINVAL;
        }

        /* TODO: is this still necessary on NI+ ? */
        if ((cmd == 0 || cmd == 0x3) &&
            (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
                DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
                          start, end);
                return -EINVAL;
        }

        if (cmd == 0) {
                if (*has_msg_cmd) {
                        DRM_ERROR("More than one message in a UVD-IB!\n");
                        return -EINVAL;
                }
                *has_msg_cmd = true;
                r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
                if (r)
                        return r;
        } else if (!*has_msg_cmd) {
                DRM_ERROR("Message needed before other commands are sent!\n");
                return -EINVAL;
        }

        return 0;
}

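/**
 * radeon_uvd_cs_reg - parse register writes in a UVD command stream
 *
 * @p: parser context
 * @pkt: type0 packet to parse
 * @data0: IB index of the last DATA0 value
 * @data1: IB index of the last DATA1 value
 * @buf_sizes: minimum buffer sizes from the decode message
 * @has_msg_cmd: tracks whether the message command was already seen
 *
 * Only the GPCOM VCPU data/cmd registers and the engine control
 * register may be written; a write to UVD_GPCOM_VCPU_CMD triggers the
 * relocation handling.
 */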
static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
                             struct radeon_cs_packet *pkt,
                             int *data0, int *data1,
                             unsigned buf_sizes[],
                             bool *has_msg_cmd)
{
        int i, r;

        p->idx++;
        for (i = 0; i <= pkt->count; ++i) {
                switch (pkt->reg + i*4) {
                case UVD_GPCOM_VCPU_DATA0:
                        *data0 = p->idx;
                        break;
                case UVD_GPCOM_VCPU_DATA1:
                        *data1 = p->idx;
                        break;
                case UVD_GPCOM_VCPU_CMD:
                        r = radeon_uvd_cs_reloc(p, *data0, *data1,
                                                buf_sizes, has_msg_cmd);
                        if (r)
                                return r;
                        break;
                case UVD_ENGINE_CNTL:
                        break;
                default:
                        DRM_ERROR("Invalid reg 0x%X!\n",
                                  pkt->reg + i*4);
                        return -EINVAL;
                }
                p->idx++;
        }
        return 0;
}

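/**
 * radeon_uvd_cs_parse - validate a UVD command stream
 *
 * @p: parser context
 *
 * Walk all packets of the IB, validating register writes and
 * relocations, and make sure the IB carries exactly one message command.
 */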
int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet pkt;
        int r, data0 = 0, data1 = 0;

        /* does the IB have a msg command */
        bool has_msg_cmd = false;

        /* minimum buffer sizes */
        unsigned buf_sizes[] = {
                [0x00000000]    =       2048,
                [0x00000001]    =       32 * 1024 * 1024,
                [0x00000002]    =       2048 * 1152 * 3,
                [0x00000003]    =       2048,
        };

        if (p->chunk_ib->length_dw % 16) {
                DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
                          p->chunk_ib->length_dw);
                return -EINVAL;
        }

        if (p->chunk_relocs == NULL) {
                DRM_ERROR("No relocation chunk!\n");
                return -EINVAL;
        }

        do {
                r = radeon_cs_packet_parse(p, &pkt, p->idx);
                if (r)
                        return r;
                switch (pkt.type) {
                case RADEON_PACKET_TYPE0:
                        r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
                                              buf_sizes, &has_msg_cmd);
                        if (r)
                                return r;
                        break;
                case RADEON_PACKET_TYPE2:
                        p->idx += pkt.count + 2;
                        break;
                default:
                        DRM_ERROR("Unknown packet type %d!\n", pkt.type);
                        return -EINVAL;
                }
        } while (p->idx < p->chunk_ib->length_dw);

        if (!has_msg_cmd) {
                DRM_ERROR("UVD-IBs need a msg command!\n");
                return -EINVAL;
        }

        return 0;
}

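/**
 * radeon_uvd_send_msg - submit a message buffer to the VCPU
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to submit on
 * @addr: GPU address of the message
 * @fence: optional resulting fence
 *
 * Build a small IB that writes the message address into the GPCOM VCPU
 * data registers followed by the command, pad it with type2 NOP packets
 * and schedule it.
 */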
static int radeon_uvd_send_msg(struct radeon_device *rdev,
                               int ring, uint64_t addr,
                               struct radeon_fence **fence)
{
        struct radeon_ib ib;
        int i, r;

        r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
        if (r)
                return r;

        ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
        ib.ptr[1] = addr;
        ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
        ib.ptr[3] = addr >> 32;
        ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
        ib.ptr[5] = 0;
        for (i = 6; i < 16; ++i)
                ib.ptr[i] = PACKET2(0);
        ib.length_dw = 16;

        r = radeon_ib_schedule(rdev, &ib, NULL, false);

        if (fence)
                *fence = radeon_fence_ref(ib.fence);

        radeon_ib_free(rdev, &ib);
        return r;
}

/*
 * Multiple fence commands without any stream commands in between can
 * crash the vcpu so just try to emit a dummy create/destroy msg to
 * avoid this.
 */
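/**
 * radeon_uvd_get_create_msg - send a dummy create message
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to submit on
 * @handle: session handle to create
 * @fence: optional resulting fence
 *
 * Write a 1920x1088 create message into the last page of the VCPU
 * buffer and submit it to the VCPU.
 */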
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
                              uint32_t handle, struct radeon_fence **fence)
{
        /* we use the last page of the vcpu bo for the UVD message */
        uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
                RADEON_GPU_PAGE_SIZE;

        uint32_t *msg = rdev->uvd.cpu_addr + offs;
        uint64_t addr = rdev->uvd.gpu_addr + offs;

        int r, i;

        r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
        if (r)
                return r;

        /* stitch together a UVD create msg */
        msg[0] = cpu_to_le32(0x00000de4);
        msg[1] = cpu_to_le32(0x00000000);
        msg[2] = cpu_to_le32(handle);
        msg[3] = cpu_to_le32(0x00000000);
        msg[4] = cpu_to_le32(0x00000000);
        msg[5] = cpu_to_le32(0x00000000);
        msg[6] = cpu_to_le32(0x00000000);
        msg[7] = cpu_to_le32(0x00000780);
        msg[8] = cpu_to_le32(0x00000440);
        msg[9] = cpu_to_le32(0x00000000);
        msg[10] = cpu_to_le32(0x01b37000);
        for (i = 11; i < 1024; ++i)
                msg[i] = cpu_to_le32(0x0);

        r = radeon_uvd_send_msg(rdev, ring, addr, fence);
        radeon_bo_unreserve(rdev->uvd.vcpu_bo);
        return r;
}

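/**
 * radeon_uvd_get_destroy_msg - send a destroy message
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to submit on
 * @handle: session handle to destroy
 * @fence: optional resulting fence
 *
 * Write a destroy message for @handle into the last page of the VCPU
 * buffer and submit it to the VCPU.
 */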
int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
                               uint32_t handle, struct radeon_fence **fence)
{
        /* we use the last page of the vcpu bo for the UVD message */
        uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
                RADEON_GPU_PAGE_SIZE;

        uint32_t *msg = rdev->uvd.cpu_addr + offs;
        uint64_t addr = rdev->uvd.gpu_addr + offs;

        int r, i;

        r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
        if (r)
                return r;

        /* stitch together a UVD destroy msg */
        msg[0] = cpu_to_le32(0x00000de4);
        msg[1] = cpu_to_le32(0x00000002);
        msg[2] = cpu_to_le32(handle);
        msg[3] = cpu_to_le32(0x00000000);
        for (i = 4; i < 1024; ++i)
                msg[i] = cpu_to_le32(0x0);

        r = radeon_uvd_send_msg(rdev, ring, addr, fence);
        radeon_bo_unreserve(rdev->uvd.vcpu_bo);
        return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
                                     unsigned *sd, unsigned *hd)
{
        unsigned i;

        *sd = 0;
        *hd = 0;

        for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
                if (!atomic_read(&rdev->uvd.handles[i]))
                        continue;

                if (rdev->uvd.img_size[i] >= 720*576)
                        ++(*hd);
                else
                        ++(*sd);
        }
}

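/**
 * radeon_uvd_idle_work_handler - power down UVD when it is idle
 *
 * @work: delayed work item embedded in radeon_device.uvd
 *
 * If no UVD fences are left outstanding, power the block down (through
 * DPM when enabled, otherwise by setting the UVD clocks to zero);
 * otherwise re-arm the delayed work and check again later.
 */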
static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
        struct radeon_device *rdev =
                container_of(work, struct radeon_device, uvd.idle_work.work);

        if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
                if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
                        radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
                                                 &rdev->pm.dpm.hd);
                        radeon_dpm_enable_uvd(rdev, false);
                } else {
                        radeon_set_uvd_clocks(rdev, 0, 0);
                }
        } else {
                schedule_delayed_work(&rdev->uvd.idle_work,
                                      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
        }
}

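/**
 * radeon_uvd_note_usage - keep the clocks up while UVD is in use
 *
 * @rdev: radeon_device pointer
 *
 * Called before UVD command submission: (re)schedule the idle work and,
 * if the block was idle before, raise the UVD clocks again (through DPM
 * when enabled).
 */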
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
        bool streams_changed = false;
        bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
        set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
                                            msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

        if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
                unsigned hd = 0, sd = 0;
                radeon_uvd_count_handles(rdev, &sd, &hd);
                if ((rdev->pm.dpm.sd != sd) ||
                    (rdev->pm.dpm.hd != hd)) {
                        rdev->pm.dpm.sd = sd;
                        rdev->pm.dpm.hd = hd;
                        /* disable this for now */
                        /*streams_changed = true;*/
                }
        }

        if (set_clocks || streams_changed) {
                if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
                        radeon_dpm_enable_uvd(rdev, true);
                } else {
                        radeon_set_uvd_clocks(rdev, 53300, 40000);
                }
        }
}

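/**
 * radeon_uvd_calc_upll_post_div - find a suitable UPLL post divider
 *
 * @vco_freq: VCO frequency
 * @target_freq: frequency the divided clock should not exceed
 * @pd_min: minimum allowed post divider
 * @pd_even: post dividers above this value must be even
 *
 * Pick the smallest post divider satisfying the minimum and evenness
 * constraints while keeping vco_freq / post_div at or below the target
 * frequency.
 */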
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
                                              unsigned target_freq,
                                              unsigned pd_min,
                                              unsigned pd_even)
{
        unsigned post_div = vco_freq / target_freq;

        /* adjust to post divider minimum value */
        if (post_div < pd_min)
                post_div = pd_min;

        /* we always need a frequency less than or equal to the target */
        if ((vco_freq / post_div) > target_freq)
                post_div += 1;

        /* post dividers above a certain value must be even */
        if (post_div > pd_even && post_div % 2)
                post_div += 1;

        return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for the UVD UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
                                  unsigned vclk, unsigned dclk,
                                  unsigned vco_min, unsigned vco_max,
                                  unsigned fb_factor, unsigned fb_mask,
                                  unsigned pd_min, unsigned pd_max,
                                  unsigned pd_even,
                                  unsigned *optimal_fb_div,
                                  unsigned *optimal_vclk_div,
                                  unsigned *optimal_dclk_div)
{
        unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

        /* start off with something large */
        unsigned optimal_score = ~0;

        /* loop through vco from low to high */
        vco_min = max(max(vco_min, vclk), dclk);
        for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

                uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
                unsigned vclk_div, dclk_div, score;

                do_div(fb_div, ref_freq);

                /* fb div out of range ? */
                if (fb_div > fb_mask)
                        break; /* it can only get worse */

                fb_div &= fb_mask;

                /* calc vclk divider with current vco freq */
                vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
                                                         pd_min, pd_even);
                if (vclk_div > pd_max)
                        break; /* vco is too big, it has to stop */

                /* calc dclk divider with current vco freq */
                dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
                                                         pd_min, pd_even);
                if (dclk_div > pd_max)
                        break; /* vco is too big, it has to stop */

                /* calc score with current vco freq */
                score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

                /* determine if this vco setting is better than current optimal settings */
                if (score < optimal_score) {
                        *optimal_fb_div = fb_div;
                        *optimal_vclk_div = vclk_div;
                        *optimal_dclk_div = dclk_div;
                        optimal_score = score;
                        if (optimal_score == 0)
                                break; /* it can't get better than this */
                }
        }

        /* did we find a valid setup ? */
        if (optimal_score == ~0)
                return -EINVAL;

        return 0;
}

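/**
 * radeon_uvd_send_upll_ctlreq - make the UPLL latch new settings
 *
 * @rdev: radeon_device pointer
 * @cg_upll_func_cntl: offset of the CG_UPLL_FUNC_CNTL register
 *
 * Toggle the CTLREQ bit and poll for both CTLACK bits so that newly
 * programmed UPLL dividers take effect. Returns 0 on success,
 * -ETIMEDOUT if the PLL never acknowledges the request.
 */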
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
                                unsigned cg_upll_func_cntl)
{
        unsigned i;

        /* make sure UPLL_CTLREQ is deasserted */
        WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

        mdelay(10);

        /* assert UPLL_CTLREQ */
        WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

        /* wait for CTLACK and CTLACK2 to get asserted */
        for (i = 0; i < 100; ++i) {
                uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
                if ((RREG32(cg_upll_func_cntl) & mask) == mask)
                        break;
                mdelay(10);
        }

        /* deassert UPLL_CTLREQ */
        WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

        if (i == 100) {
                DRM_ERROR("Timeout setting UVD clocks!\n");
                return -ETIMEDOUT;
        }

        return 0;
}