linux/drivers/staging/media/tegra-vde/tegra-vde.c
<<
>>
Prefs
   1/*
   2 * NVIDIA Tegra Video decoder driver
   3 *
   4 * Copyright (C) 2016-2017 Dmitry Osipenko <digetx@gmail.com>
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12#include <linux/clk.h>
  13#include <linux/dma-buf.h>
  14#include <linux/genalloc.h>
  15#include <linux/interrupt.h>
  16#include <linux/iopoll.h>
  17#include <linux/miscdevice.h>
  18#include <linux/module.h>
  19#include <linux/of_device.h>
  20#include <linux/pm_runtime.h>
  21#include <linux/reset.h>
  22#include <linux/slab.h>
  23#include <linux/uaccess.h>
  24
  25#include <soc/tegra/pmc.h>
  26
  27#include "uapi.h"
  28
/* Register offsets used relative to the BSEV block base (vde->bsev) */
#define ICMDQUE_WR              0x00
#define CMDQUE_CONTROL          0x08
#define INTR_STATUS             0x18
#define BSE_INT_ENB             0x40
#define BSE_CONFIG              0x44

/* Bits tested in the value read back from INTR_STATUS */
#define BSE_ICMDQUE_EMPTY       BIT(3)
#define BSE_DMA_BUSY            BIT(23)
  37
  38#define VDE_WR(__data, __addr)                          \
  39do {                                                    \
  40        dev_dbg(vde->miscdev.parent,                    \
  41                "%s: %d: 0x%08X => " #__addr ")\n",     \
  42                __func__, __LINE__, (u32)(__data));     \
  43        writel_relaxed(__data, __addr);                 \
  44} while (0)
  45
  46struct video_frame {
  47        struct dma_buf_attachment *y_dmabuf_attachment;
  48        struct dma_buf_attachment *cb_dmabuf_attachment;
  49        struct dma_buf_attachment *cr_dmabuf_attachment;
  50        struct dma_buf_attachment *aux_dmabuf_attachment;
  51        struct sg_table *y_sgt;
  52        struct sg_table *cb_sgt;
  53        struct sg_table *cr_sgt;
  54        struct sg_table *aux_sgt;
  55        dma_addr_t y_addr;
  56        dma_addr_t cb_addr;
  57        dma_addr_t cr_addr;
  58        dma_addr_t aux_addr;
  59        u32 frame_num;
  60        u32 flags;
  61};
  62
  63struct tegra_vde {
  64        void __iomem *sxe;
  65        void __iomem *bsev;
  66        void __iomem *mbe;
  67        void __iomem *ppe;
  68        void __iomem *mce;
  69        void __iomem *tfe;
  70        void __iomem *ppb;
  71        void __iomem *vdma;
  72        void __iomem *frameid;
  73        struct mutex lock;
  74        struct miscdevice miscdev;
  75        struct reset_control *rst;
  76        struct reset_control *rst_mc;
  77        struct gen_pool *iram_pool;
  78        struct completion decode_completion;
  79        struct clk *clk;
  80        dma_addr_t iram_lists_addr;
  81        u32 *iram;
  82};
  83
  84static void tegra_vde_set_bits(struct tegra_vde *vde,
  85                               u32 mask, void __iomem *regs)
  86{
  87        u32 value = readl_relaxed(regs);
  88
  89        VDE_WR(value | mask, regs);
  90}
  91
  92static int tegra_vde_wait_mbe(struct tegra_vde *vde)
  93{
  94        u32 tmp;
  95
  96        return readl_relaxed_poll_timeout(vde->mbe + 0x8C, tmp,
  97                                          (tmp >= 0x10), 1, 100);
  98}
  99
 100static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde *vde,
 101                                         unsigned int refs_nb,
 102                                         bool setup_refs)
 103{
 104        u32 frame_idx_enb_mask = 0;
 105        u32 value;
 106        unsigned int frame_idx;
 107        unsigned int idx;
 108        int err;
 109
 110        VDE_WR(0xD0000000 | (0 << 23), vde->mbe + 0x80);
 111        VDE_WR(0xD0200000 | (0 << 23), vde->mbe + 0x80);
 112
 113        err = tegra_vde_wait_mbe(vde);
 114        if (err)
 115                return err;
 116
 117        if (!setup_refs)
 118                return 0;
 119
 120        for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
 121                VDE_WR(0xD0000000 | (frame_idx << 23), vde->mbe + 0x80);
 122                VDE_WR(0xD0200000 | (frame_idx << 23), vde->mbe + 0x80);
 123
 124                frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));
 125
 126                if (idx % 4 == 3 || idx == refs_nb - 1) {
 127                        value = 0xC0000000;
 128                        value |= (idx >> 2) << 24;
 129                        value |= frame_idx_enb_mask;
 130
 131                        VDE_WR(value, vde->mbe + 0x80);
 132
 133                        err = tegra_vde_wait_mbe(vde);
 134                        if (err)
 135                                return err;
 136
 137                        frame_idx_enb_mask = 0;
 138                }
 139        }
 140
 141        return 0;
 142}
 143
 144static void tegra_vde_mbe_set_0xa_reg(struct tegra_vde *vde, int reg, u32 val)
 145{
 146        VDE_WR(0xA0000000 | (reg << 24) | (val & 0xFFFF), vde->mbe + 0x80);
 147        VDE_WR(0xA0000000 | ((reg + 1) << 24) | (val >> 16), vde->mbe + 0x80);
 148}
 149
 150static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
 151{
 152        struct device *dev = vde->miscdev.parent;
 153        u32 value;
 154        int err;
 155
 156        err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
 157                                         !(value & BIT(2)), 1, 100);
 158        if (err) {
 159                dev_err(dev, "BSEV unknown bit timeout\n");
 160                return err;
 161        }
 162
 163        err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
 164                                         (value & BSE_ICMDQUE_EMPTY), 1, 100);
 165        if (err) {
 166                dev_err(dev, "BSEV ICMDQUE flush timeout\n");
 167                return err;
 168        }
 169
 170        if (!wait_dma)
 171                return 0;
 172
 173        err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
 174                                         !(value & BSE_DMA_BUSY), 1, 100);
 175        if (err) {
 176                dev_err(dev, "BSEV DMA timeout\n");
 177                return err;
 178        }
 179
 180        return 0;
 181}
 182
 183static int tegra_vde_push_to_bsev_icmdqueue(struct tegra_vde *vde,
 184                                            u32 value, bool wait_dma)
 185{
 186        VDE_WR(value, vde->bsev + ICMDQUE_WR);
 187
 188        return tegra_vde_wait_bsev(vde, wait_dma);
 189}
 190
 191static void tegra_vde_setup_frameid(struct tegra_vde *vde,
 192                                    struct video_frame *frame,
 193                                    unsigned int frameid,
 194                                    u32 mbs_width, u32 mbs_height)
 195{
 196        u32 y_addr  = frame ? frame->y_addr  : 0x6CDEAD00;
 197        u32 cb_addr = frame ? frame->cb_addr : 0x6CDEAD00;
 198        u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
 199        u32 value1 = frame ? ((mbs_width << 16) | mbs_height) : 0;
 200        u32 value2 = frame ? ((((mbs_width + 1) >> 1) << 6) | 1) : 0;
 201
 202        VDE_WR(y_addr  >> 8, vde->frameid + 0x000 + frameid * 4);
 203        VDE_WR(cb_addr >> 8, vde->frameid + 0x100 + frameid * 4);
 204        VDE_WR(cr_addr >> 8, vde->frameid + 0x180 + frameid * 4);
 205        VDE_WR(value1,       vde->frameid + 0x080 + frameid * 4);
 206        VDE_WR(value2,       vde->frameid + 0x280 + frameid * 4);
 207}
 208
 209static void tegra_setup_frameidx(struct tegra_vde *vde,
 210                                 struct video_frame *frames,
 211                                 unsigned int frames_nb,
 212                                 u32 mbs_width, u32 mbs_height)
 213{
 214        unsigned int idx;
 215
 216        for (idx = 0; idx < frames_nb; idx++)
 217                tegra_vde_setup_frameid(vde, &frames[idx], idx,
 218                                        mbs_width, mbs_height);
 219
 220        for (; idx < 17; idx++)
 221                tegra_vde_setup_frameid(vde, NULL, idx, 0, 0);
 222}
 223
 224static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
 225                                       unsigned int table,
 226                                       unsigned int row,
 227                                       u32 value1, u32 value2)
 228{
 229        u32 *iram_tables = vde->iram;
 230
 231        dev_dbg(vde->miscdev.parent, "IRAM table %u: row %u: 0x%08X 0x%08X\n",
 232                table, row, value1, value2);
 233
 234        iram_tables[0x20 * table + row * 2] = value1;
 235        iram_tables[0x20 * table + row * 2 + 1] = value2;
 236}
 237
 238static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
 239                                        struct video_frame *dpb_frames,
 240                                        unsigned int ref_frames_nb,
 241                                        unsigned int with_earlier_poc_nb)
 242{
 243        struct video_frame *frame;
 244        u32 value, aux_addr;
 245        int with_later_poc_nb;
 246        unsigned int i, k;
 247
 248        dev_dbg(vde->miscdev.parent, "DPB: Frame 0: frame_num = %d\n",
 249                dpb_frames[0].frame_num);
 250
 251        dev_dbg(vde->miscdev.parent, "REF L0:\n");
 252
 253        for (i = 0; i < 16; i++) {
 254                if (i < ref_frames_nb) {
 255                        frame = &dpb_frames[i + 1];
 256
 257                        aux_addr = frame->aux_addr;
 258
 259                        value  = (i + 1) << 26;
 260                        value |= !(frame->flags & FLAG_B_FRAME) << 25;
 261                        value |= 1 << 24;
 262                        value |= frame->frame_num;
 263
 264                        dev_dbg(vde->miscdev.parent,
 265                                "\tFrame %d: frame_num = %d B_frame = %d\n",
 266                                i + 1, frame->frame_num,
 267                                (frame->flags & FLAG_B_FRAME));
 268                } else {
 269                        aux_addr = 0x6ADEAD00;
 270                        value = 0;
 271                }
 272
 273                tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
 274                tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
 275                tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
 276                tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
 277        }
 278
 279        if (!(dpb_frames[0].flags & FLAG_B_FRAME))
 280                return;
 281
 282        if (with_earlier_poc_nb >= ref_frames_nb)
 283                return;
 284
 285        with_later_poc_nb = ref_frames_nb - with_earlier_poc_nb;
 286
 287        dev_dbg(vde->miscdev.parent,
 288                "REF L1: with_later_poc_nb %d with_earlier_poc_nb %d\n",
 289                 with_later_poc_nb, with_earlier_poc_nb);
 290
 291        for (i = 0, k = with_earlier_poc_nb; i < with_later_poc_nb; i++, k++) {
 292                frame = &dpb_frames[k + 1];
 293
 294                aux_addr = frame->aux_addr;
 295
 296                value  = (k + 1) << 26;
 297                value |= !(frame->flags & FLAG_B_FRAME) << 25;
 298                value |= 1 << 24;
 299                value |= frame->frame_num;
 300
 301                dev_dbg(vde->miscdev.parent,
 302                        "\tFrame %d: frame_num = %d\n",
 303                        k + 1, frame->frame_num);
 304
 305                tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
 306        }
 307
 308        for (k = 0; i < ref_frames_nb; i++, k++) {
 309                frame = &dpb_frames[k + 1];
 310
 311                aux_addr = frame->aux_addr;
 312
 313                value  = (k + 1) << 26;
 314                value |= !(frame->flags & FLAG_B_FRAME) << 25;
 315                value |= 1 << 24;
 316                value |= frame->frame_num;
 317
 318                dev_dbg(vde->miscdev.parent,
 319                        "\tFrame %d: frame_num = %d\n",
 320                        k + 1, frame->frame_num);
 321
 322                tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
 323        }
 324}
 325
/*
 * Program the VDE register blocks for decoding one H.264 frame.
 *
 * @vde:                 device state
 * @ctx:                 decoder parameters (copied from userspace by the
 *                       caller)
 * @dpb_frames:          @ctx->dpb_frames_nb frames; entry 0 is treated
 *                       separately from the remaining entries, which are
 *                       passed on as reference frames
 * @bitstream_data_addr: DMA address of the bitstream buffer
 * @bitstream_data_size: size of the bitstream buffer in bytes
 * @macroblocks_nb:      number of macroblocks in the picture
 *
 * The sequence is: set control bits and fixed register values, clear part
 * of the IRAM, fill the FRAMEID registers and IRAM tables, push commands to
 * the BSEV command queue, then program the SXE and MBE with values derived
 * from @ctx.
 *
 * Returns 0 on success or a negative error code if one of the BSEV/MBE
 * waits fails.
 */
static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
                                      struct tegra_vde_h264_decoder_ctx *ctx,
                                      struct video_frame *dpb_frames,
                                      dma_addr_t bitstream_data_addr,
                                      size_t bitstream_data_size,
                                      unsigned int macroblocks_nb)
{
        struct device *dev = vde->miscdev.parent;
        u32 value;
        int err;

        /* OR control bits into registers of the individual blocks */
        tegra_vde_set_bits(vde, 0x000A, vde->sxe + 0xF0);
        tegra_vde_set_bits(vde, 0x000B, vde->bsev + CMDQUE_CONTROL);
        tegra_vde_set_bits(vde, 0x8002, vde->mbe + 0x50);
        tegra_vde_set_bits(vde, 0x000A, vde->mbe + 0xA0);
        tegra_vde_set_bits(vde, 0x000A, vde->ppe + 0x14);
        tegra_vde_set_bits(vde, 0x000A, vde->ppe + 0x28);
        tegra_vde_set_bits(vde, 0x0A00, vde->mce + 0x08);
        tegra_vde_set_bits(vde, 0x000A, vde->tfe + 0x00);
        tegra_vde_set_bits(vde, 0x0005, vde->vdma + 0x04);

        /* Fixed register values that do not depend on the stream */
        VDE_WR(0x00000000, vde->vdma + 0x1C);
        VDE_WR(0x00000000, vde->vdma + 0x00);
        VDE_WR(0x00000007, vde->vdma + 0x04);
        VDE_WR(0x00000007, vde->frameid + 0x200);
        VDE_WR(0x00000005, vde->tfe + 0x04);
        VDE_WR(0x00000000, vde->mbe + 0x84);
        VDE_WR(0x00000010, vde->sxe + 0x08);
        VDE_WR(0x00000150, vde->sxe + 0x54);
        VDE_WR(0x0000054C, vde->sxe + 0x58);
        VDE_WR(0x00000E34, vde->sxe + 0x5C);
        VDE_WR(0x063C063C, vde->mce + 0x10);
        VDE_WR(0x0003FC00, vde->bsev + INTR_STATUS);
        VDE_WR(0x0000150D, vde->bsev + BSE_CONFIG);
        VDE_WR(0x00000100, vde->bsev + BSE_INT_ENB);
        VDE_WR(0x00000000, vde->bsev + 0x98);
        VDE_WR(0x00000060, vde->bsev + 0x9C);

        /*
         * vde->iram is a u32 pointer, so "+ 128" is 512 bytes into the IRAM
         * area, i.e. right behind the four 0x20-word tables filled below.
         */
        memset(vde->iram + 128, 0, macroblocks_nb / 2);

        tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
                             ctx->pic_width_in_mbs, ctx->pic_height_in_mbs);

        /* Every DPB entry except entry 0 is handed over as a reference */
        tegra_vde_setup_iram_tables(vde, dpb_frames,
                                    ctx->dpb_frames_nb - 1,
                                    ctx->dpb_ref_frames_with_earlier_poc_nb);

        /*
         * The IRAM mapping is write-combine, ensure that CPU buffers have
         * been flushed at this point.
         */
        wmb();

        VDE_WR(0x00000000, vde->bsev + 0x8C);
        /* Address one past the end of the bitstream buffer */
        VDE_WR(bitstream_data_addr + bitstream_data_size,
               vde->bsev + 0x54);

        value = ctx->pic_width_in_mbs << 11 | ctx->pic_height_in_mbs << 3;

        VDE_WR(value, vde->bsev + 0x88);

        err = tegra_vde_wait_bsev(vde, false);
        if (err)
                return err;

        err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x800003FC, false);
        if (err)
                return err;

        /* Command carrying the word address of the IRAM area at +512 bytes */
        value = 0x01500000;
        value |= ((vde->iram_lists_addr + 512) >> 2) & 0xFFFF;

        err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
        if (err)
                return err;

        err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
        if (err)
                return err;

        err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x80000080, false);
        if (err)
                return err;

        /* Command carrying the word address of the start of the IRAM area */
        value = 0x0E340000 | ((vde->iram_lists_addr >> 2) & 0xFFFF);

        err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
        if (err)
                return err;

        /* SXE: picture dimensions in macroblocks */
        value = 0x00800005;
        value |= ctx->pic_width_in_mbs << 11;
        value |= ctx->pic_height_in_mbs << 3;

        VDE_WR(value, vde->sxe + 0x10);

        /* SXE: profile, level and POC / frame_num parameters from @ctx */
        value = !ctx->baseline_profile << 17;
        value |= ctx->level_idc << 13;
        value |= ctx->log2_max_pic_order_cnt_lsb << 7;
        value |= ctx->pic_order_cnt_type << 5;
        value |= ctx->log2_max_frame_num;

        VDE_WR(value, vde->sxe + 0x40);

        value = ctx->pic_init_qp << 25;
        value |= !!(ctx->deblocking_filter_control_present_flag) << 2;
        value |= !!ctx->pic_order_present_flag;

        VDE_WR(value, vde->sxe + 0x44);

        value = ctx->chroma_qp_index_offset;
        value |= ctx->num_ref_idx_l0_active_minus1 << 5;
        value |= ctx->num_ref_idx_l1_active_minus1 << 10;
        value |= !!ctx->constrained_intra_pred_flag << 15;

        VDE_WR(value, vde->sxe + 0x48);

        /* Bit 24 reflects whether DPB entry 0 is flagged as a B-frame */
        value = 0x0C000000;
        value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 24;

        VDE_WR(value, vde->sxe + 0x4C);

        /* Only bits 19:15 of the bitstream size are passed on here */
        value = 0x03800000;
        value |= bitstream_data_size & GENMASK(19, 15);

        VDE_WR(value, vde->sxe + 0x68);

        VDE_WR(bitstream_data_addr, vde->sxe + 0x6C);

        /* MBE: all commands below go through the register at offset 0x80 */
        value = 0x10000005;
        value |= ctx->pic_width_in_mbs << 11;
        value |= ctx->pic_height_in_mbs << 3;

        VDE_WR(value, vde->mbe + 0x80);

        value = 0x26800000;
        value |= ctx->level_idc << 4;
        value |= !ctx->baseline_profile << 1;
        value |= !!ctx->direct_8x8_inference_flag;

        VDE_WR(value, vde->mbe + 0x80);

        VDE_WR(0xF4000001, vde->mbe + 0x80);
        VDE_WR(0x20000000, vde->mbe + 0x80);
        VDE_WR(0xF4000101, vde->mbe + 0x80);

        value = 0x20000000;
        value |= ctx->chroma_qp_index_offset << 8;

        VDE_WR(value, vde->mbe + 0x80);

        /* Reference indices are only set up for pic_order_cnt_type 0 */
        err = tegra_vde_setup_mbe_frame_idx(vde,
                                            ctx->dpb_frames_nb - 1,
                                            ctx->pic_order_cnt_type == 0);
        if (err) {
                dev_err(dev, "MBE frames setup failed %d\n", err);
                return err;
        }

        /* The last pair of 0xA registers carries the aux address of entry 0 */
        tegra_vde_mbe_set_0xa_reg(vde, 0, 0x000009FC);
        tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
        tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
        tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
        tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);

        value = 0xFC000000;
        value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 2;

        /* The reference flag of entry 0 is ignored for baseline profile */
        if (!ctx->baseline_profile)
                value |= !!(dpb_frames[0].flags & FLAG_REFERENCE) << 1;

        VDE_WR(value, vde->mbe + 0x80);

        err = tegra_vde_wait_mbe(vde);
        if (err) {
                dev_err(dev, "MBE programming failed %d\n", err);
                return err;
        }

        return 0;
}
 507
 508static void tegra_vde_decode_frame(struct tegra_vde *vde,
 509                                   unsigned int macroblocks_nb)
 510{
 511        reinit_completion(&vde->decode_completion);
 512
 513        VDE_WR(0x00000001, vde->bsev + 0x8C);
 514        VDE_WR(0x20000000 | (macroblocks_nb - 1), vde->sxe + 0x00);
 515}
 516
 517static void tegra_vde_detach_and_put_dmabuf(struct dma_buf_attachment *a,
 518                                            struct sg_table *sgt,
 519                                            enum dma_data_direction dma_dir)
 520{
 521        struct dma_buf *dmabuf = a->dmabuf;
 522
 523        dma_buf_unmap_attachment(a, sgt, dma_dir);
 524        dma_buf_detach(dmabuf, a);
 525        dma_buf_put(dmabuf);
 526}
 527
 528static int tegra_vde_attach_dmabuf(struct device *dev,
 529                                   int fd,
 530                                   unsigned long offset,
 531                                   size_t min_size,
 532                                   size_t align_size,
 533                                   struct dma_buf_attachment **a,
 534                                   dma_addr_t *addr,
 535                                   struct sg_table **s,
 536                                   size_t *size,
 537                                   enum dma_data_direction dma_dir)
 538{
 539        struct dma_buf_attachment *attachment;
 540        struct dma_buf *dmabuf;
 541        struct sg_table *sgt;
 542        int err;
 543
 544        dmabuf = dma_buf_get(fd);
 545        if (IS_ERR(dmabuf)) {
 546                dev_err(dev, "Invalid dmabuf FD\n");
 547                return PTR_ERR(dmabuf);
 548        }
 549
 550        if (dmabuf->size & (align_size - 1)) {
 551                dev_err(dev, "Unaligned dmabuf 0x%zX, should be aligned to 0x%zX\n",
 552                        dmabuf->size, align_size);
 553                return -EINVAL;
 554        }
 555
 556        if ((u64)offset + min_size > dmabuf->size) {
 557                dev_err(dev, "Too small dmabuf size %zu @0x%lX, should be at least %zu\n",
 558                        dmabuf->size, offset, min_size);
 559                return -EINVAL;
 560        }
 561
 562        attachment = dma_buf_attach(dmabuf, dev);
 563        if (IS_ERR(attachment)) {
 564                dev_err(dev, "Failed to attach dmabuf\n");
 565                err = PTR_ERR(attachment);
 566                goto err_put;
 567        }
 568
 569        sgt = dma_buf_map_attachment(attachment, dma_dir);
 570        if (IS_ERR(sgt)) {
 571                dev_err(dev, "Failed to get dmabufs sg_table\n");
 572                err = PTR_ERR(sgt);
 573                goto err_detach;
 574        }
 575
 576        if (sgt->nents != 1) {
 577                dev_err(dev, "Sparse DMA region is unsupported\n");
 578                err = -EINVAL;
 579                goto err_unmap;
 580        }
 581
 582        *addr = sg_dma_address(sgt->sgl) + offset;
 583        *a = attachment;
 584        *s = sgt;
 585
 586        if (size)
 587                *size = dmabuf->size - offset;
 588
 589        return 0;
 590
 591err_unmap:
 592        dma_buf_unmap_attachment(attachment, sgt, dma_dir);
 593err_detach:
 594        dma_buf_detach(dmabuf, attachment);
 595err_put:
 596        dma_buf_put(dmabuf);
 597
 598        return err;
 599}
 600
 601static int tegra_vde_attach_dmabufs_to_frame(struct device *dev,
 602                                             struct video_frame *frame,
 603                                             struct tegra_vde_h264_frame *src,
 604                                             enum dma_data_direction dma_dir,
 605                                             bool baseline_profile,
 606                                             size_t lsize, size_t csize)
 607{
 608        int err;
 609
 610        err = tegra_vde_attach_dmabuf(dev, src->y_fd,
 611                                      src->y_offset, lsize, SZ_256,
 612                                      &frame->y_dmabuf_attachment,
 613                                      &frame->y_addr,
 614                                      &frame->y_sgt,
 615                                      NULL, dma_dir);
 616        if (err)
 617                return err;
 618
 619        err = tegra_vde_attach_dmabuf(dev, src->cb_fd,
 620                                      src->cb_offset, csize, SZ_256,
 621                                      &frame->cb_dmabuf_attachment,
 622                                      &frame->cb_addr,
 623                                      &frame->cb_sgt,
 624                                      NULL, dma_dir);
 625        if (err)
 626                goto err_release_y;
 627
 628        err = tegra_vde_attach_dmabuf(dev, src->cr_fd,
 629                                      src->cr_offset, csize, SZ_256,
 630                                      &frame->cr_dmabuf_attachment,
 631                                      &frame->cr_addr,
 632                                      &frame->cr_sgt,
 633                                      NULL, dma_dir);
 634        if (err)
 635                goto err_release_cb;
 636
 637        if (baseline_profile) {
 638                frame->aux_addr = 0x64DEAD00;
 639                return 0;
 640        }
 641
 642        err = tegra_vde_attach_dmabuf(dev, src->aux_fd,
 643                                      src->aux_offset, csize, SZ_256,
 644                                      &frame->aux_dmabuf_attachment,
 645                                      &frame->aux_addr,
 646                                      &frame->aux_sgt,
 647                                      NULL, dma_dir);
 648        if (err)
 649                goto err_release_cr;
 650
 651        return 0;
 652
 653err_release_cr:
 654        tegra_vde_detach_and_put_dmabuf(frame->cr_dmabuf_attachment,
 655                                        frame->cr_sgt, dma_dir);
 656err_release_cb:
 657        tegra_vde_detach_and_put_dmabuf(frame->cb_dmabuf_attachment,
 658                                        frame->cb_sgt, dma_dir);
 659err_release_y:
 660        tegra_vde_detach_and_put_dmabuf(frame->y_dmabuf_attachment,
 661                                        frame->y_sgt, dma_dir);
 662
 663        return err;
 664}
 665
 666static void tegra_vde_release_frame_dmabufs(struct video_frame *frame,
 667                                            enum dma_data_direction dma_dir,
 668                                            bool baseline_profile)
 669{
 670        if (!baseline_profile)
 671                tegra_vde_detach_and_put_dmabuf(frame->aux_dmabuf_attachment,
 672                                                frame->aux_sgt, dma_dir);
 673
 674        tegra_vde_detach_and_put_dmabuf(frame->cr_dmabuf_attachment,
 675                                        frame->cr_sgt, dma_dir);
 676
 677        tegra_vde_detach_and_put_dmabuf(frame->cb_dmabuf_attachment,
 678                                        frame->cb_sgt, dma_dir);
 679
 680        tegra_vde_detach_and_put_dmabuf(frame->y_dmabuf_attachment,
 681                                        frame->y_sgt, dma_dir);
 682}
 683
 684static int tegra_vde_validate_frame(struct device *dev,
 685                                    struct tegra_vde_h264_frame *frame)
 686{
 687        if (frame->frame_num > 0x7FFFFF) {
 688                dev_err(dev, "Bad frame_num %u\n", frame->frame_num);
 689                return -EINVAL;
 690        }
 691
 692        return 0;
 693}
 694
 695static int tegra_vde_validate_h264_ctx(struct device *dev,
 696                                       struct tegra_vde_h264_decoder_ctx *ctx)
 697{
 698        if (ctx->dpb_frames_nb == 0 || ctx->dpb_frames_nb > 17) {
 699                dev_err(dev, "Bad DPB size %u\n", ctx->dpb_frames_nb);
 700                return -EINVAL;
 701        }
 702
 703        if (ctx->level_idc > 15) {
 704                dev_err(dev, "Bad level value %u\n", ctx->level_idc);
 705                return -EINVAL;
 706        }
 707
 708        if (ctx->pic_init_qp > 52) {
 709                dev_err(dev, "Bad pic_init_qp value %u\n", ctx->pic_init_qp);
 710                return -EINVAL;
 711        }
 712
 713        if (ctx->log2_max_pic_order_cnt_lsb > 16) {
 714                dev_err(dev, "Bad log2_max_pic_order_cnt_lsb value %u\n",
 715                        ctx->log2_max_pic_order_cnt_lsb);
 716                return -EINVAL;
 717        }
 718
 719        if (ctx->log2_max_frame_num > 16) {
 720                dev_err(dev, "Bad log2_max_frame_num value %u\n",
 721                        ctx->log2_max_frame_num);
 722                return -EINVAL;
 723        }
 724
 725        if (ctx->chroma_qp_index_offset > 31) {
 726                dev_err(dev, "Bad chroma_qp_index_offset value %u\n",
 727                        ctx->chroma_qp_index_offset);
 728                return -EINVAL;
 729        }
 730
 731        if (ctx->pic_order_cnt_type > 2) {
 732                dev_err(dev, "Bad pic_order_cnt_type value %u\n",
 733                        ctx->pic_order_cnt_type);
 734                return -EINVAL;
 735        }
 736
 737        if (ctx->num_ref_idx_l0_active_minus1 > 15) {
 738                dev_err(dev, "Bad num_ref_idx_l0_active_minus1 value %u\n",
 739                        ctx->num_ref_idx_l0_active_minus1);
 740                return -EINVAL;
 741        }
 742
 743        if (ctx->num_ref_idx_l1_active_minus1 > 15) {
 744                dev_err(dev, "Bad num_ref_idx_l1_active_minus1 value %u\n",
 745                        ctx->num_ref_idx_l1_active_minus1);
 746                return -EINVAL;
 747        }
 748
 749        if (!ctx->pic_width_in_mbs || ctx->pic_width_in_mbs > 127) {
 750                dev_err(dev, "Bad pic_width_in_mbs value %u\n",
 751                        ctx->pic_width_in_mbs);
 752                return -EINVAL;
 753        }
 754
 755        if (!ctx->pic_height_in_mbs || ctx->pic_height_in_mbs > 127) {
 756                dev_err(dev, "Bad pic_height_in_mbs value %u\n",
 757                        ctx->pic_height_in_mbs);
 758                return -EINVAL;
 759        }
 760
 761        return 0;
 762}
 763
 764static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
 765                                       unsigned long vaddr)
 766{
 767        struct device *dev = vde->miscdev.parent;
 768        struct tegra_vde_h264_decoder_ctx ctx;
 769        struct tegra_vde_h264_frame frames[17];
 770        struct tegra_vde_h264_frame __user *frames_user;
 771        struct video_frame *dpb_frames;
 772        struct dma_buf_attachment *bitstream_data_dmabuf_attachment;
 773        struct sg_table *bitstream_sgt;
 774        enum dma_data_direction dma_dir;
 775        dma_addr_t bitstream_data_addr;
 776        dma_addr_t bsev_ptr;
 777        size_t lsize, csize;
 778        size_t bitstream_data_size;
 779        unsigned int macroblocks_nb;
 780        unsigned int read_bytes;
 781        unsigned int cstride;
 782        unsigned int i;
 783        long timeout;
 784        int ret, err;
 785
 786        if (copy_from_user(&ctx, (void __user *)vaddr, sizeof(ctx)))
 787                return -EFAULT;
 788
 789        ret = tegra_vde_validate_h264_ctx(dev, &ctx);
 790        if (ret)
 791                return ret;
 792
 793        ret = tegra_vde_attach_dmabuf(dev, ctx.bitstream_data_fd,
 794                                      ctx.bitstream_data_offset,
 795                                      SZ_16K, SZ_16K,
 796                                      &bitstream_data_dmabuf_attachment,
 797                                      &bitstream_data_addr,
 798                                      &bitstream_sgt,
 799                                      &bitstream_data_size,
 800                                      DMA_TO_DEVICE);
 801        if (ret)
 802                return ret;
 803
 804        dpb_frames = kcalloc(ctx.dpb_frames_nb, sizeof(*dpb_frames),
 805                             GFP_KERNEL);
 806        if (!dpb_frames) {
 807                ret = -ENOMEM;
 808                goto release_bitstream_dmabuf;
 809        }
 810
 811        macroblocks_nb = ctx.pic_width_in_mbs * ctx.pic_height_in_mbs;
 812        frames_user = u64_to_user_ptr(ctx.dpb_frames_ptr);
 813
 814        if (copy_from_user(frames, frames_user,
 815                           ctx.dpb_frames_nb * sizeof(*frames))) {
 816                ret = -EFAULT;
 817                goto free_dpb_frames;
 818        }
 819
 820        cstride = ALIGN(ctx.pic_width_in_mbs * 8, 16);
 821        csize = cstride * ctx.pic_height_in_mbs * 8;
 822        lsize = macroblocks_nb * 256;
 823
 824        for (i = 0; i < ctx.dpb_frames_nb; i++) {
 825                ret = tegra_vde_validate_frame(dev, &frames[i]);
 826                if (ret)
 827                        goto release_dpb_frames;
 828
 829                dpb_frames[i].flags = frames[i].flags;
 830                dpb_frames[i].frame_num = frames[i].frame_num;
 831
 832                dma_dir = (i == 0) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 833
 834                ret = tegra_vde_attach_dmabufs_to_frame(dev, &dpb_frames[i],
 835                                                        &frames[i], dma_dir,
 836                                                        ctx.baseline_profile,
 837                                                        lsize, csize);
 838                if (ret)
 839                        goto release_dpb_frames;
 840        }
 841
 842        ret = mutex_lock_interruptible(&vde->lock);
 843        if (ret)
 844                goto release_dpb_frames;
 845
 846        ret = pm_runtime_get_sync(dev);
 847        if (ret < 0)
 848                goto unlock;
 849
 850        /*
 851         * We rely on the VDE registers reset value, otherwise VDE
 852         * causes bus lockup.
 853         */
 854        ret = reset_control_assert(vde->rst_mc);
 855        if (ret) {
 856                dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
 857                        ret);
 858                goto put_runtime_pm;
 859        }
 860
 861        ret = reset_control_reset(vde->rst);
 862        if (ret) {
 863                dev_err(dev, "DEC start: Failed to reset HW: %d\n", ret);
 864                goto put_runtime_pm;
 865        }
 866
 867        ret = reset_control_deassert(vde->rst_mc);
 868        if (ret) {
 869                dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
 870                        ret);
 871                goto put_runtime_pm;
 872        }
 873
 874        ret = tegra_vde_setup_hw_context(vde, &ctx, dpb_frames,
 875                                         bitstream_data_addr,
 876                                         bitstream_data_size,
 877                                         macroblocks_nb);
 878        if (ret)
 879                goto put_runtime_pm;
 880
 881        tegra_vde_decode_frame(vde, macroblocks_nb);
 882
 883        timeout = wait_for_completion_interruptible_timeout(
 884                        &vde->decode_completion, msecs_to_jiffies(1000));
 885        if (timeout == 0) {
 886                bsev_ptr = readl_relaxed(vde->bsev + 0x10);
 887                macroblocks_nb = readl_relaxed(vde->sxe + 0xC8) & 0x1FFF;
 888                read_bytes = bsev_ptr ? bsev_ptr - bitstream_data_addr : 0;
 889
 890                dev_err(dev, "Decoding failed: read 0x%X bytes, %u macroblocks parsed\n",
 891                        read_bytes, macroblocks_nb);
 892
 893                ret = -EIO;
 894        } else if (timeout < 0) {
 895                ret = timeout;
 896        }
 897
 898        /*
 899         * At first reset memory client to avoid resetting VDE HW in the
 900         * middle of DMA which could result into memory corruption or hang
 901         * the whole system.
 902         */
 903        err = reset_control_assert(vde->rst_mc);
 904        if (err)
 905                dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);
 906
 907        err = reset_control_assert(vde->rst);
 908        if (err)
 909                dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err);
 910
 911put_runtime_pm:
 912        pm_runtime_mark_last_busy(dev);
 913        pm_runtime_put_autosuspend(dev);
 914
 915unlock:
 916        mutex_unlock(&vde->lock);
 917
 918release_dpb_frames:
 919        while (i--) {
 920                dma_dir = (i == 0) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 921
 922                tegra_vde_release_frame_dmabufs(&dpb_frames[i], dma_dir,
 923                                                ctx.baseline_profile);
 924        }
 925
 926free_dpb_frames:
 927        kfree(dpb_frames);
 928
 929release_bitstream_dmabuf:
 930        tegra_vde_detach_and_put_dmabuf(bitstream_data_dmabuf_attachment,
 931                                        bitstream_sgt, DMA_TO_DEVICE);
 932
 933        return ret;
 934}
 935
 936static long tegra_vde_unlocked_ioctl(struct file *filp,
 937                                     unsigned int cmd, unsigned long arg)
 938{
 939        struct miscdevice *miscdev = filp->private_data;
 940        struct tegra_vde *vde = container_of(miscdev, struct tegra_vde,
 941                                             miscdev);
 942
 943        switch (cmd) {
 944        case TEGRA_VDE_IOCTL_DECODE_H264:
 945                return tegra_vde_ioctl_decode_h264(vde, arg);
 946        }
 947
 948        dev_err(miscdev->parent, "Invalid IOCTL command %u\n", cmd);
 949
 950        return -ENOTTY;
 951}
 952
 953static const struct file_operations tegra_vde_fops = {
 954        .owner          = THIS_MODULE,
 955        .unlocked_ioctl = tegra_vde_unlocked_ioctl,
 956};
 957
 958static irqreturn_t tegra_vde_isr(int irq, void *data)
 959{
 960        struct tegra_vde *vde = data;
 961
 962        if (completion_done(&vde->decode_completion))
 963                return IRQ_NONE;
 964
 965        tegra_vde_set_bits(vde, 0, vde->frameid + 0x208);
 966        complete(&vde->decode_completion);
 967
 968        return IRQ_HANDLED;
 969}
 970
 971static int tegra_vde_runtime_suspend(struct device *dev)
 972{
 973        struct tegra_vde *vde = dev_get_drvdata(dev);
 974        int err;
 975
 976        err = tegra_powergate_power_off(TEGRA_POWERGATE_VDEC);
 977        if (err) {
 978                dev_err(dev, "Failed to power down HW: %d\n", err);
 979                return err;
 980        }
 981
 982        clk_disable_unprepare(vde->clk);
 983
 984        return 0;
 985}
 986
 987static int tegra_vde_runtime_resume(struct device *dev)
 988{
 989        struct tegra_vde *vde = dev_get_drvdata(dev);
 990        int err;
 991
 992        err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_VDEC,
 993                                                vde->clk, vde->rst);
 994        if (err) {
 995                dev_err(dev, "Failed to power up HW : %d\n", err);
 996                return err;
 997        }
 998
 999        return 0;
1000}
1001
1002static int tegra_vde_probe(struct platform_device *pdev)
1003{
1004        struct device *dev = &pdev->dev;
1005        struct resource *regs;
1006        struct tegra_vde *vde;
1007        int irq, err;
1008
1009        vde = devm_kzalloc(dev, sizeof(*vde), GFP_KERNEL);
1010        if (!vde)
1011                return -ENOMEM;
1012
1013        platform_set_drvdata(pdev, vde);
1014
1015        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sxe");
1016        if (!regs)
1017                return -ENODEV;
1018
1019        vde->sxe = devm_ioremap_resource(dev, regs);
1020        if (IS_ERR(vde->sxe))
1021                return PTR_ERR(vde->sxe);
1022
1023        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bsev");
1024        if (!regs)
1025                return -ENODEV;
1026
1027        vde->bsev = devm_ioremap_resource(dev, regs);
1028        if (IS_ERR(vde->bsev))
1029                return PTR_ERR(vde->bsev);
1030
1031        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mbe");
1032        if (!regs)
1033                return -ENODEV;
1034
1035        vde->mbe = devm_ioremap_resource(dev, regs);
1036        if (IS_ERR(vde->mbe))
1037                return PTR_ERR(vde->mbe);
1038
1039        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ppe");
1040        if (!regs)
1041                return -ENODEV;
1042
1043        vde->ppe = devm_ioremap_resource(dev, regs);
1044        if (IS_ERR(vde->ppe))
1045                return PTR_ERR(vde->ppe);
1046
1047        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mce");
1048        if (!regs)
1049                return -ENODEV;
1050
1051        vde->mce = devm_ioremap_resource(dev, regs);
1052        if (IS_ERR(vde->mce))
1053                return PTR_ERR(vde->mce);
1054
1055        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "tfe");
1056        if (!regs)
1057                return -ENODEV;
1058
1059        vde->tfe = devm_ioremap_resource(dev, regs);
1060        if (IS_ERR(vde->tfe))
1061                return PTR_ERR(vde->tfe);
1062
1063        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ppb");
1064        if (!regs)
1065                return -ENODEV;
1066
1067        vde->ppb = devm_ioremap_resource(dev, regs);
1068        if (IS_ERR(vde->ppb))
1069                return PTR_ERR(vde->ppb);
1070
1071        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vdma");
1072        if (!regs)
1073                return -ENODEV;
1074
1075        vde->vdma = devm_ioremap_resource(dev, regs);
1076        if (IS_ERR(vde->vdma))
1077                return PTR_ERR(vde->vdma);
1078
1079        regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "frameid");
1080        if (!regs)
1081                return -ENODEV;
1082
1083        vde->frameid = devm_ioremap_resource(dev, regs);
1084        if (IS_ERR(vde->frameid))
1085                return PTR_ERR(vde->frameid);
1086
1087        vde->clk = devm_clk_get(dev, NULL);
1088        if (IS_ERR(vde->clk)) {
1089                err = PTR_ERR(vde->clk);
1090                dev_err(dev, "Could not get VDE clk %d\n", err);
1091                return err;
1092        }
1093
1094        vde->rst = devm_reset_control_get(dev, NULL);
1095        if (IS_ERR(vde->rst)) {
1096                err = PTR_ERR(vde->rst);
1097                dev_err(dev, "Could not get VDE reset %d\n", err);
1098                return err;
1099        }
1100
1101        vde->rst_mc = devm_reset_control_get_optional(dev, "mc");
1102        if (IS_ERR(vde->rst_mc)) {
1103                err = PTR_ERR(vde->rst_mc);
1104                dev_err(dev, "Could not get MC reset %d\n", err);
1105                return err;
1106        }
1107
1108        irq = platform_get_irq_byname(pdev, "sync-token");
1109        if (irq < 0)
1110                return irq;
1111
1112        err = devm_request_irq(dev, irq, tegra_vde_isr, 0,
1113                               dev_name(dev), vde);
1114        if (err) {
1115                dev_err(dev, "Could not request IRQ %d\n", err);
1116                return err;
1117        }
1118
1119        vde->iram_pool = of_gen_pool_get(dev->of_node, "iram", 0);
1120        if (!vde->iram_pool) {
1121                dev_err(dev, "Could not get IRAM pool\n");
1122                return -EPROBE_DEFER;
1123        }
1124
1125        vde->iram = gen_pool_dma_alloc(vde->iram_pool,
1126                                       gen_pool_size(vde->iram_pool),
1127                                       &vde->iram_lists_addr);
1128        if (!vde->iram) {
1129                dev_err(dev, "Could not reserve IRAM\n");
1130                return -ENOMEM;
1131        }
1132
1133        mutex_init(&vde->lock);
1134        init_completion(&vde->decode_completion);
1135
1136        vde->miscdev.minor = MISC_DYNAMIC_MINOR;
1137        vde->miscdev.name = "tegra_vde";
1138        vde->miscdev.fops = &tegra_vde_fops;
1139        vde->miscdev.parent = dev;
1140
1141        err = misc_register(&vde->miscdev);
1142        if (err) {
1143                dev_err(dev, "Failed to register misc device: %d\n", err);
1144                goto err_gen_free;
1145        }
1146
1147        pm_runtime_enable(dev);
1148        pm_runtime_use_autosuspend(dev);
1149        pm_runtime_set_autosuspend_delay(dev, 300);
1150
1151        if (!pm_runtime_enabled(dev)) {
1152                err = tegra_vde_runtime_resume(dev);
1153                if (err)
1154                        goto err_misc_unreg;
1155        }
1156
1157        return 0;
1158
1159err_misc_unreg:
1160        misc_deregister(&vde->miscdev);
1161
1162err_gen_free:
1163        gen_pool_free(vde->iram_pool, (unsigned long)vde->iram,
1164                      gen_pool_size(vde->iram_pool));
1165
1166        return err;
1167}
1168
1169static int tegra_vde_remove(struct platform_device *pdev)
1170{
1171        struct tegra_vde *vde = platform_get_drvdata(pdev);
1172        struct device *dev = &pdev->dev;
1173        int err;
1174
1175        if (!pm_runtime_enabled(dev)) {
1176                err = tegra_vde_runtime_suspend(dev);
1177                if (err)
1178                        return err;
1179        }
1180
1181        pm_runtime_dont_use_autosuspend(dev);
1182        pm_runtime_disable(dev);
1183
1184        misc_deregister(&vde->miscdev);
1185
1186        gen_pool_free(vde->iram_pool, (unsigned long)vde->iram,
1187                      gen_pool_size(vde->iram_pool));
1188
1189        return 0;
1190}
1191
1192#ifdef CONFIG_PM_SLEEP
1193static int tegra_vde_pm_suspend(struct device *dev)
1194{
1195        struct tegra_vde *vde = dev_get_drvdata(dev);
1196        int err;
1197
1198        mutex_lock(&vde->lock);
1199
1200        err = pm_runtime_force_suspend(dev);
1201        if (err < 0)
1202                return err;
1203
1204        return 0;
1205}
1206
1207static int tegra_vde_pm_resume(struct device *dev)
1208{
1209        struct tegra_vde *vde = dev_get_drvdata(dev);
1210        int err;
1211
1212        err = pm_runtime_force_resume(dev);
1213        if (err < 0)
1214                return err;
1215
1216        mutex_unlock(&vde->lock);
1217
1218        return 0;
1219}
1220#endif
1221
1222static const struct dev_pm_ops tegra_vde_pm_ops = {
1223        SET_RUNTIME_PM_OPS(tegra_vde_runtime_suspend,
1224                           tegra_vde_runtime_resume,
1225                           NULL)
1226        SET_SYSTEM_SLEEP_PM_OPS(tegra_vde_pm_suspend,
1227                                tegra_vde_pm_resume)
1228};
1229
1230static const struct of_device_id tegra_vde_of_match[] = {
1231        { .compatible = "nvidia,tegra20-vde", },
1232        { },
1233};
1234MODULE_DEVICE_TABLE(of, tegra_vde_of_match);
1235
1236static struct platform_driver tegra_vde_driver = {
1237        .probe          = tegra_vde_probe,
1238        .remove         = tegra_vde_remove,
1239        .driver         = {
1240                .name           = "tegra-vde",
1241                .of_match_table = tegra_vde_of_match,
1242                .pm             = &tegra_vde_pm_ops,
1243        },
1244};
1245module_platform_driver(tegra_vde_driver);
1246
1247MODULE_DESCRIPTION("NVIDIA Tegra Video Decoder driver");
1248MODULE_AUTHOR("Dmitry Osipenko <digetx@gmail.com>");
1249MODULE_LICENSE("GPL");
1250