linux/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Cedrus VPU driver
   4 *
   5 * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
   6 * Copyright (c) 2018 Bootlin
   7 */
   8
   9#include <linux/delay.h>
  10#include <linux/types.h>
  11
  12#include <media/videobuf2-dma-contig.h>
  13
  14#include "cedrus.h"
  15#include "cedrus_hw.h"
  16#include "cedrus_regs.h"
  17
  18enum cedrus_h264_sram_off {
  19        CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE      = 0x000,
  20        CEDRUS_SRAM_H264_FRAMEBUFFER_LIST       = 0x100,
  21        CEDRUS_SRAM_H264_REF_LIST_0             = 0x190,
  22        CEDRUS_SRAM_H264_REF_LIST_1             = 0x199,
  23        CEDRUS_SRAM_H264_SCALING_LIST_8x8_0     = 0x200,
  24        CEDRUS_SRAM_H264_SCALING_LIST_8x8_1     = 0x210,
  25        CEDRUS_SRAM_H264_SCALING_LIST_4x4       = 0x220,
  26};
  27
  28struct cedrus_h264_sram_ref_pic {
  29        __le32  top_field_order_cnt;
  30        __le32  bottom_field_order_cnt;
  31        __le32  frame_info;
  32        __le32  luma_ptr;
  33        __le32  chroma_ptr;
  34        __le32  mv_col_top_ptr;
  35        __le32  mv_col_bot_ptr;
  36        __le32  reserved;
  37} __packed;
  38
  39#define CEDRUS_H264_FRAME_NUM           18
  40
  41#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE   (16 * SZ_1K)
  42#define CEDRUS_MIN_PIC_INFO_BUF_SIZE       (130 * SZ_1K)
  43
  44static void cedrus_h264_write_sram(struct cedrus_dev *dev,
  45                                   enum cedrus_h264_sram_off off,
  46                                   const void *data, size_t len)
  47{
  48        const u32 *buffer = data;
  49        size_t count = DIV_ROUND_UP(len, 4);
  50
  51        cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
  52
  53        while (count--)
  54                cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
  55}
  56
  57static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
  58                                              unsigned int position,
  59                                              unsigned int field)
  60{
  61        dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
  62
  63        /* Adjust for the position */
  64        addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
  65
  66        /* Adjust for the field */
  67        addr += field * ctx->codec.h264.mv_col_buf_field_size;
  68
  69        return addr;
  70}
  71
  72static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
  73                                struct cedrus_buffer *buf,
  74                                unsigned int top_field_order_cnt,
  75                                unsigned int bottom_field_order_cnt,
  76                                struct cedrus_h264_sram_ref_pic *pic)
  77{
  78        struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
  79        unsigned int position = buf->codec.h264.position;
  80
  81        pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
  82        pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
  83        pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
  84
  85        pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
  86        pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
  87        pic->mv_col_top_ptr =
  88                cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
  89        pic->mv_col_bot_ptr =
  90                cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
  91}
  92
  93static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
  94                                    struct cedrus_run *run)
  95{
  96        struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
  97        const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
  98        const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
  99        const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
 100        struct vb2_queue *cap_q;
 101        struct cedrus_buffer *output_buf;
 102        struct cedrus_dev *dev = ctx->dev;
 103        unsigned long used_dpbs = 0;
 104        unsigned int position;
 105        unsigned int output = 0;
 106        unsigned int i;
 107
 108        cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
 109
 110        memset(pic_list, 0, sizeof(pic_list));
 111
 112        for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
 113                const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
 114                struct cedrus_buffer *cedrus_buf;
 115                int buf_idx;
 116
 117                if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
 118                        continue;
 119
 120                buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
 121                if (buf_idx < 0)
 122                        continue;
 123
 124                cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]);
 125                position = cedrus_buf->codec.h264.position;
 126                used_dpbs |= BIT(position);
 127
 128                if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
 129                        continue;
 130
 131                cedrus_fill_ref_pic(ctx, cedrus_buf,
 132                                    dpb->top_field_order_cnt,
 133                                    dpb->bottom_field_order_cnt,
 134                                    &pic_list[position]);
 135
 136                output = max(position, output);
 137        }
 138
 139        position = find_next_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM,
 140                                      output);
 141        if (position >= CEDRUS_H264_FRAME_NUM)
 142                position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
 143
 144        output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
 145        output_buf->codec.h264.position = position;
 146
 147        if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
 148                output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
 149        else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
 150                output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
 151        else
 152                output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
 153
 154        cedrus_fill_ref_pic(ctx, output_buf,
 155                            decode->top_field_order_cnt,
 156                            decode->bottom_field_order_cnt,
 157                            &pic_list[position]);
 158
 159        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
 160                               pic_list, sizeof(pic_list));
 161
 162        cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
 163}
 164
 165#define CEDRUS_MAX_REF_IDX      32
 166
 167static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
 168                                   struct cedrus_run *run,
 169                                   const u8 *ref_list, u8 num_ref,
 170                                   enum cedrus_h264_sram_off sram)
 171{
 172        const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
 173        struct vb2_queue *cap_q;
 174        struct cedrus_dev *dev = ctx->dev;
 175        u8 sram_array[CEDRUS_MAX_REF_IDX];
 176        unsigned int i;
 177        size_t size;
 178
 179        cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
 180
 181        memset(sram_array, 0, sizeof(sram_array));
 182
 183        for (i = 0; i < num_ref; i++) {
 184                const struct v4l2_h264_dpb_entry *dpb;
 185                const struct cedrus_buffer *cedrus_buf;
 186                const struct vb2_v4l2_buffer *ref_buf;
 187                unsigned int position;
 188                int buf_idx;
 189                u8 dpb_idx;
 190
 191                dpb_idx = ref_list[i];
 192                dpb = &decode->dpb[dpb_idx];
 193
 194                if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
 195                        continue;
 196
 197                buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
 198                if (buf_idx < 0)
 199                        continue;
 200
 201                ref_buf = to_vb2_v4l2_buffer(cap_q->bufs[buf_idx]);
 202                cedrus_buf = vb2_v4l2_to_cedrus_buffer(ref_buf);
 203                position = cedrus_buf->codec.h264.position;
 204
 205                sram_array[i] |= position << 1;
 206                if (ref_buf->field == V4L2_FIELD_BOTTOM)
 207                        sram_array[i] |= BIT(0);
 208        }
 209
 210        size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
 211        cedrus_h264_write_sram(dev, sram, &sram_array, size);
 212}
 213
 214static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
 215                                   struct cedrus_run *run)
 216{
 217        const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
 218
 219        _cedrus_write_ref_list(ctx, run,
 220                               slice->ref_pic_list0,
 221                               slice->num_ref_idx_l0_active_minus1 + 1,
 222                               CEDRUS_SRAM_H264_REF_LIST_0);
 223}
 224
 225static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
 226                                   struct cedrus_run *run)
 227{
 228        const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
 229
 230        _cedrus_write_ref_list(ctx, run,
 231                               slice->ref_pic_list1,
 232                               slice->num_ref_idx_l1_active_minus1 + 1,
 233                               CEDRUS_SRAM_H264_REF_LIST_1);
 234}
 235
 236static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
 237                                       struct cedrus_run *run)
 238{
 239        const struct v4l2_ctrl_h264_scaling_matrix *scaling =
 240                run->h264.scaling_matrix;
 241        struct cedrus_dev *dev = ctx->dev;
 242
 243        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
 244                               scaling->scaling_list_8x8[0],
 245                               sizeof(scaling->scaling_list_8x8[0]));
 246
 247        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
 248                               scaling->scaling_list_8x8[1],
 249                               sizeof(scaling->scaling_list_8x8[1]));
 250
 251        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
 252                               scaling->scaling_list_4x4,
 253                               sizeof(scaling->scaling_list_4x4));
 254}
 255
 256static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
 257                                           struct cedrus_run *run)
 258{
 259        const struct v4l2_ctrl_h264_slice_params *slice =
 260                run->h264.slice_params;
 261        const struct v4l2_h264_pred_weight_table *pred_weight =
 262                &slice->pred_weight_table;
 263        struct cedrus_dev *dev = ctx->dev;
 264        int i, j, k;
 265
 266        cedrus_write(dev, VE_H264_SHS_WP,
 267                     ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
 268                     ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
 269
 270        cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
 271                     CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
 272
 273        for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
 274                const struct v4l2_h264_weight_factors *factors =
 275                        &pred_weight->weight_factors[i];
 276
 277                for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
 278                        u32 val;
 279
 280                        val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
 281                                (factors->luma_weight[j] & 0x1ff);
 282                        cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
 283                }
 284
 285                for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
 286                        for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
 287                                u32 val;
 288
 289                                val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
 290                                        (factors->chroma_weight[j][k] & 0x1ff);
 291                                cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
 292                        }
 293                }
 294        }
 295}
 296
 297/*
 298 * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
 299 * rare cases frame is not decoded correctly. However, setting offset to 0 and
 300 * skipping appropriate amount of bits with flush bits trigger always works.
 301 */
 302static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
 303{
 304        int count = 0;
 305
 306        while (count < num) {
 307                int tmp = min(num - count, 32);
 308
 309                cedrus_write(dev, VE_H264_TRIGGER_TYPE,
 310                             VE_H264_TRIGGER_TYPE_FLUSH_BITS |
 311                             VE_H264_TRIGGER_TYPE_N_BITS(tmp));
 312                while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
 313                        udelay(1);
 314
 315                count += tmp;
 316        }
 317}
 318
 319static void cedrus_set_params(struct cedrus_ctx *ctx,
 320                              struct cedrus_run *run)
 321{
 322        const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
 323        const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
 324        const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
 325        const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
 326        struct vb2_buffer *src_buf = &run->src->vb2_buf;
 327        struct cedrus_dev *dev = ctx->dev;
 328        dma_addr_t src_buf_addr;
 329        u32 len = slice->size * 8;
 330        unsigned int pic_width_in_mbs;
 331        bool mbaff_pic;
 332        u32 reg;
 333
 334        cedrus_write(dev, VE_H264_VLD_LEN, len);
 335        cedrus_write(dev, VE_H264_VLD_OFFSET, 0);
 336
 337        src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
 338        cedrus_write(dev, VE_H264_VLD_END,
 339                     src_buf_addr + vb2_get_plane_payload(src_buf, 0));
 340        cedrus_write(dev, VE_H264_VLD_ADDR,
 341                     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
 342                     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
 343                     VE_H264_VLD_ADDR_LAST);
 344
 345        if (ctx->src_fmt.width > 2048) {
 346                cedrus_write(dev, VE_BUF_CTRL,
 347                             VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
 348                             VE_BUF_CTRL_DBLK_MIXED_RAM);
 349                cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
 350                             ctx->codec.h264.deblk_buf_dma);
 351                cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
 352                             ctx->codec.h264.intra_pred_buf_dma);
 353        } else {
 354                cedrus_write(dev, VE_BUF_CTRL,
 355                             VE_BUF_CTRL_INTRAPRED_INT_SRAM |
 356                             VE_BUF_CTRL_DBLK_INT_SRAM);
 357        }
 358
 359        /*
 360         * FIXME: Since the bitstream parsing is done in software, and
 361         * in userspace, this shouldn't be needed anymore. But it
 362         * turns out that removing it breaks the decoding process,
 363         * without any clear indication why.
 364         */
 365        cedrus_write(dev, VE_H264_TRIGGER_TYPE,
 366                     VE_H264_TRIGGER_TYPE_INIT_SWDEC);
 367
 368        cedrus_skip_bits(dev, slice->header_bit_size);
 369
 370        if (((pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) &&
 371             (slice->slice_type == V4L2_H264_SLICE_TYPE_P ||
 372              slice->slice_type == V4L2_H264_SLICE_TYPE_SP)) ||
 373            (pps->weighted_bipred_idc == 1 &&
 374             slice->slice_type == V4L2_H264_SLICE_TYPE_B))
 375                cedrus_write_pred_weight_table(ctx, run);
 376
 377        if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
 378            (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
 379            (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
 380                cedrus_write_ref_list0(ctx, run);
 381
 382        if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
 383                cedrus_write_ref_list1(ctx, run);
 384
 385        // picture parameters
 386        reg = 0;
 387        /*
 388         * FIXME: the kernel headers are allowing the default value to
 389         * be passed, but the libva doesn't give us that.
 390         */
 391        reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
 392        reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
 393        reg |= (pps->weighted_bipred_idc & 0x3) << 2;
 394        if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
 395                reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
 396        if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
 397                reg |= VE_H264_PPS_WEIGHTED_PRED;
 398        if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
 399                reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
 400        if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
 401                reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
 402        cedrus_write(dev, VE_H264_PPS, reg);
 403
 404        // sequence parameters
 405        reg = 0;
 406        reg |= (sps->chroma_format_idc & 0x7) << 19;
 407        reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
 408        reg |= sps->pic_height_in_map_units_minus1 & 0xff;
 409        if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
 410                reg |= VE_H264_SPS_MBS_ONLY;
 411        if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
 412                reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
 413        if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
 414                reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
 415        cedrus_write(dev, VE_H264_SPS, reg);
 416
 417        mbaff_pic = !(slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) &&
 418                    (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
 419        pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
 420
 421        // slice parameters
 422        reg = 0;
 423        reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
 424        reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
 425                 (mbaff_pic + 1)) & 0xff) << 16;
 426        reg |= decode->nal_ref_idc ? BIT(12) : 0;
 427        reg |= (slice->slice_type & 0xf) << 8;
 428        reg |= slice->cabac_init_idc & 0x3;
 429        if (ctx->fh.m2m_ctx->new_frame)
 430                reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
 431        if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
 432                reg |= VE_H264_SHS_FIELD_PIC;
 433        if (slice->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
 434                reg |= VE_H264_SHS_BOTTOM_FIELD;
 435        if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
 436                reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
 437        cedrus_write(dev, VE_H264_SHS, reg);
 438
 439        reg = 0;
 440        reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
 441        reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
 442        reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
 443        reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
 444        reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
 445        reg |= slice->slice_beta_offset_div2 & 0xf;
 446        cedrus_write(dev, VE_H264_SHS2, reg);
 447
 448        reg = 0;
 449        reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
 450        reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
 451        reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
 452        cedrus_write(dev, VE_H264_SHS_QP, reg);
 453
 454        // clear status flags
 455        cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));
 456
 457        // enable int
 458        cedrus_write(dev, VE_H264_CTRL,
 459                     VE_H264_CTRL_SLICE_DECODE_INT |
 460                     VE_H264_CTRL_DECODE_ERR_INT |
 461                     VE_H264_CTRL_VLD_DATA_REQ_INT);
 462}
 463
 464static enum cedrus_irq_status
 465cedrus_h264_irq_status(struct cedrus_ctx *ctx)
 466{
 467        struct cedrus_dev *dev = ctx->dev;
 468        u32 reg = cedrus_read(dev, VE_H264_STATUS);
 469
 470        if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
 471                   VE_H264_STATUS_VLD_DATA_REQ_INT))
 472                return CEDRUS_IRQ_ERROR;
 473
 474        if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
 475                return CEDRUS_IRQ_OK;
 476
 477        return CEDRUS_IRQ_NONE;
 478}
 479
 480static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
 481{
 482        struct cedrus_dev *dev = ctx->dev;
 483
 484        cedrus_write(dev, VE_H264_STATUS,
 485                     VE_H264_STATUS_INT_MASK);
 486}
 487
 488static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
 489{
 490        struct cedrus_dev *dev = ctx->dev;
 491        u32 reg = cedrus_read(dev, VE_H264_CTRL);
 492
 493        cedrus_write(dev, VE_H264_CTRL,
 494                     reg & ~VE_H264_CTRL_INT_MASK);
 495}
 496
 497static void cedrus_h264_setup(struct cedrus_ctx *ctx,
 498                              struct cedrus_run *run)
 499{
 500        struct cedrus_dev *dev = ctx->dev;
 501
 502        cedrus_engine_enable(ctx, CEDRUS_CODEC_H264);
 503
 504        cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
 505        cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
 506                     ctx->codec.h264.pic_info_buf_dma);
 507        cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
 508                     ctx->codec.h264.neighbor_info_buf_dma);
 509
 510        cedrus_write_scaling_lists(ctx, run);
 511        cedrus_write_frame_list(ctx, run);
 512
 513        cedrus_set_params(ctx, run);
 514}
 515
 516static int cedrus_h264_start(struct cedrus_ctx *ctx)
 517{
 518        struct cedrus_dev *dev = ctx->dev;
 519        unsigned int pic_info_size;
 520        unsigned int field_size;
 521        unsigned int mv_col_size;
 522        int ret;
 523
 524        /* Formula for picture buffer size is taken from CedarX source. */
 525
 526        if (ctx->src_fmt.width > 2048)
 527                pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
 528        else
 529                pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
 530
 531        /*
 532         * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
 533         * there is no need to multiply by 2.
 534         */
 535        pic_info_size += ctx->src_fmt.height * 2 * 64;
 536
 537        if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
 538                pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
 539
 540        ctx->codec.h264.pic_info_buf_size = pic_info_size;
 541        ctx->codec.h264.pic_info_buf =
 542                dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
 543                                   &ctx->codec.h264.pic_info_buf_dma,
 544                                   GFP_KERNEL);
 545        if (!ctx->codec.h264.pic_info_buf)
 546                return -ENOMEM;
 547
 548        /*
 549         * That buffer is supposed to be 16kiB in size, and be aligned
 550         * on 16kiB as well. However, dma_alloc_coherent provides the
 551         * guarantee that we'll have a CPU and DMA address aligned on
 552         * the smallest page order that is greater to the requested
 553         * size, so we don't have to overallocate.
 554         */
 555        ctx->codec.h264.neighbor_info_buf =
 556                dma_alloc_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
 557                                   &ctx->codec.h264.neighbor_info_buf_dma,
 558                                   GFP_KERNEL);
 559        if (!ctx->codec.h264.neighbor_info_buf) {
 560                ret = -ENOMEM;
 561                goto err_pic_buf;
 562        }
 563
 564        field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
 565                DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
 566
 567        /*
 568         * FIXME: This is actually conditional to
 569         * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
 570         * might have to rework this if memory efficiency ever is
 571         * something we need to work on.
 572         */
 573        field_size = field_size * 2;
 574
 575        /*
 576         * FIXME: This is actually conditional to
 577         * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
 578         * have to rework this if memory efficiency ever is something
 579         * we need to work on.
 580         */
 581        field_size = field_size * 2;
 582        ctx->codec.h264.mv_col_buf_field_size = field_size;
 583
 584        mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
 585        ctx->codec.h264.mv_col_buf_size = mv_col_size;
 586        ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev,
 587                                                        ctx->codec.h264.mv_col_buf_size,
 588                                                        &ctx->codec.h264.mv_col_buf_dma,
 589                                                        GFP_KERNEL);
 590        if (!ctx->codec.h264.mv_col_buf) {
 591                ret = -ENOMEM;
 592                goto err_neighbor_buf;
 593        }
 594
 595        if (ctx->src_fmt.width > 2048) {
 596                /*
 597                 * Formulas for deblock and intra prediction buffer sizes
 598                 * are taken from CedarX source.
 599                 */
 600
 601                ctx->codec.h264.deblk_buf_size =
 602                        ALIGN(ctx->src_fmt.width, 32) * 12;
 603                ctx->codec.h264.deblk_buf =
 604                        dma_alloc_coherent(dev->dev,
 605                                           ctx->codec.h264.deblk_buf_size,
 606                                           &ctx->codec.h264.deblk_buf_dma,
 607                                           GFP_KERNEL);
 608                if (!ctx->codec.h264.deblk_buf) {
 609                        ret = -ENOMEM;
 610                        goto err_mv_col_buf;
 611                }
 612
 613                /*
 614                 * NOTE: Multiplying by two deviates from CedarX logic, but it
 615                 * is for some unknown reason needed for H264 4K decoding on H6.
 616                 */
 617                ctx->codec.h264.intra_pred_buf_size =
 618                        ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
 619                ctx->codec.h264.intra_pred_buf =
 620                        dma_alloc_coherent(dev->dev,
 621                                           ctx->codec.h264.intra_pred_buf_size,
 622                                           &ctx->codec.h264.intra_pred_buf_dma,
 623                                           GFP_KERNEL);
 624                if (!ctx->codec.h264.intra_pred_buf) {
 625                        ret = -ENOMEM;
 626                        goto err_deblk_buf;
 627                }
 628        }
 629
 630        return 0;
 631
 632err_deblk_buf:
 633        dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
 634                          ctx->codec.h264.deblk_buf,
 635                          ctx->codec.h264.deblk_buf_dma);
 636
 637err_mv_col_buf:
 638        dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
 639                          ctx->codec.h264.mv_col_buf,
 640                          ctx->codec.h264.mv_col_buf_dma);
 641
 642err_neighbor_buf:
 643        dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
 644                          ctx->codec.h264.neighbor_info_buf,
 645                          ctx->codec.h264.neighbor_info_buf_dma);
 646
 647err_pic_buf:
 648        dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
 649                          ctx->codec.h264.pic_info_buf,
 650                          ctx->codec.h264.pic_info_buf_dma);
 651        return ret;
 652}
 653
 654static void cedrus_h264_stop(struct cedrus_ctx *ctx)
 655{
 656        struct cedrus_dev *dev = ctx->dev;
 657
 658        dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
 659                          ctx->codec.h264.mv_col_buf,
 660                          ctx->codec.h264.mv_col_buf_dma);
 661        dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
 662                          ctx->codec.h264.neighbor_info_buf,
 663                          ctx->codec.h264.neighbor_info_buf_dma);
 664        dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
 665                          ctx->codec.h264.pic_info_buf,
 666                          ctx->codec.h264.pic_info_buf_dma);
 667        if (ctx->codec.h264.deblk_buf_size)
 668                dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
 669                                  ctx->codec.h264.deblk_buf,
 670                                  ctx->codec.h264.deblk_buf_dma);
 671        if (ctx->codec.h264.intra_pred_buf_size)
 672                dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size,
 673                                  ctx->codec.h264.intra_pred_buf,
 674                                  ctx->codec.h264.intra_pred_buf_dma);
 675}
 676
 677static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
 678{
 679        struct cedrus_dev *dev = ctx->dev;
 680
 681        cedrus_write(dev, VE_H264_TRIGGER_TYPE,
 682                     VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
 683}
 684
 685struct cedrus_dec_ops cedrus_dec_ops_h264 = {
 686        .irq_clear      = cedrus_h264_irq_clear,
 687        .irq_disable    = cedrus_h264_irq_disable,
 688        .irq_status     = cedrus_h264_irq_status,
 689        .setup          = cedrus_h264_setup,
 690        .start          = cedrus_h264_start,
 691        .stop           = cedrus_h264_stop,
 692        .trigger        = cedrus_h264_trigger,
 693};
 694