linux/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Cedrus VPU driver
   4 *
   5 * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
   6 * Copyright (c) 2018 Bootlin
   7 */
   8
   9#include <linux/delay.h>
  10#include <linux/types.h>
  11
  12#include <media/videobuf2-dma-contig.h>
  13
  14#include "cedrus.h"
  15#include "cedrus_hw.h"
  16#include "cedrus_regs.h"
  17
  18enum cedrus_h264_sram_off {
  19        CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE      = 0x000,
  20        CEDRUS_SRAM_H264_FRAMEBUFFER_LIST       = 0x100,
  21        CEDRUS_SRAM_H264_REF_LIST_0             = 0x190,
  22        CEDRUS_SRAM_H264_REF_LIST_1             = 0x199,
  23        CEDRUS_SRAM_H264_SCALING_LIST_8x8_0     = 0x200,
  24        CEDRUS_SRAM_H264_SCALING_LIST_8x8_1     = 0x210,
  25        CEDRUS_SRAM_H264_SCALING_LIST_4x4       = 0x220,
  26};
  27
  28struct cedrus_h264_sram_ref_pic {
  29        __le32  top_field_order_cnt;
  30        __le32  bottom_field_order_cnt;
  31        __le32  frame_info;
  32        __le32  luma_ptr;
  33        __le32  chroma_ptr;
  34        __le32  mv_col_top_ptr;
  35        __le32  mv_col_bot_ptr;
  36        __le32  reserved;
  37} __packed;
  38
  39#define CEDRUS_H264_FRAME_NUM           18
  40
  41#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE   (16 * SZ_1K)
  42#define CEDRUS_MIN_PIC_INFO_BUF_SIZE       (130 * SZ_1K)
  43
  44static void cedrus_h264_write_sram(struct cedrus_dev *dev,
  45                                   enum cedrus_h264_sram_off off,
  46                                   const void *data, size_t len)
  47{
  48        const u32 *buffer = data;
  49        size_t count = DIV_ROUND_UP(len, 4);
  50
  51        cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
  52
  53        while (count--)
  54                cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
  55}
  56
  57static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
  58                                              unsigned int position,
  59                                              unsigned int field)
  60{
  61        dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
  62
  63        /* Adjust for the position */
  64        addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
  65
  66        /* Adjust for the field */
  67        addr += field * ctx->codec.h264.mv_col_buf_field_size;
  68
  69        return addr;
  70}
  71
  72static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
  73                                struct cedrus_buffer *buf,
  74                                unsigned int top_field_order_cnt,
  75                                unsigned int bottom_field_order_cnt,
  76                                struct cedrus_h264_sram_ref_pic *pic)
  77{
  78        struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
  79        unsigned int position = buf->codec.h264.position;
  80
  81        pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
  82        pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
  83        pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
  84
  85        pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
  86        pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
  87        pic->mv_col_top_ptr =
  88                cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
  89        pic->mv_col_bot_ptr =
  90                cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
  91}
  92
  93static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
  94                                    struct cedrus_run *run)
  95{
  96        struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
  97        const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
  98        const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
  99        struct vb2_queue *cap_q;
 100        struct cedrus_buffer *output_buf;
 101        struct cedrus_dev *dev = ctx->dev;
 102        unsigned long used_dpbs = 0;
 103        unsigned int position;
 104        int output = -1;
 105        unsigned int i;
 106
 107        cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
 108
 109        memset(pic_list, 0, sizeof(pic_list));
 110
 111        for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
 112                const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
 113                struct cedrus_buffer *cedrus_buf;
 114                int buf_idx;
 115
 116                if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
 117                        continue;
 118
 119                buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
 120                if (buf_idx < 0)
 121                        continue;
 122
 123                cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]);
 124                position = cedrus_buf->codec.h264.position;
 125                used_dpbs |= BIT(position);
 126
 127                if (run->dst->vb2_buf.timestamp == dpb->reference_ts) {
 128                        output = position;
 129                        continue;
 130                }
 131
 132                if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
 133                        continue;
 134
 135                cedrus_fill_ref_pic(ctx, cedrus_buf,
 136                                    dpb->top_field_order_cnt,
 137                                    dpb->bottom_field_order_cnt,
 138                                    &pic_list[position]);
 139        }
 140
 141        if (output >= 0)
 142                position = output;
 143        else
 144                position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
 145
 146        output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
 147        output_buf->codec.h264.position = position;
 148
 149        if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
 150                output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
 151        else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
 152                output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
 153        else
 154                output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
 155
 156        cedrus_fill_ref_pic(ctx, output_buf,
 157                            decode->top_field_order_cnt,
 158                            decode->bottom_field_order_cnt,
 159                            &pic_list[position]);
 160
 161        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
 162                               pic_list, sizeof(pic_list));
 163
 164        cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
 165}
 166
 167#define CEDRUS_MAX_REF_IDX      32
 168
 169static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
 170                                   struct cedrus_run *run,
 171                                   const struct v4l2_h264_reference *ref_list,
 172                                   u8 num_ref, enum cedrus_h264_sram_off sram)
 173{
 174        const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
 175        struct vb2_queue *cap_q;
 176        struct cedrus_dev *dev = ctx->dev;
 177        u8 sram_array[CEDRUS_MAX_REF_IDX];
 178        unsigned int i;
 179        size_t size;
 180
 181        cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
 182
 183        memset(sram_array, 0, sizeof(sram_array));
 184
 185        for (i = 0; i < num_ref; i++) {
 186                const struct v4l2_h264_dpb_entry *dpb;
 187                const struct cedrus_buffer *cedrus_buf;
 188                unsigned int position;
 189                int buf_idx;
 190                u8 dpb_idx;
 191
 192                dpb_idx = ref_list[i].index;
 193                dpb = &decode->dpb[dpb_idx];
 194
 195                if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
 196                        continue;
 197
 198                buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
 199                if (buf_idx < 0)
 200                        continue;
 201
 202                cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]);
 203                position = cedrus_buf->codec.h264.position;
 204
 205                sram_array[i] |= position << 1;
 206                if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF)
 207                        sram_array[i] |= BIT(0);
 208        }
 209
 210        size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
 211        cedrus_h264_write_sram(dev, sram, &sram_array, size);
 212}
 213
 214static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
 215                                   struct cedrus_run *run)
 216{
 217        const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
 218
 219        _cedrus_write_ref_list(ctx, run,
 220                               slice->ref_pic_list0,
 221                               slice->num_ref_idx_l0_active_minus1 + 1,
 222                               CEDRUS_SRAM_H264_REF_LIST_0);
 223}
 224
 225static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
 226                                   struct cedrus_run *run)
 227{
 228        const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
 229
 230        _cedrus_write_ref_list(ctx, run,
 231                               slice->ref_pic_list1,
 232                               slice->num_ref_idx_l1_active_minus1 + 1,
 233                               CEDRUS_SRAM_H264_REF_LIST_1);
 234}
 235
 236static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
 237                                       struct cedrus_run *run)
 238{
 239        const struct v4l2_ctrl_h264_scaling_matrix *scaling =
 240                run->h264.scaling_matrix;
 241        const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
 242        struct cedrus_dev *dev = ctx->dev;
 243
 244        if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
 245                return;
 246
 247        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
 248                               scaling->scaling_list_8x8[0],
 249                               sizeof(scaling->scaling_list_8x8[0]));
 250
 251        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
 252                               scaling->scaling_list_8x8[1],
 253                               sizeof(scaling->scaling_list_8x8[1]));
 254
 255        cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
 256                               scaling->scaling_list_4x4,
 257                               sizeof(scaling->scaling_list_4x4));
 258}
 259
 260static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
 261                                           struct cedrus_run *run)
 262{
 263        const struct v4l2_ctrl_h264_pred_weights *pred_weight =
 264                run->h264.pred_weights;
 265        struct cedrus_dev *dev = ctx->dev;
 266        int i, j, k;
 267
 268        cedrus_write(dev, VE_H264_SHS_WP,
 269                     ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
 270                     ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
 271
 272        cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
 273                     CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
 274
 275        for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
 276                const struct v4l2_h264_weight_factors *factors =
 277                        &pred_weight->weight_factors[i];
 278
 279                for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
 280                        u32 val;
 281
 282                        val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
 283                                (factors->luma_weight[j] & 0x1ff);
 284                        cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
 285                }
 286
 287                for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
 288                        for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
 289                                u32 val;
 290
 291                                val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
 292                                        (factors->chroma_weight[j][k] & 0x1ff);
 293                                cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
 294                        }
 295                }
 296        }
 297}
 298
 299/*
 300 * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
 301 * rare cases frame is not decoded correctly. However, setting offset to 0 and
 302 * skipping appropriate amount of bits with flush bits trigger always works.
 303 */
 304static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
 305{
 306        int count = 0;
 307
 308        while (count < num) {
 309                int tmp = min(num - count, 32);
 310
 311                cedrus_write(dev, VE_H264_TRIGGER_TYPE,
 312                             VE_H264_TRIGGER_TYPE_FLUSH_BITS |
 313                             VE_H264_TRIGGER_TYPE_N_BITS(tmp));
 314                while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
 315                        udelay(1);
 316
 317                count += tmp;
 318        }
 319}
 320
 321static void cedrus_set_params(struct cedrus_ctx *ctx,
 322                              struct cedrus_run *run)
 323{
 324        const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
 325        const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
 326        const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
 327        const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
 328        struct vb2_buffer *src_buf = &run->src->vb2_buf;
 329        struct cedrus_dev *dev = ctx->dev;
 330        dma_addr_t src_buf_addr;
 331        size_t slice_bytes = vb2_get_plane_payload(src_buf, 0);
 332        unsigned int pic_width_in_mbs;
 333        bool mbaff_pic;
 334        u32 reg;
 335
 336        cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8);
 337        cedrus_write(dev, VE_H264_VLD_OFFSET, 0);
 338
 339        src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
 340        cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes);
 341        cedrus_write(dev, VE_H264_VLD_ADDR,
 342                     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
 343                     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
 344                     VE_H264_VLD_ADDR_LAST);
 345
 346        if (ctx->src_fmt.width > 2048) {
 347                cedrus_write(dev, VE_BUF_CTRL,
 348                             VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
 349                             VE_BUF_CTRL_DBLK_MIXED_RAM);
 350                cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
 351                             ctx->codec.h264.deblk_buf_dma);
 352                cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
 353                             ctx->codec.h264.intra_pred_buf_dma);
 354        } else {
 355                cedrus_write(dev, VE_BUF_CTRL,
 356                             VE_BUF_CTRL_INTRAPRED_INT_SRAM |
 357                             VE_BUF_CTRL_DBLK_INT_SRAM);
 358        }
 359
 360        /*
 361         * FIXME: Since the bitstream parsing is done in software, and
 362         * in userspace, this shouldn't be needed anymore. But it
 363         * turns out that removing it breaks the decoding process,
 364         * without any clear indication why.
 365         */
 366        cedrus_write(dev, VE_H264_TRIGGER_TYPE,
 367                     VE_H264_TRIGGER_TYPE_INIT_SWDEC);
 368
 369        cedrus_skip_bits(dev, slice->header_bit_size);
 370
 371        if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice))
 372                cedrus_write_pred_weight_table(ctx, run);
 373
 374        if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
 375            (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
 376            (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
 377                cedrus_write_ref_list0(ctx, run);
 378
 379        if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
 380                cedrus_write_ref_list1(ctx, run);
 381
 382        // picture parameters
 383        reg = 0;
 384        /*
 385         * FIXME: the kernel headers are allowing the default value to
 386         * be passed, but the libva doesn't give us that.
 387         */
 388        reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
 389        reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
 390        reg |= (pps->weighted_bipred_idc & 0x3) << 2;
 391        if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
 392                reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
 393        if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
 394                reg |= VE_H264_PPS_WEIGHTED_PRED;
 395        if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
 396                reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
 397        if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
 398                reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
 399        cedrus_write(dev, VE_H264_PPS, reg);
 400
 401        // sequence parameters
 402        reg = 0;
 403        reg |= (sps->chroma_format_idc & 0x7) << 19;
 404        reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
 405        reg |= sps->pic_height_in_map_units_minus1 & 0xff;
 406        if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
 407                reg |= VE_H264_SPS_MBS_ONLY;
 408        if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
 409                reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
 410        if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
 411                reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
 412        cedrus_write(dev, VE_H264_SPS, reg);
 413
 414        mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) &&
 415                    (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
 416        pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
 417
 418        // slice parameters
 419        reg = 0;
 420        reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
 421        reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
 422                 (mbaff_pic + 1)) & 0xff) << 16;
 423        reg |= decode->nal_ref_idc ? BIT(12) : 0;
 424        reg |= (slice->slice_type & 0xf) << 8;
 425        reg |= slice->cabac_init_idc & 0x3;
 426        if (ctx->fh.m2m_ctx->new_frame)
 427                reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
 428        if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
 429                reg |= VE_H264_SHS_FIELD_PIC;
 430        if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
 431                reg |= VE_H264_SHS_BOTTOM_FIELD;
 432        if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
 433                reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
 434        cedrus_write(dev, VE_H264_SHS, reg);
 435
 436        reg = 0;
 437        reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
 438        reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
 439        reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
 440        reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
 441        reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
 442        reg |= slice->slice_beta_offset_div2 & 0xf;
 443        cedrus_write(dev, VE_H264_SHS2, reg);
 444
 445        reg = 0;
 446        reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
 447        reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
 448        reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
 449        if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
 450                reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT;
 451        cedrus_write(dev, VE_H264_SHS_QP, reg);
 452
 453        // clear status flags
 454        cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));
 455
 456        // enable int
 457        cedrus_write(dev, VE_H264_CTRL,
 458                     VE_H264_CTRL_SLICE_DECODE_INT |
 459                     VE_H264_CTRL_DECODE_ERR_INT |
 460                     VE_H264_CTRL_VLD_DATA_REQ_INT);
 461}
 462
 463static enum cedrus_irq_status
 464cedrus_h264_irq_status(struct cedrus_ctx *ctx)
 465{
 466        struct cedrus_dev *dev = ctx->dev;
 467        u32 reg = cedrus_read(dev, VE_H264_STATUS);
 468
 469        if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
 470                   VE_H264_STATUS_VLD_DATA_REQ_INT))
 471                return CEDRUS_IRQ_ERROR;
 472
 473        if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
 474                return CEDRUS_IRQ_OK;
 475
 476        return CEDRUS_IRQ_NONE;
 477}
 478
 479static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
 480{
 481        struct cedrus_dev *dev = ctx->dev;
 482
 483        cedrus_write(dev, VE_H264_STATUS,
 484                     VE_H264_STATUS_INT_MASK);
 485}
 486
 487static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
 488{
 489        struct cedrus_dev *dev = ctx->dev;
 490        u32 reg = cedrus_read(dev, VE_H264_CTRL);
 491
 492        cedrus_write(dev, VE_H264_CTRL,
 493                     reg & ~VE_H264_CTRL_INT_MASK);
 494}
 495
 496static void cedrus_h264_setup(struct cedrus_ctx *ctx,
 497                              struct cedrus_run *run)
 498{
 499        struct cedrus_dev *dev = ctx->dev;
 500
 501        cedrus_engine_enable(ctx, CEDRUS_CODEC_H264);
 502
 503        cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
 504        cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
 505                     ctx->codec.h264.pic_info_buf_dma);
 506        cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
 507                     ctx->codec.h264.neighbor_info_buf_dma);
 508
 509        cedrus_write_scaling_lists(ctx, run);
 510        cedrus_write_frame_list(ctx, run);
 511
 512        cedrus_set_params(ctx, run);
 513}
 514
 515static int cedrus_h264_start(struct cedrus_ctx *ctx)
 516{
 517        struct cedrus_dev *dev = ctx->dev;
 518        unsigned int pic_info_size;
 519        unsigned int field_size;
 520        unsigned int mv_col_size;
 521        int ret;
 522
 523        /* Formula for picture buffer size is taken from CedarX source. */
 524
 525        if (ctx->src_fmt.width > 2048)
 526                pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
 527        else
 528                pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
 529
 530        /*
 531         * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
 532         * there is no need to multiply by 2.
 533         */
 534        pic_info_size += ctx->src_fmt.height * 2 * 64;
 535
 536        if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
 537                pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
 538
 539        ctx->codec.h264.pic_info_buf_size = pic_info_size;
 540        ctx->codec.h264.pic_info_buf =
 541                dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
 542                                   &ctx->codec.h264.pic_info_buf_dma,
 543                                   GFP_KERNEL);
 544        if (!ctx->codec.h264.pic_info_buf)
 545                return -ENOMEM;
 546
 547        /*
 548         * That buffer is supposed to be 16kiB in size, and be aligned
 549         * on 16kiB as well. However, dma_alloc_coherent provides the
 550         * guarantee that we'll have a CPU and DMA address aligned on
 551         * the smallest page order that is greater to the requested
 552         * size, so we don't have to overallocate.
 553         */
 554        ctx->codec.h264.neighbor_info_buf =
 555                dma_alloc_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
 556                                   &ctx->codec.h264.neighbor_info_buf_dma,
 557                                   GFP_KERNEL);
 558        if (!ctx->codec.h264.neighbor_info_buf) {
 559                ret = -ENOMEM;
 560                goto err_pic_buf;
 561        }
 562
 563        field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
 564                DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
 565
 566        /*
 567         * FIXME: This is actually conditional to
 568         * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
 569         * might have to rework this if memory efficiency ever is
 570         * something we need to work on.
 571         */
 572        field_size = field_size * 2;
 573
 574        /*
 575         * FIXME: This is actually conditional to
 576         * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
 577         * have to rework this if memory efficiency ever is something
 578         * we need to work on.
 579         */
 580        field_size = field_size * 2;
 581        ctx->codec.h264.mv_col_buf_field_size = field_size;
 582
 583        mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
 584        ctx->codec.h264.mv_col_buf_size = mv_col_size;
 585        ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev,
 586                                                        ctx->codec.h264.mv_col_buf_size,
 587                                                        &ctx->codec.h264.mv_col_buf_dma,
 588                                                        GFP_KERNEL);
 589        if (!ctx->codec.h264.mv_col_buf) {
 590                ret = -ENOMEM;
 591                goto err_neighbor_buf;
 592        }
 593
 594        if (ctx->src_fmt.width > 2048) {
 595                /*
 596                 * Formulas for deblock and intra prediction buffer sizes
 597                 * are taken from CedarX source.
 598                 */
 599
 600                ctx->codec.h264.deblk_buf_size =
 601                        ALIGN(ctx->src_fmt.width, 32) * 12;
 602                ctx->codec.h264.deblk_buf =
 603                        dma_alloc_coherent(dev->dev,
 604                                           ctx->codec.h264.deblk_buf_size,
 605                                           &ctx->codec.h264.deblk_buf_dma,
 606                                           GFP_KERNEL);
 607                if (!ctx->codec.h264.deblk_buf) {
 608                        ret = -ENOMEM;
 609                        goto err_mv_col_buf;
 610                }
 611
 612                /*
 613                 * NOTE: Multiplying by two deviates from CedarX logic, but it
 614                 * is for some unknown reason needed for H264 4K decoding on H6.
 615                 */
 616                ctx->codec.h264.intra_pred_buf_size =
 617                        ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
 618                ctx->codec.h264.intra_pred_buf =
 619                        dma_alloc_coherent(dev->dev,
 620                                           ctx->codec.h264.intra_pred_buf_size,
 621                                           &ctx->codec.h264.intra_pred_buf_dma,
 622                                           GFP_KERNEL);
 623                if (!ctx->codec.h264.intra_pred_buf) {
 624                        ret = -ENOMEM;
 625                        goto err_deblk_buf;
 626                }
 627        }
 628
 629        return 0;
 630
 631err_deblk_buf:
 632        dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
 633                          ctx->codec.h264.deblk_buf,
 634                          ctx->codec.h264.deblk_buf_dma);
 635
 636err_mv_col_buf:
 637        dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
 638                          ctx->codec.h264.mv_col_buf,
 639                          ctx->codec.h264.mv_col_buf_dma);
 640
 641err_neighbor_buf:
 642        dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
 643                          ctx->codec.h264.neighbor_info_buf,
 644                          ctx->codec.h264.neighbor_info_buf_dma);
 645
 646err_pic_buf:
 647        dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
 648                          ctx->codec.h264.pic_info_buf,
 649                          ctx->codec.h264.pic_info_buf_dma);
 650        return ret;
 651}
 652
 653static void cedrus_h264_stop(struct cedrus_ctx *ctx)
 654{
 655        struct cedrus_dev *dev = ctx->dev;
 656
 657        dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
 658                          ctx->codec.h264.mv_col_buf,
 659                          ctx->codec.h264.mv_col_buf_dma);
 660        dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
 661                          ctx->codec.h264.neighbor_info_buf,
 662                          ctx->codec.h264.neighbor_info_buf_dma);
 663        dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
 664                          ctx->codec.h264.pic_info_buf,
 665                          ctx->codec.h264.pic_info_buf_dma);
 666        if (ctx->codec.h264.deblk_buf_size)
 667                dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
 668                                  ctx->codec.h264.deblk_buf,
 669                                  ctx->codec.h264.deblk_buf_dma);
 670        if (ctx->codec.h264.intra_pred_buf_size)
 671                dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size,
 672                                  ctx->codec.h264.intra_pred_buf,
 673                                  ctx->codec.h264.intra_pred_buf_dma);
 674}
 675
 676static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
 677{
 678        struct cedrus_dev *dev = ctx->dev;
 679
 680        cedrus_write(dev, VE_H264_TRIGGER_TYPE,
 681                     VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
 682}
 683
 684struct cedrus_dec_ops cedrus_dec_ops_h264 = {
 685        .irq_clear      = cedrus_h264_irq_clear,
 686        .irq_disable    = cedrus_h264_irq_disable,
 687        .irq_status     = cedrus_h264_irq_status,
 688        .setup          = cedrus_h264_setup,
 689        .start          = cedrus_h264_start,
 690        .stop           = cedrus_h264_stop,
 691        .trigger        = cedrus_h264_trigger,
 692};
 693