linux/drivers/media/platform/sti/hva/hva-h264.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) STMicroelectronics SA 2015
   4 * Authors: Yannick Fertre <yannick.fertre@st.com>
   5 *          Hugues Fruchet <hugues.fruchet@st.com>
   6 */
   7
   8#include "hva.h"
   9#include "hva-hw.h"
  10
  11#define MAX_SPS_PPS_SIZE 128
  12
  13#define BITSTREAM_OFFSET_MASK 0x7F
  14
  15/* video max size*/
  16#define H264_MAX_SIZE_W 1920
  17#define H264_MAX_SIZE_H 1920
  18
  19/* macroBlocs number (width & height) */
  20#define MB_W(w) ((w + 0xF)  / 0x10)
  21#define MB_H(h) ((h + 0xF)  / 0x10)
  22
  23/* formula to get temporal or spatial data size */
  24#define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)
  25
  26#define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
  27#define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
  28#define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
  29#define SLICE_HEADER_SIZE (4 * 16)
  30#define BRC_DATA_SIZE (5 * 16)
  31
  32/* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
  33#define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)
  34
  35/*
  36 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
  37 * for deblocking with size=4*16*MBx*2
  38 */
  39#define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)
  40
  41/* factor for bitrate and cpb buffer size max values if profile >= high */
  42#define H264_FACTOR_HIGH 1200
  43
  44/* factor for bitrate and cpb buffer size max values if profile < high */
  45#define H264_FACTOR_BASELINE 1000
  46
  47/* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
  48#define H264_FILLER_DATA_SIZE 6
  49
  50struct h264_profile {
  51        enum v4l2_mpeg_video_h264_level level;
  52        u32 max_mb_per_seconds;
  53        u32 max_frame_size;
  54        u32 max_bitrate;
  55        u32 max_cpb_size;
  56        u32 min_comp_ratio;
  57};
  58
  59static const struct h264_profile h264_infos_list[] = {
  60        {V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
  61        {V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
  62        {V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
  63        {V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
  64        {V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
  65        {V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
  66        {V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
  67        {V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
  68        {V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
  69        {V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
  70        {V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
  71        {V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
  72        {V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
  73        {V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
  74        {V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
  75        {V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
  76};
  77
  78enum hva_brc_type {
  79        BRC_TYPE_NONE = 0,
  80        BRC_TYPE_CBR = 1,
  81        BRC_TYPE_VBR = 2,
  82        BRC_TYPE_VBR_LOW_DELAY = 3
  83};
  84
  85enum hva_entropy_coding_mode {
  86        CAVLC = 0,
  87        CABAC = 1
  88};
  89
  90enum hva_picture_coding_type {
  91        PICTURE_CODING_TYPE_I = 0,
  92        PICTURE_CODING_TYPE_P = 1,
  93        PICTURE_CODING_TYPE_B = 2
  94};
  95
  96enum hva_h264_sampling_mode {
  97        SAMPLING_MODE_NV12 = 0,
  98        SAMPLING_MODE_UYVY = 1,
  99        SAMPLING_MODE_RGB3 = 3,
 100        SAMPLING_MODE_XRGB4 = 4,
 101        SAMPLING_MODE_NV21 = 8,
 102        SAMPLING_MODE_VYUY = 9,
 103        SAMPLING_MODE_BGR3 = 11,
 104        SAMPLING_MODE_XBGR4 = 12,
 105        SAMPLING_MODE_RGBX4 = 20,
 106        SAMPLING_MODE_BGRX4 = 28
 107};
 108
 109enum hva_h264_nalu_type {
 110        NALU_TYPE_UNKNOWN = 0,
 111        NALU_TYPE_SLICE = 1,
 112        NALU_TYPE_SLICE_DPA = 2,
 113        NALU_TYPE_SLICE_DPB = 3,
 114        NALU_TYPE_SLICE_DPC = 4,
 115        NALU_TYPE_SLICE_IDR = 5,
 116        NALU_TYPE_SEI = 6,
 117        NALU_TYPE_SPS = 7,
 118        NALU_TYPE_PPS = 8,
 119        NALU_TYPE_AU_DELIMITER = 9,
 120        NALU_TYPE_SEQ_END = 10,
 121        NALU_TYPE_STREAM_END = 11,
 122        NALU_TYPE_FILLER_DATA = 12,
 123        NALU_TYPE_SPS_EXT = 13,
 124        NALU_TYPE_PREFIX_UNIT = 14,
 125        NALU_TYPE_SUBSET_SPS = 15,
 126        NALU_TYPE_SLICE_AUX = 19,
 127        NALU_TYPE_SLICE_EXT = 20
 128};
 129
 130enum hva_h264_sei_payload_type {
 131        SEI_BUFFERING_PERIOD = 0,
 132        SEI_PICTURE_TIMING = 1,
 133        SEI_STEREO_VIDEO_INFO = 21,
 134        SEI_FRAME_PACKING_ARRANGEMENT = 45
 135};
 136
 137/*
 138 * stereo Video Info struct
 139 */
 140struct hva_h264_stereo_video_sei {
 141        u8 field_views_flag;
 142        u8 top_field_is_left_view_flag;
 143        u8 current_frame_is_left_view_flag;
 144        u8 next_frame_is_second_view_flag;
 145        u8 left_view_self_contained_flag;
 146        u8 right_view_self_contained_flag;
 147};
 148
 149/*
 150 * struct hva_h264_td
 151 *
 152 * @frame_width: width in pixels of the buffer containing the input frame
 153 * @frame_height: height in pixels of the buffer containing the input frame
 154 * @frame_num: the parameter to be written in the slice header
 155 * @picture_coding_type: type I, P or B
 156 * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
 157 * @first_picture_in_sequence: flag telling to encoder that this is the
 158 *                             first picture in a video sequence.
 159 *                             Used for VBR
 160 * @slice_size_type: 0 = no constraint to close the slice
 161 *                   1= a slice is closed as soon as the slice_mb_size limit
 162 *                      is reached
 163 *                   2= a slice is closed as soon as the slice_byte_size limit
 164 *                      is reached
 165 *                   3= a slice is closed as soon as either the slice_byte_size
 166 *                      limit or the slice_mb_size limit is reached
 167 * @slice_mb_size: defines the slice size in number of macroblocks
 168 *                 (used when slice_size_type=1 or slice_size_type=3)
 169 * @ir_param_option: defines the number of macroblocks per frame to be
 170 *                   refreshed by AIR algorithm OR the refresh period
 171 *                   by CIR algorithm
 172 * @intra_refresh_type: enables the adaptive intra refresh algorithm.
 173 *                      Disable=0 / Adaptative=1 and Cycle=2 as intra refresh
 174 * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
 175 * @transform_mode: controls the use of 4x4/8x8 transform mode
 176 * @disable_deblocking_filter_idc:
 177 *                   0: specifies that all luma and chroma block edges of
 178 *                      the slice are filtered.
 179 *                   1: specifies that deblocking is disabled for all block
 180 *                      edges of the slice.
 181 *                   2: specifies that all luma and chroma block edges of
 182 *                      the slice are filtered with exception of the block edges
 183 *                      that coincide with slice boundaries
 184 * @slice_alpha_c0_offset_div2: to be written in slice header,
 185 *                              controls deblocking
 186 * @slice_beta_offset_div2: to be written in slice header,
 187 *                          controls deblocking
 188 * @encoder_complexity: encoder complexity control (IME).
 189 *                   0 = I_16x16, P_16x16, Full ME Complexity
 190 *                   1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
 191 *                   2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
 192 *                   4 = I_16x16, P_16x16, Reduced ME Complexity
 193 *                   5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
 194 *                   6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
 195 *  @chroma_qp_index_offset: coming from picture parameter set
 196 *                           (PPS see [H.264 STD] 7.4.2.2)
 197 *  @entropy_coding_mode: entropy coding mode.
 198 *                        0 = CAVLC
 199 *                        1 = CABAC
 200 * @brc_type: selects the bit-rate control algorithm
 201 *                   0 = constant Qp, (no BRC)
 202 *                   1 = CBR
 203 *                   2 = VBR
 204 * @quant: Quantization param used in case of fix QP encoding (no BRC)
 205 * @non_VCL_NALU_Size: size of non-VCL NALUs (SPS, PPS, filler),
 206 *                     used by BRC
 207 * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
 208 * @bit_rate: target bitrate, for BRC
 209 * @qp_min: min QP threshold
 210 * @qp_max: max QP threshold
 211 * @framerate_num: target framerate numerator , used by BRC
 212 * @framerate_den: target framerate denomurator , used by BRC
 213 * @delay: End-to-End Initial Delay
 214 * @strict_HRD_compliancy: flag for HDR compliancy (1)
 215 *                         May impact quality encoding
 216 * @addr_source_buffer: address of input frame buffer for current frame
 217 * @addr_fwd_Ref_Buffer: address of reference frame buffer
 218 * @addr_rec_buffer: address of reconstructed frame buffer
 219 * @addr_output_bitstream_start: output bitstream start address
 220 * @addr_output_bitstream_end: output bitstream end address
 221 * @addr_external_sw : address of external search window
 222 * @addr_lctx : address of context picture buffer
 223 * @addr_local_rec_buffer: address of local reconstructed buffer
 224 * @addr_spatial_context: address of spatial context buffer
 225 * @bitstream_offset: offset in bits between aligned bitstream start
 226 *                    address and first bit to be written by HVA.
 227 *                    Range value is [0..63]
 228 * @sampling_mode: Input picture format .
 229 *                   0: YUV420 semi_planar Interleaved
 230 *                   1: YUV422 raster Interleaved
 231 * @addr_param_out: address of output parameters structure
 232 * @addr_scaling_matrix: address to the coefficient of
 233 *                       the inverse scaling matrix
 234 * @addr_scaling_matrix_dir: address to the coefficient of
 235 *                           the direct scaling matrix
 236 * @addr_cabac_context_buffer: address of cabac context buffer
 237 * @GmvX: Input information about the horizontal global displacement of
 238 *        the encoded frame versus the previous one
 239 * @GmvY: Input information about the vertical global displacement of
 240 *        the encoded frame versus the previous one
 241 * @window_width: width in pixels of the window to be encoded inside
 242 *                the input frame
 243 * @window_height: width in pixels of the window to be encoded inside
 244 *                 the input frame
 245 * @window_horizontal_offset: horizontal offset in pels for input window
 246 *                            within input frame
 247 * @window_vertical_offset: vertical offset in pels for input window
 248 *                          within input frame
 249 * @addr_roi: Map of QP offset for the Region of Interest algorithm and
 250 *            also used for Error map.
 251 *            Bit 0-6 used for qp offset (value -64 to 63).
 252 *            Bit 7 used to force intra
 253 * @addr_slice_header: address to slice header
 254 * @slice_header_size_in_bits: size in bits of the Slice header
 255 * @slice_header_offset0: Slice header offset where to insert
 256 *                        first_Mb_in_slice
 257 * @slice_header_offset1: Slice header offset where to insert
 258 *                        slice_qp_delta
 259 * @slice_header_offset2: Slice header offset where to insert
 260 *                        num_MBs_in_slice
 261 * @slice_synchro_enable: enable "slice ready" interrupt after each slice
 262 * @max_slice_number: Maximum number of slice in a frame
 263 *                    (0 is strictly forbidden)
 264 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 265 *                    YUV for the Y component.
 266 *                    Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 267 * @rgb2_yuv_u_coeff: four coefficients (C0C1C2C3) to convert from RGB to
 268 *                    YUV for the Y component.
 269 *                    Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 270 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 271 *                    YUV for the U (Cb) component.
 272 *                    U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 273 * @slice_byte_size: maximum slice size in bytes
 274 *                   (used when slice_size_type=2 or slice_size_type=3)
 275 * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
 276 *                       for the AIR algorithm
 277 * @brc_no_skip: Disable skipping in the Bitrate Controller
 278 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
 279 */
 280struct hva_h264_td {
 281        u16 frame_width;
 282        u16 frame_height;
 283        u32 frame_num;
 284        u16 picture_coding_type;
 285        u16 reserved1;
 286        u16 pic_order_cnt_type;
 287        u16 first_picture_in_sequence;
 288        u16 slice_size_type;
 289        u16 reserved2;
 290        u32 slice_mb_size;
 291        u16 ir_param_option;
 292        u16 intra_refresh_type;
 293        u16 use_constrained_intra_flag;
 294        u16 transform_mode;
 295        u16 disable_deblocking_filter_idc;
 296        s16 slice_alpha_c0_offset_div2;
 297        s16 slice_beta_offset_div2;
 298        u16 encoder_complexity;
 299        s16 chroma_qp_index_offset;
 300        u16 entropy_coding_mode;
 301        u16 brc_type;
 302        u16 quant;
 303        u32 non_vcl_nalu_size;
 304        u32 cpb_buffer_size;
 305        u32 bit_rate;
 306        u16 qp_min;
 307        u16 qp_max;
 308        u16 framerate_num;
 309        u16 framerate_den;
 310        u16 delay;
 311        u16 strict_hrd_compliancy;
 312        u32 addr_source_buffer;
 313        u32 addr_fwd_ref_buffer;
 314        u32 addr_rec_buffer;
 315        u32 addr_output_bitstream_start;
 316        u32 addr_output_bitstream_end;
 317        u32 addr_external_sw;
 318        u32 addr_lctx;
 319        u32 addr_local_rec_buffer;
 320        u32 addr_spatial_context;
 321        u16 bitstream_offset;
 322        u16 sampling_mode;
 323        u32 addr_param_out;
 324        u32 addr_scaling_matrix;
 325        u32 addr_scaling_matrix_dir;
 326        u32 addr_cabac_context_buffer;
 327        u32 reserved3;
 328        u32 reserved4;
 329        s16 gmv_x;
 330        s16 gmv_y;
 331        u16 window_width;
 332        u16 window_height;
 333        u16 window_horizontal_offset;
 334        u16 window_vertical_offset;
 335        u32 addr_roi;
 336        u32 addr_slice_header;
 337        u16 slice_header_size_in_bits;
 338        u16 slice_header_offset0;
 339        u16 slice_header_offset1;
 340        u16 slice_header_offset2;
 341        u32 reserved5;
 342        u32 reserved6;
 343        u16 reserved7;
 344        u16 reserved8;
 345        u16 slice_synchro_enable;
 346        u16 max_slice_number;
 347        u32 rgb2_yuv_y_coeff;
 348        u32 rgb2_yuv_u_coeff;
 349        u32 rgb2_yuv_v_coeff;
 350        u32 slice_byte_size;
 351        u16 max_air_intra_mb_nb;
 352        u16 brc_no_skip;
 353        u32 addr_temporal_context;
 354        u32 addr_brc_in_out_parameter;
 355};
 356
 357/*
 358 * struct hva_h264_slice_po
 359 *
 360 * @ slice_size: slice size
 361 * @ slice_start_time: start time
 362 * @ slice_stop_time: stop time
 363 * @ slice_num: slice number
 364 */
 365struct hva_h264_slice_po {
 366        u32 slice_size;
 367        u32 slice_start_time;
 368        u32 slice_end_time;
 369        u32 slice_num;
 370};
 371
 372/*
 373 * struct hva_h264_po
 374 *
 375 * @ bitstream_size: bitstream size
 376 * @ dct_bitstream_size: dtc bitstream size
 377 * @ stuffing_bits: number of stuffing bits inserted by the encoder
 378 * @ removal_time: removal time of current frame (nb of ticks 1/framerate)
 379 * @ hvc_start_time: hvc start time
 380 * @ hvc_stop_time: hvc stop time
 381 * @ slice_count: slice count
 382 */
 383struct hva_h264_po {
 384        u32 bitstream_size;
 385        u32 dct_bitstream_size;
 386        u32 stuffing_bits;
 387        u32 removal_time;
 388        u32 hvc_start_time;
 389        u32 hvc_stop_time;
 390        u32 slice_count;
 391        u32 reserved0;
 392        struct hva_h264_slice_po slice_params[16];
 393};
 394
 395struct hva_h264_task {
 396        struct hva_h264_td td;
 397        struct hva_h264_po po;
 398};
 399
 400/*
 401 * struct hva_h264_ctx
 402 *
 403 * @seq_info:  sequence information buffer
 404 * @ref_frame: reference frame buffer
 405 * @rec_frame: reconstructed frame buffer
 406 * @task:      task descriptor
 407 */
 408struct hva_h264_ctx {
 409        struct hva_buffer *seq_info;
 410        struct hva_buffer *ref_frame;
 411        struct hva_buffer *rec_frame;
 412        struct hva_buffer *task;
 413};
 414
 415static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
 416                                      u8 *slice_header_addr,
 417                                      struct hva_controls *ctrls,
 418                                      int frame_num,
 419                                      u16 *header_size,
 420                                      u16 *header_offset0,
 421                                      u16 *header_offset1,
 422                                      u16 *header_offset2)
 423{
 424        /*
 425         * with this HVA hardware version, part of the slice header is computed
 426         * on host and part by hardware.
 427         * The part of host is precomputed and available through this array.
 428         */
 429        struct device *dev = ctx_to_dev(pctx);
 430        int  cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
 431        static const unsigned char slice_header[] = {
 432                0x00, 0x00, 0x00, 0x01,
 433                0x41, 0x34, 0x07, 0x00
 434        };
 435        int idr_pic_id = frame_num % 2;
 436        enum hva_picture_coding_type type;
 437        u32 frame_order = frame_num % ctrls->gop_size;
 438
 439        if (!(frame_num % ctrls->gop_size))
 440                type = PICTURE_CODING_TYPE_I;
 441        else
 442                type = PICTURE_CODING_TYPE_P;
 443
 444        memcpy(slice_header_addr, slice_header, sizeof(slice_header));
 445
 446        *header_size = 56;
 447        *header_offset0 = 40;
 448        *header_offset1 = 13;
 449        *header_offset2 = 0;
 450
 451        if (type == PICTURE_CODING_TYPE_I) {
 452                slice_header_addr[4] = 0x65;
 453                slice_header_addr[5] = 0x11;
 454
 455                /* toggle the I frame */
 456                if ((frame_num / ctrls->gop_size) % 2) {
 457                        *header_size += 4;
 458                        *header_offset1 += 4;
 459                        slice_header_addr[6] = 0x04;
 460                        slice_header_addr[7] = 0x70;
 461
 462                } else {
 463                        *header_size += 2;
 464                        *header_offset1 += 2;
 465                        slice_header_addr[6] = 0x09;
 466                        slice_header_addr[7] = 0xC0;
 467                }
 468        } else {
 469                if (ctrls->entropy_mode == cabac) {
 470                        *header_size += 1;
 471                        *header_offset1 += 1;
 472                        slice_header_addr[7] = 0x80;
 473                }
 474                /*
 475                 * update slice header with P frame order
 476                 * frame order is limited to 16 (coded on 4bits only)
 477                 */
 478                slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
 479                slice_header_addr[6] += ((frame_order & 0x03) << 6);
 480        }
 481
 482        dev_dbg(dev,
 483                "%s   %s slice header order %d idrPicId %d header size %d\n",
 484                pctx->name, __func__, frame_order, idr_pic_id, *header_size);
 485        return 0;
 486}
 487
 488static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
 489                                  unsigned int stuffing_bytes, u8 *addr,
 490                                  unsigned int stream_size, unsigned int *size)
 491{
 492        struct device *dev = ctx_to_dev(pctx);
 493        static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
 494
 495        dev_dbg(dev, "%s   %s stuffing bytes %d\n", pctx->name, __func__,
 496                stuffing_bytes);
 497
 498        if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) {
 499                dev_dbg(dev, "%s   %s too many stuffing bytes %d\n",
 500                        pctx->name, __func__, stuffing_bytes);
 501                return 0;
 502        }
 503
 504        /* start code */
 505        memcpy(addr + *size, start, sizeof(start));
 506        *size += sizeof(start);
 507
 508        /* nal_unit_type */
 509        addr[*size] = NALU_TYPE_FILLER_DATA;
 510        *size += 1;
 511
 512        memset(addr + *size, 0xff, stuffing_bytes);
 513        *size += stuffing_bytes;
 514
 515        addr[*size] = 0x80;
 516        *size += 1;
 517
 518        return 0;
 519}
 520
 521static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
 522                                 enum hva_h264_sei_payload_type type,
 523                                 u8 *addr, u32 *size)
 524{
 525        struct device *dev = ctx_to_dev(pctx);
 526        static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
 527        struct hva_h264_stereo_video_sei info;
 528        u8 offset = 7;
 529        u8 msg = 0;
 530
 531        /* start code */
 532        memcpy(addr + *size, start, sizeof(start));
 533        *size += sizeof(start);
 534
 535        /* nal_unit_type */
 536        addr[*size] = NALU_TYPE_SEI;
 537        *size += 1;
 538
 539        /* payload type */
 540        addr[*size] = type;
 541        *size += 1;
 542
 543        switch (type) {
 544        case SEI_STEREO_VIDEO_INFO:
 545                memset(&info, 0, sizeof(info));
 546
 547                /* set to top/bottom frame packing arrangement */
 548                info.field_views_flag = 1;
 549                info.top_field_is_left_view_flag = 1;
 550
 551                /* payload size */
 552                addr[*size] = 1;
 553                *size += 1;
 554
 555                /* payload */
 556                msg = info.field_views_flag << offset--;
 557
 558                if (info.field_views_flag) {
 559                        msg |= info.top_field_is_left_view_flag <<
 560                               offset--;
 561                } else {
 562                        msg |= info.current_frame_is_left_view_flag <<
 563                               offset--;
 564                        msg |= info.next_frame_is_second_view_flag <<
 565                               offset--;
 566                }
 567                msg |= info.left_view_self_contained_flag << offset--;
 568                msg |= info.right_view_self_contained_flag << offset--;
 569
 570                addr[*size] = msg;
 571                *size += 1;
 572
 573                addr[*size] = 0x80;
 574                *size += 1;
 575
 576                return 0;
 577        case SEI_BUFFERING_PERIOD:
 578        case SEI_PICTURE_TIMING:
 579        case SEI_FRAME_PACKING_ARRANGEMENT:
 580        default:
 581                dev_err(dev, "%s   sei nal type not supported %d\n",
 582                        pctx->name, type);
 583                return -EINVAL;
 584        }
 585}
 586
 587static int hva_h264_prepare_task(struct hva_ctx *pctx,
 588                                 struct hva_h264_task *task,
 589                                 struct hva_frame *frame,
 590                                 struct hva_stream *stream)
 591{
 592        struct hva_dev *hva = ctx_to_hdev(pctx);
 593        struct device *dev = ctx_to_dev(pctx);
 594        struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
 595        struct hva_buffer *seq_info = ctx->seq_info;
 596        struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
 597        struct hva_buffer *loc_rec_frame = ctx->rec_frame;
 598        struct hva_h264_td *td = &task->td;
 599        struct hva_controls *ctrls = &pctx->ctrls;
 600        struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
 601        int cavlc =  V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
 602        u32 frame_num = pctx->stream_num;
 603        u32 addr_esram = hva->esram_addr;
 604        enum v4l2_mpeg_video_h264_level level;
 605        dma_addr_t paddr = 0;
 606        u8 *slice_header_vaddr;
 607        u32 frame_width = frame->info.aligned_width;
 608        u32 frame_height = frame->info.aligned_height;
 609        u32 max_cpb_buffer_size;
 610        unsigned int payload = stream->bytesused;
 611        u32 max_bitrate;
 612
 613        /* check width and height parameters */
 614        if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
 615            (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
 616                dev_err(dev,
 617                        "%s   width(%d) or height(%d) exceeds limits (%dx%d)\n",
 618                        pctx->name, frame_width, frame_height,
 619                        H264_MAX_SIZE_W, H264_MAX_SIZE_H);
 620                pctx->frame_errors++;
 621                return -EINVAL;
 622        }
 623
 624        level = ctrls->level;
 625
 626        memset(td, 0, sizeof(struct hva_h264_td));
 627
 628        td->frame_width = frame_width;
 629        td->frame_height = frame_height;
 630
 631        /* set frame alignment */
 632        td->window_width =  frame_width;
 633        td->window_height = frame_height;
 634        td->window_horizontal_offset = 0;
 635        td->window_vertical_offset = 0;
 636
 637        td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
 638
 639        /* pic_order_cnt_type hard coded to '2' as only I & P frames */
 640        td->pic_order_cnt_type = 2;
 641
 642        /* useConstrainedIntraFlag set to false for better coding efficiency */
 643        td->use_constrained_intra_flag = false;
 644        td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
 645                        ? BRC_TYPE_CBR : BRC_TYPE_VBR;
 646
 647        td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
 648                                  CABAC;
 649
 650        td->bit_rate = ctrls->bitrate;
 651
 652        /* set framerate, framerate = 1 n/ time per frame */
 653        if (time_per_frame->numerator >= 536) {
 654                /*
 655                 * due to a hardware bug, framerate denominator can't exceed
 656                 * 536 (BRC overflow). Compute nearest framerate
 657                 */
 658                td->framerate_den = 1;
 659                td->framerate_num = (time_per_frame->denominator +
 660                                    (time_per_frame->numerator >> 1) - 1) /
 661                                    time_per_frame->numerator;
 662
 663                /*
 664                 * update bitrate to introduce a correction due to
 665                 * the new framerate
 666                 * new bitrate = (old bitrate * new framerate) / old framerate
 667                 */
 668                td->bit_rate /= time_per_frame->numerator;
 669                td->bit_rate *= time_per_frame->denominator;
 670                td->bit_rate /= td->framerate_num;
 671        } else {
 672                td->framerate_den = time_per_frame->numerator;
 673                td->framerate_num = time_per_frame->denominator;
 674        }
 675
 676        /* compute maximum bitrate depending on profile */
 677        if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
 678                max_bitrate = h264_infos_list[level].max_bitrate *
 679                              H264_FACTOR_HIGH;
 680        else
 681                max_bitrate = h264_infos_list[level].max_bitrate *
 682                              H264_FACTOR_BASELINE;
 683
 684        /* check if bitrate doesn't exceed max size */
 685        if (td->bit_rate > max_bitrate) {
 686                dev_dbg(dev,
 687                        "%s   bitrate (%d) larger than level and profile allow, clip to %d\n",
 688                        pctx->name, td->bit_rate, max_bitrate);
 689                td->bit_rate = max_bitrate;
 690        }
 691
 692        /* convert cpb_buffer_size in bits */
 693        td->cpb_buffer_size = ctrls->cpb_size * 8000;
 694
 695        /* compute maximum cpb buffer size depending on profile */
 696        if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
 697                max_cpb_buffer_size =
 698                    h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
 699        else
 700                max_cpb_buffer_size =
 701                    h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
 702
 703        /* check if cpb buffer size doesn't exceed max size */
 704        if (td->cpb_buffer_size > max_cpb_buffer_size) {
 705                dev_dbg(dev,
 706                        "%s   cpb size larger than level %d allows, clip to %d\n",
 707                        pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
 708                td->cpb_buffer_size = max_cpb_buffer_size;
 709        }
 710
 711        /* enable skipping in the Bitrate Controller */
 712        td->brc_no_skip = 0;
 713
 714        /* initial delay */
 715        if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
 716            td->bit_rate)
 717                td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
 718        else
 719                td->delay = 0;
 720
 721        switch (frame->info.pixelformat) {
 722        case V4L2_PIX_FMT_NV12:
 723                td->sampling_mode = SAMPLING_MODE_NV12;
 724                break;
 725        case V4L2_PIX_FMT_NV21:
 726                td->sampling_mode = SAMPLING_MODE_NV21;
 727                break;
 728        default:
 729                dev_err(dev, "%s   invalid source pixel format\n",
 730                        pctx->name);
 731                pctx->frame_errors++;
 732                return -EINVAL;
 733        }
 734
 735        /*
 736         * fill matrix color converter (RGB to YUV)
 737         * Y = 0,299 R + 0,587 G + 0,114 B
 738         * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
 739         * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
 740         */
 741        td->rgb2_yuv_y_coeff = 0x12031008;
 742        td->rgb2_yuv_u_coeff = 0x800EF7FB;
 743        td->rgb2_yuv_v_coeff = 0x80FEF40E;
 744
 745        /* enable/disable transform mode */
 746        td->transform_mode = ctrls->dct8x8;
 747
 748        /* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
 749        td->encoder_complexity = 2;
 750
 751        /* quant fix to 28, default VBR value */
 752        td->quant = 28;
 753
 754        if (td->framerate_den == 0) {
 755                dev_err(dev, "%s   invalid framerate\n", pctx->name);
 756                pctx->frame_errors++;
 757                return -EINVAL;
 758        }
 759
 760        /* if automatic framerate, deactivate bitrate controller */
 761        if (td->framerate_num == 0)
 762                td->brc_type = 0;
 763
 764        /* compliancy fix to true */
 765        td->strict_hrd_compliancy = 1;
 766
 767        /* set minimum & maximum quantizers */
 768        td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
 769        td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
 770
 771        td->addr_source_buffer = frame->paddr;
 772        td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
 773        td->addr_rec_buffer = loc_rec_frame->paddr;
 774
 775        td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
 776
 777        td->addr_output_bitstream_start = (u32)stream->paddr;
 778        td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
 779                               BITSTREAM_OFFSET_MASK;
 780
 781        td->addr_param_out = (u32)ctx->task->paddr +
 782                             offsetof(struct hva_h264_task, po);
 783
 784        /* swap spatial and temporal context */
 785        if (frame_num % 2) {
 786                paddr = seq_info->paddr;
 787                td->addr_spatial_context =  ALIGN(paddr, 0x100);
 788                paddr = seq_info->paddr + DATA_SIZE(frame_width,
 789                                                        frame_height);
 790                td->addr_temporal_context = ALIGN(paddr, 0x100);
 791        } else {
 792                paddr = seq_info->paddr;
 793                td->addr_temporal_context = ALIGN(paddr, 0x100);
 794                paddr = seq_info->paddr + DATA_SIZE(frame_width,
 795                                                        frame_height);
 796                td->addr_spatial_context =  ALIGN(paddr, 0x100);
 797        }
 798
 799        paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
 800
 801        td->addr_brc_in_out_parameter =  ALIGN(paddr, 0x100);
 802
 803        paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
 804        td->addr_slice_header =  ALIGN(paddr, 0x100);
 805        td->addr_external_sw =  ALIGN(addr_esram, 0x100);
 806
 807        addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
 808        td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
 809
 810        addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
 811        td->addr_lctx = ALIGN(addr_esram, 0x100);
 812
 813        addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
 814        td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
 815
 816        if (!(frame_num % ctrls->gop_size)) {
 817                td->picture_coding_type = PICTURE_CODING_TYPE_I;
 818                stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
 819        } else {
 820                td->picture_coding_type = PICTURE_CODING_TYPE_P;
 821                stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
 822        }
 823
 824        /* fill the slice header part */
 825        slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
 826                             seq_info->paddr);
 827
 828        hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
 829                                   &td->slice_header_size_in_bits,
 830                                   &td->slice_header_offset0,
 831                                   &td->slice_header_offset1,
 832                                   &td->slice_header_offset2);
 833
 834        td->chroma_qp_index_offset = 2;
 835        td->slice_synchro_enable = 0;
 836        td->max_slice_number = 1;
 837
 838        /*
 839         * check the sps/pps header size for key frame only
 840         * sps/pps header was previously fill by libv4l
 841         * during qbuf of stream buffer
 842         */
 843        if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
 844            (payload > MAX_SPS_PPS_SIZE)) {
 845                dev_err(dev, "%s   invalid sps/pps size %d\n", pctx->name,
 846                        payload);
 847                pctx->frame_errors++;
 848                return -EINVAL;
 849        }
 850
 851        if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
 852                payload = 0;
 853
 854        /* add SEI nal (video stereo info) */
 855        if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
 856                                                   (u8 *)stream->vaddr,
 857                                                   &payload)) {
 858                dev_err(dev, "%s   fail to get SEI nal\n", pctx->name);
 859                pctx->frame_errors++;
 860                return -EINVAL;
 861        }
 862
 863        /* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
 864        td->non_vcl_nalu_size = payload * 8;
 865
 866        /* compute bitstream offset & new start address of bitstream */
 867        td->addr_output_bitstream_start += ((payload >> 4) << 4);
 868        td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
 869
 870        stream->bytesused = payload;
 871
 872        return 0;
 873}
 874
 875static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
 876{
 877        struct hva_h264_po *po = &task->po;
 878
 879        return po->bitstream_size;
 880}
 881
 882static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
 883{
 884        struct hva_h264_po *po = &task->po;
 885
 886        return po->stuffing_bits >> 3;
 887}
 888
 889static int hva_h264_open(struct hva_ctx *pctx)
 890{
 891        struct device *dev = ctx_to_dev(pctx);
 892        struct hva_h264_ctx *ctx;
 893        struct hva_dev *hva = ctx_to_hdev(pctx);
 894        u32 frame_width = pctx->frameinfo.aligned_width;
 895        u32 frame_height = pctx->frameinfo.aligned_height;
 896        u32 size;
 897        int ret;
 898
 899        /* check esram size necessary to encode a frame */
 900        size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
 901               LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
 902               CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
 903               CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
 904
 905        if (hva->esram_size < size) {
 906                dev_err(dev, "%s   not enough esram (max:%d request:%d)\n",
 907                        pctx->name, hva->esram_size, size);
 908                ret = -EINVAL;
 909                goto err;
 910        }
 911
 912        /* allocate context for codec */
 913        ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
 914        if (!ctx) {
 915                ret = -ENOMEM;
 916                goto err;
 917        }
 918
 919        /* allocate sequence info buffer */
 920        ret = hva_mem_alloc(pctx,
 921                            2 * DATA_SIZE(frame_width, frame_height) +
 922                            SLICE_HEADER_SIZE +
 923                            BRC_DATA_SIZE,
 924                            "hva sequence info",
 925                            &ctx->seq_info);
 926        if (ret) {
 927                dev_err(dev,
 928                        "%s   failed to allocate sequence info buffer\n",
 929                        pctx->name);
 930                goto err_ctx;
 931        }
 932
 933        /* allocate reference frame buffer */
 934        ret = hva_mem_alloc(pctx,
 935                            frame_width * frame_height * 3 / 2,
 936                            "hva reference frame",
 937                            &ctx->ref_frame);
 938        if (ret) {
 939                dev_err(dev, "%s   failed to allocate reference frame buffer\n",
 940                        pctx->name);
 941                goto err_seq_info;
 942        }
 943
 944        /* allocate reconstructed frame buffer */
 945        ret = hva_mem_alloc(pctx,
 946                            frame_width * frame_height * 3 / 2,
 947                            "hva reconstructed frame",
 948                            &ctx->rec_frame);
 949        if (ret) {
 950                dev_err(dev,
 951                        "%s   failed to allocate reconstructed frame buffer\n",
 952                        pctx->name);
 953                goto err_ref_frame;
 954        }
 955
 956        /* allocate task descriptor */
 957        ret = hva_mem_alloc(pctx,
 958                            sizeof(struct hva_h264_task),
 959                            "hva task descriptor",
 960                            &ctx->task);
 961        if (ret) {
 962                dev_err(dev,
 963                        "%s   failed to allocate task descriptor\n",
 964                        pctx->name);
 965                goto err_rec_frame;
 966        }
 967
 968        pctx->priv = (void *)ctx;
 969
 970        return 0;
 971
 972err_rec_frame:
 973        hva_mem_free(pctx, ctx->rec_frame);
 974err_ref_frame:
 975        hva_mem_free(pctx, ctx->ref_frame);
 976err_seq_info:
 977        hva_mem_free(pctx, ctx->seq_info);
 978err_ctx:
 979        devm_kfree(dev, ctx);
 980err:
 981        pctx->sys_errors++;
 982        return ret;
 983}
 984
 985static int hva_h264_close(struct hva_ctx *pctx)
 986{
 987        struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
 988        struct device *dev = ctx_to_dev(pctx);
 989
 990        if (ctx->seq_info)
 991                hva_mem_free(pctx, ctx->seq_info);
 992
 993        if (ctx->ref_frame)
 994                hva_mem_free(pctx, ctx->ref_frame);
 995
 996        if (ctx->rec_frame)
 997                hva_mem_free(pctx, ctx->rec_frame);
 998
 999        if (ctx->task)
1000                hva_mem_free(pctx, ctx->task);
1001
1002        devm_kfree(dev, ctx);
1003
1004        return 0;
1005}
1006
1007static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
1008                           struct hva_stream *stream)
1009{
1010        struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
1011        struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr;
1012        u32 stuffing_bytes = 0;
1013        int ret = 0;
1014
1015        ret = hva_h264_prepare_task(pctx, task, frame, stream);
1016        if (ret)
1017                goto err;
1018
1019        ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
1020        if (ret)
1021                goto err;
1022
1023        pctx->stream_num++;
1024        stream->bytesused += hva_h264_get_stream_size(task);
1025
1026        stuffing_bytes = hva_h264_get_stuffing_bytes(task);
1027
1028        if (stuffing_bytes)
1029                hva_h264_fill_data_nal(pctx, stuffing_bytes,
1030                                       (u8 *)stream->vaddr,
1031                                       stream->size,
1032                                       &stream->bytesused);
1033
1034        /* switch reference & reconstructed frame */
1035        swap(ctx->ref_frame, ctx->rec_frame);
1036
1037        return 0;
1038err:
1039        stream->bytesused = 0;
1040        return ret;
1041}
1042
1043const struct hva_enc nv12h264enc = {
1044        .name = "H264(NV12)",
1045        .pixelformat = V4L2_PIX_FMT_NV12,
1046        .streamformat = V4L2_PIX_FMT_H264,
1047        .max_width = H264_MAX_SIZE_W,
1048        .max_height = H264_MAX_SIZE_H,
1049        .open = hva_h264_open,
1050        .close = hva_h264_close,
1051        .encode = hva_h264_encode,
1052};
1053
1054const struct hva_enc nv21h264enc = {
1055        .name = "H264(NV21)",
1056        .pixelformat = V4L2_PIX_FMT_NV21,
1057        .streamformat = V4L2_PIX_FMT_H264,
1058        .max_width = H264_MAX_SIZE_W,
1059        .max_height = H264_MAX_SIZE_H,
1060        .open = hva_h264_open,
1061        .close = hva_h264_close,
1062        .encode = hva_h264_encode,
1063};
1064