linux/drivers/media/platform/sti/hva/hva-h264.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) STMicroelectronics SA 2015
   4 * Authors: Yannick Fertre <yannick.fertre@st.com>
   5 *          Hugues Fruchet <hugues.fruchet@st.com>
   6 */
   7
   8#include "hva.h"
   9#include "hva-hw.h"
  10
  11#define MAX_SPS_PPS_SIZE 128
  12
  13#define BITSTREAM_OFFSET_MASK 0x7F
  14
  15/* video max size*/
  16#define H264_MAX_SIZE_W 1920
  17#define H264_MAX_SIZE_H 1920
  18
  19/* macroBlocs number (width & height) */
  20#define MB_W(w) ((w + 0xF)  / 0x10)
  21#define MB_H(h) ((h + 0xF)  / 0x10)
  22
  23/* formula to get temporal or spatial data size */
  24#define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)
  25
  26#define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
  27#define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
  28#define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
  29#define SLICE_HEADER_SIZE (4 * 16)
  30#define BRC_DATA_SIZE (5 * 16)
  31
  32/* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
  33#define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)
  34
  35/*
  36 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
  37 * for deblocking with size=4*16*MBx*2
  38 */
  39#define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)
  40
  41/* factor for bitrate and cpb buffer size max values if profile >= high */
  42#define H264_FACTOR_HIGH 1200
  43
  44/* factor for bitrate and cpb buffer size max values if profile < high */
  45#define H264_FACTOR_BASELINE 1000
  46
  47/* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
  48#define H264_FILLER_DATA_SIZE 6
  49
/*
 * struct h264_profile - per-level limits used to clip encoder settings
 *
 * @level: H.264 level this entry describes
 * @max_mb_per_seconds: maximum macroblock rate
 *                      (presumably macroblocks per second - TODO confirm)
 * @max_frame_size: maximum frame size
 *                  (presumably in macroblocks - TODO confirm)
 * @max_bitrate: maximum bitrate, before being multiplied by
 *               H264_FACTOR_HIGH or H264_FACTOR_BASELINE
 * @max_cpb_size: maximum coded picture buffer size, before being multiplied
 *                by H264_FACTOR_HIGH or H264_FACTOR_BASELINE
 * @min_comp_ratio: minimum compression ratio
 */
struct h264_profile {
        enum v4l2_mpeg_video_h264_level level;
        u32 max_mb_per_seconds;
        u32 max_frame_size;
        u32 max_bitrate;
        u32 max_cpb_size;
        u32 min_comp_ratio;
};
  58
  59static const struct h264_profile h264_infos_list[] = {
  60        {V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
  61        {V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
  62        {V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
  63        {V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
  64        {V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
  65        {V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
  66        {V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
  67        {V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
  68        {V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
  69        {V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
  70        {V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
  71        {V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
  72        {V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
  73        {V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
  74        {V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
  75        {V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
  76};
  77
  78enum hva_brc_type {
  79        BRC_TYPE_NONE = 0,
  80        BRC_TYPE_CBR = 1,
  81        BRC_TYPE_VBR = 2,
  82        BRC_TYPE_VBR_LOW_DELAY = 3
  83};
  84
  85enum hva_entropy_coding_mode {
  86        CAVLC = 0,
  87        CABAC = 1
  88};
  89
  90enum hva_picture_coding_type {
  91        PICTURE_CODING_TYPE_I = 0,
  92        PICTURE_CODING_TYPE_P = 1,
  93        PICTURE_CODING_TYPE_B = 2
  94};
  95
  96enum hva_h264_sampling_mode {
  97        SAMPLING_MODE_NV12 = 0,
  98        SAMPLING_MODE_UYVY = 1,
  99        SAMPLING_MODE_RGB3 = 3,
 100        SAMPLING_MODE_XRGB4 = 4,
 101        SAMPLING_MODE_NV21 = 8,
 102        SAMPLING_MODE_VYUY = 9,
 103        SAMPLING_MODE_BGR3 = 11,
 104        SAMPLING_MODE_XBGR4 = 12,
 105        SAMPLING_MODE_RGBX4 = 20,
 106        SAMPLING_MODE_BGRX4 = 28
 107};
 108
 109enum hva_h264_nalu_type {
 110        NALU_TYPE_UNKNOWN = 0,
 111        NALU_TYPE_SLICE = 1,
 112        NALU_TYPE_SLICE_DPA = 2,
 113        NALU_TYPE_SLICE_DPB = 3,
 114        NALU_TYPE_SLICE_DPC = 4,
 115        NALU_TYPE_SLICE_IDR = 5,
 116        NALU_TYPE_SEI = 6,
 117        NALU_TYPE_SPS = 7,
 118        NALU_TYPE_PPS = 8,
 119        NALU_TYPE_AU_DELIMITER = 9,
 120        NALU_TYPE_SEQ_END = 10,
 121        NALU_TYPE_STREAM_END = 11,
 122        NALU_TYPE_FILLER_DATA = 12,
 123        NALU_TYPE_SPS_EXT = 13,
 124        NALU_TYPE_PREFIX_UNIT = 14,
 125        NALU_TYPE_SUBSET_SPS = 15,
 126        NALU_TYPE_SLICE_AUX = 19,
 127        NALU_TYPE_SLICE_EXT = 20
 128};
 129
 130enum hva_h264_sei_payload_type {
 131        SEI_BUFFERING_PERIOD = 0,
 132        SEI_PICTURE_TIMING = 1,
 133        SEI_STEREO_VIDEO_INFO = 21,
 134        SEI_FRAME_PACKING_ARRANGEMENT = 45
 135};
 136
/*
 * struct hva_h264_stereo_video_sei - flags of the stereo video info SEI
 *
 * Each member holds a single flag (0 or 1). hva_h264_fill_sei_nal() packs
 * them into the one-byte SEI payload, most significant bit first.
 *
 * @field_views_flag: selects which of the following flags are emitted
 * @top_field_is_left_view_flag: emitted only if field_views_flag is set
 * @current_frame_is_left_view_flag: emitted only if field_views_flag is
 *                                   not set
 * @next_frame_is_second_view_flag: emitted only if field_views_flag is
 *                                  not set
 * @left_view_self_contained_flag: always emitted
 * @right_view_self_contained_flag: always emitted
 */
struct hva_h264_stereo_video_sei {
        u8 field_views_flag;
        u8 top_field_is_left_view_flag;
        u8 current_frame_is_left_view_flag;
        u8 next_frame_is_second_view_flag;
        u8 left_view_self_contained_flag;
        u8 right_view_self_contained_flag;
};
 148
/*
 * struct hva_h264_td - task descriptor given to the HVA hardware
 *
 * Members named reservedN are not documented here; the whole descriptor
 * is zeroed before being filled.
 *
 * @frame_width: width in pixels of the buffer containing the input frame
 * @frame_height: height in pixels of the buffer containing the input frame
 * @frame_num: the parameter to be written in the slice header
 * @picture_coding_type: type I, P or B
 * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
 * @first_picture_in_sequence: flag telling to encoder that this is the
 *                             first picture in a video sequence.
 *                             Used for VBR
 * @slice_size_type: 0 = no constraint to close the slice
 *                   1 = a slice is closed as soon as the slice_mb_size limit
 *                       is reached
 *                   2 = a slice is closed as soon as the slice_byte_size
 *                       limit is reached
 *                   3 = a slice is closed as soon as either the
 *                       slice_byte_size limit or the slice_mb_size limit
 *                       is reached
 * @slice_mb_size: defines the slice size in number of macroblocks
 *                 (used when slice_size_type=1 or slice_size_type=3)
 * @ir_param_option: defines the number of macroblocks per frame to be
 *                   refreshed by AIR algorithm OR the refresh period
 *                   by CIR algorithm
 * @intra_refresh_type: enables the adaptive intra refresh algorithm.
 *                      Disable=0 / Adaptive=1 and Cycle=2 as intra refresh
 * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
 * @transform_mode: controls the use of 4x4/8x8 transform mode
 * @disable_deblocking_filter_idc:
 *                   0: specifies that all luma and chroma block edges of
 *                      the slice are filtered.
 *                   1: specifies that deblocking is disabled for all block
 *                      edges of the slice.
 *                   2: specifies that all luma and chroma block edges of
 *                      the slice are filtered with exception of the block
 *                      edges that coincide with slice boundaries
 * @slice_alpha_c0_offset_div2: to be written in slice header,
 *                              controls deblocking
 * @slice_beta_offset_div2: to be written in slice header,
 *                          controls deblocking
 * @encoder_complexity: encoder complexity control (IME).
 *                   0 = I_16x16, P_16x16, Full ME Complexity
 *                   1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
 *                   2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
 *                   4 = I_16x16, P_16x16, Reduced ME Complexity
 *                   5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
 *                   6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
 * @chroma_qp_index_offset: coming from picture parameter set
 *                          (PPS see [H.264 STD] 7.4.2.2)
 * @entropy_coding_mode: entropy coding mode.
 *                       0 = CAVLC
 *                       1 = CABAC
 * @brc_type: selects the bit-rate control algorithm
 *                   0 = constant Qp, (no BRC)
 *                   1 = CBR
 *                   2 = VBR
 *                   3 = VBR low delay (see enum hva_brc_type)
 * @quant: Quantization param used in case of fix QP encoding (no BRC)
 * @non_vcl_nalu_size: size of non-VCL NALUs (SPS, PPS, filler),
 *                     used by BRC
 * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
 * @bit_rate: target bitrate, for BRC
 * @qp_min: min QP threshold
 * @qp_max: max QP threshold
 * @framerate_num: target framerate numerator, used by BRC
 * @framerate_den: target framerate denominator, used by BRC
 * @delay: End-to-End Initial Delay
 * @strict_hrd_compliancy: flag for HRD compliancy (1)
 *                         May impact quality encoding
 * @addr_source_buffer: address of input frame buffer for current frame
 * @addr_fwd_ref_buffer: address of reference frame buffer
 * @addr_rec_buffer: address of reconstructed frame buffer
 * @addr_output_bitstream_start: output bitstream start address
 * @addr_output_bitstream_end: output bitstream end address
 * @addr_external_sw: address of external search window
 * @addr_lctx: address of context picture buffer
 * @addr_local_rec_buffer: address of local reconstructed buffer
 * @addr_spatial_context: address of spatial context buffer
 * @bitstream_offset: offset in bits between aligned bitstream start
 *                    address and first bit to be written by HVA.
 *                    Range value is [0..63]
 *                    NOTE(review): BITSTREAM_OFFSET_MASK is 0x7F, which
 *                    allows values up to 127 - confirm the actual range
 * @sampling_mode: Input picture format.
 *                   0: YUV420 semi_planar Interleaved
 *                   1: YUV422 raster Interleaved
 *                   (other values: see enum hva_h264_sampling_mode)
 * @addr_param_out: address of output parameters structure
 * @addr_scaling_matrix: address to the coefficient of
 *                       the inverse scaling matrix
 * @addr_scaling_matrix_dir: address to the coefficient of
 *                           the direct scaling matrix
 * @addr_cabac_context_buffer: address of cabac context buffer
 * @gmv_x: Input information about the horizontal global displacement of
 *         the encoded frame versus the previous one
 * @gmv_y: Input information about the vertical global displacement of
 *         the encoded frame versus the previous one
 * @window_width: width in pixels of the window to be encoded inside
 *                the input frame
 * @window_height: height in pixels of the window to be encoded inside
 *                 the input frame
 * @window_horizontal_offset: horizontal offset in pels for input window
 *                            within input frame
 * @window_vertical_offset: vertical offset in pels for input window
 *                          within input frame
 * @addr_roi: Map of QP offset for the Region of Interest algorithm and
 *            also used for Error map.
 *            Bit 0-6 used for qp offset (value -64 to 63).
 *            Bit 7 used to force intra
 * @addr_slice_header: address to slice header
 * @slice_header_size_in_bits: size in bits of the Slice header
 * @slice_header_offset0: Slice header offset where to insert
 *                        first_Mb_in_slice
 * @slice_header_offset1: Slice header offset where to insert
 *                        slice_qp_delta
 * @slice_header_offset2: Slice header offset where to insert
 *                        num_MBs_in_slice
 * @slice_synchro_enable: enable "slice ready" interrupt after each slice
 * @max_slice_number: Maximum number of slice in a frame
 *                    (0 is strictly forbidden)
 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *                    YUV for the Y component.
 *                    Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_u_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *                    YUV for the U (Cb) component.
 *                    U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *                    YUV for the V (Cr) component.
 *                    V = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @slice_byte_size: maximum slice size in bytes
 *                   (used when slice_size_type=2 or slice_size_type=3)
 * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
 *                       for the AIR algorithm
 * @brc_no_skip: Disable skipping in the Bitrate Controller
 * @addr_temporal_context: address of temporal context buffer
 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
 */
struct hva_h264_td {
        u16 frame_width;
        u16 frame_height;
        u32 frame_num;
        u16 picture_coding_type;
        u16 reserved1;
        u16 pic_order_cnt_type;
        u16 first_picture_in_sequence;
        u16 slice_size_type;
        u16 reserved2;
        u32 slice_mb_size;
        u16 ir_param_option;
        u16 intra_refresh_type;
        u16 use_constrained_intra_flag;
        u16 transform_mode;
        u16 disable_deblocking_filter_idc;
        s16 slice_alpha_c0_offset_div2;
        s16 slice_beta_offset_div2;
        u16 encoder_complexity;
        s16 chroma_qp_index_offset;
        u16 entropy_coding_mode;
        u16 brc_type;
        u16 quant;
        u32 non_vcl_nalu_size;
        u32 cpb_buffer_size;
        u32 bit_rate;
        u16 qp_min;
        u16 qp_max;
        u16 framerate_num;
        u16 framerate_den;
        u16 delay;
        u16 strict_hrd_compliancy;
        u32 addr_source_buffer;
        u32 addr_fwd_ref_buffer;
        u32 addr_rec_buffer;
        u32 addr_output_bitstream_start;
        u32 addr_output_bitstream_end;
        u32 addr_external_sw;
        u32 addr_lctx;
        u32 addr_local_rec_buffer;
        u32 addr_spatial_context;
        u16 bitstream_offset;
        u16 sampling_mode;
        u32 addr_param_out;
        u32 addr_scaling_matrix;
        u32 addr_scaling_matrix_dir;
        u32 addr_cabac_context_buffer;
        u32 reserved3;
        u32 reserved4;
        s16 gmv_x;
        s16 gmv_y;
        u16 window_width;
        u16 window_height;
        u16 window_horizontal_offset;
        u16 window_vertical_offset;
        u32 addr_roi;
        u32 addr_slice_header;
        u16 slice_header_size_in_bits;
        u16 slice_header_offset0;
        u16 slice_header_offset1;
        u16 slice_header_offset2;
        u32 reserved5;
        u32 reserved6;
        u16 reserved7;
        u16 reserved8;
        u16 slice_synchro_enable;
        u16 max_slice_number;
        u32 rgb2_yuv_y_coeff;
        u32 rgb2_yuv_u_coeff;
        u32 rgb2_yuv_v_coeff;
        u32 slice_byte_size;
        u16 max_air_intra_mb_nb;
        u16 brc_no_skip;
        u32 addr_temporal_context;
        u32 addr_brc_in_out_parameter;
};
 356
/*
 * struct hva_h264_slice_po - per-slice output parameters
 *
 * @slice_size: slice size
 * @slice_start_time: start time
 * @slice_end_time: stop time
 * @slice_num: slice number
 */
struct hva_h264_slice_po {
        u32 slice_size;
        u32 slice_start_time;
        u32 slice_end_time;
        u32 slice_num;
};
 371
/*
 * struct hva_h264_po - output parameters of an encoding task
 *
 * @bitstream_size: bitstream size
 * @dct_bitstream_size: dct bitstream size
 * @stuffing_bits: number of stuffing bits inserted by the encoder
 * @removal_time: removal time of current frame (nb of ticks 1/framerate)
 * @hvc_start_time: hvc start time
 * @hvc_stop_time: hvc stop time
 * @slice_count: slice count
 * @reserved0: reserved
 * @slice_params: per-slice output parameters, 16 entries
 */
struct hva_h264_po {
        u32 bitstream_size;
        u32 dct_bitstream_size;
        u32 stuffing_bits;
        u32 removal_time;
        u32 hvc_start_time;
        u32 hvc_stop_time;
        u32 slice_count;
        u32 reserved0;
        struct hva_h264_slice_po slice_params[16];
};
 394
/*
 * struct hva_h264_task - layout of the task buffer
 *
 * @td: task descriptor (encoding parameters and buffer addresses)
 * @po: output parameters; the address of this member inside the task
 *      buffer is what gets programmed in td.addr_param_out
 */
struct hva_h264_task {
        struct hva_h264_td td;
        struct hva_h264_po po;
};
 399
/*
 * struct hva_h264_ctx - H.264 encoder private context
 *
 * @seq_info:  sequence information buffer; the spatial and temporal
 *             context, BRC parameter and slice header addresses of the
 *             task descriptor are all computed from its physical address
 * @ref_frame: reference frame buffer
 * @rec_frame: reconstructed frame buffer
 * @task:      task descriptor buffer (holds a struct hva_h264_task)
 */
struct hva_h264_ctx {
        struct hva_buffer *seq_info;
        struct hva_buffer *ref_frame;
        struct hva_buffer *rec_frame;
        struct hva_buffer *task;
};
 414
 415static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
 416                                      u8 *slice_header_addr,
 417                                      struct hva_controls *ctrls,
 418                                      int frame_num,
 419                                      u16 *header_size,
 420                                      u16 *header_offset0,
 421                                      u16 *header_offset1,
 422                                      u16 *header_offset2)
 423{
 424        /*
 425         * with this HVA hardware version, part of the slice header is computed
 426         * on host and part by hardware.
 427         * The part of host is precomputed and available through this array.
 428         */
 429        struct device *dev = ctx_to_dev(pctx);
 430        int  cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
 431        const unsigned char slice_header[] = { 0x00, 0x00, 0x00, 0x01,
 432                                               0x41, 0x34, 0x07, 0x00};
 433        int idr_pic_id = frame_num % 2;
 434        enum hva_picture_coding_type type;
 435        u32 frame_order = frame_num % ctrls->gop_size;
 436
 437        if (!(frame_num % ctrls->gop_size))
 438                type = PICTURE_CODING_TYPE_I;
 439        else
 440                type = PICTURE_CODING_TYPE_P;
 441
 442        memcpy(slice_header_addr, slice_header, sizeof(slice_header));
 443
 444        *header_size = 56;
 445        *header_offset0 = 40;
 446        *header_offset1 = 13;
 447        *header_offset2 = 0;
 448
 449        if (type == PICTURE_CODING_TYPE_I) {
 450                slice_header_addr[4] = 0x65;
 451                slice_header_addr[5] = 0x11;
 452
 453                /* toggle the I frame */
 454                if ((frame_num / ctrls->gop_size) % 2) {
 455                        *header_size += 4;
 456                        *header_offset1 += 4;
 457                        slice_header_addr[6] = 0x04;
 458                        slice_header_addr[7] = 0x70;
 459
 460                } else {
 461                        *header_size += 2;
 462                        *header_offset1 += 2;
 463                        slice_header_addr[6] = 0x09;
 464                        slice_header_addr[7] = 0xC0;
 465                }
 466        } else {
 467                if (ctrls->entropy_mode == cabac) {
 468                        *header_size += 1;
 469                        *header_offset1 += 1;
 470                        slice_header_addr[7] = 0x80;
 471                }
 472                /*
 473                 * update slice header with P frame order
 474                 * frame order is limited to 16 (coded on 4bits only)
 475                 */
 476                slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
 477                slice_header_addr[6] += ((frame_order & 0x03) << 6);
 478        }
 479
 480        dev_dbg(dev,
 481                "%s   %s slice header order %d idrPicId %d header size %d\n",
 482                pctx->name, __func__, frame_order, idr_pic_id, *header_size);
 483        return 0;
 484}
 485
 486static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
 487                                  unsigned int stuffing_bytes, u8 *addr,
 488                                  unsigned int stream_size, unsigned int *size)
 489{
 490        struct device *dev = ctx_to_dev(pctx);
 491        const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
 492
 493        dev_dbg(dev, "%s   %s stuffing bytes %d\n", pctx->name, __func__,
 494                stuffing_bytes);
 495
 496        if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) {
 497                dev_dbg(dev, "%s   %s too many stuffing bytes %d\n",
 498                        pctx->name, __func__, stuffing_bytes);
 499                return 0;
 500        }
 501
 502        /* start code */
 503        memcpy(addr + *size, start, sizeof(start));
 504        *size += sizeof(start);
 505
 506        /* nal_unit_type */
 507        addr[*size] = NALU_TYPE_FILLER_DATA;
 508        *size += 1;
 509
 510        memset(addr + *size, 0xff, stuffing_bytes);
 511        *size += stuffing_bytes;
 512
 513        addr[*size] = 0x80;
 514        *size += 1;
 515
 516        return 0;
 517}
 518
 519static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
 520                                 enum hva_h264_sei_payload_type type,
 521                                 u8 *addr, u32 *size)
 522{
 523        struct device *dev = ctx_to_dev(pctx);
 524        const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
 525        struct hva_h264_stereo_video_sei info;
 526        u8 offset = 7;
 527        u8 msg = 0;
 528
 529        /* start code */
 530        memcpy(addr + *size, start, sizeof(start));
 531        *size += sizeof(start);
 532
 533        /* nal_unit_type */
 534        addr[*size] = NALU_TYPE_SEI;
 535        *size += 1;
 536
 537        /* payload type */
 538        addr[*size] = type;
 539        *size += 1;
 540
 541        switch (type) {
 542        case SEI_STEREO_VIDEO_INFO:
 543                memset(&info, 0, sizeof(info));
 544
 545                /* set to top/bottom frame packing arrangement */
 546                info.field_views_flag = 1;
 547                info.top_field_is_left_view_flag = 1;
 548
 549                /* payload size */
 550                addr[*size] = 1;
 551                *size += 1;
 552
 553                /* payload */
 554                msg = info.field_views_flag << offset--;
 555
 556                if (info.field_views_flag) {
 557                        msg |= info.top_field_is_left_view_flag <<
 558                               offset--;
 559                } else {
 560                        msg |= info.current_frame_is_left_view_flag <<
 561                               offset--;
 562                        msg |= info.next_frame_is_second_view_flag <<
 563                               offset--;
 564                }
 565                msg |= info.left_view_self_contained_flag << offset--;
 566                msg |= info.right_view_self_contained_flag << offset--;
 567
 568                addr[*size] = msg;
 569                *size += 1;
 570
 571                addr[*size] = 0x80;
 572                *size += 1;
 573
 574                return 0;
 575        case SEI_BUFFERING_PERIOD:
 576        case SEI_PICTURE_TIMING:
 577        case SEI_FRAME_PACKING_ARRANGEMENT:
 578        default:
 579                dev_err(dev, "%s   sei nal type not supported %d\n",
 580                        pctx->name, type);
 581                return -EINVAL;
 582        }
 583}
 584
 585static int hva_h264_prepare_task(struct hva_ctx *pctx,
 586                                 struct hva_h264_task *task,
 587                                 struct hva_frame *frame,
 588                                 struct hva_stream *stream)
 589{
 590        struct hva_dev *hva = ctx_to_hdev(pctx);
 591        struct device *dev = ctx_to_dev(pctx);
 592        struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
 593        struct hva_buffer *seq_info = ctx->seq_info;
 594        struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
 595        struct hva_buffer *loc_rec_frame = ctx->rec_frame;
 596        struct hva_h264_td *td = &task->td;
 597        struct hva_controls *ctrls = &pctx->ctrls;
 598        struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
 599        int cavlc =  V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
 600        u32 frame_num = pctx->stream_num;
 601        u32 addr_esram = hva->esram_addr;
 602        enum v4l2_mpeg_video_h264_level level;
 603        dma_addr_t paddr = 0;
 604        u8 *slice_header_vaddr;
 605        u32 frame_width = frame->info.aligned_width;
 606        u32 frame_height = frame->info.aligned_height;
 607        u32 max_cpb_buffer_size;
 608        unsigned int payload = stream->bytesused;
 609        u32 max_bitrate;
 610
 611        /* check width and height parameters */
 612        if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
 613            (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
 614                dev_err(dev,
 615                        "%s   width(%d) or height(%d) exceeds limits (%dx%d)\n",
 616                        pctx->name, frame_width, frame_height,
 617                        H264_MAX_SIZE_W, H264_MAX_SIZE_H);
 618                pctx->frame_errors++;
 619                return -EINVAL;
 620        }
 621
 622        level = ctrls->level;
 623
 624        memset(td, 0, sizeof(struct hva_h264_td));
 625
 626        td->frame_width = frame_width;
 627        td->frame_height = frame_height;
 628
 629        /* set frame alignment */
 630        td->window_width =  frame_width;
 631        td->window_height = frame_height;
 632        td->window_horizontal_offset = 0;
 633        td->window_vertical_offset = 0;
 634
 635        td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
 636
 637        /* pic_order_cnt_type hard coded to '2' as only I & P frames */
 638        td->pic_order_cnt_type = 2;
 639
 640        /* useConstrainedIntraFlag set to false for better coding efficiency */
 641        td->use_constrained_intra_flag = false;
 642        td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
 643                        ? BRC_TYPE_CBR : BRC_TYPE_VBR;
 644
 645        td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
 646                                  CABAC;
 647
 648        td->bit_rate = ctrls->bitrate;
 649
 650        /* set framerate, framerate = 1 n/ time per frame */
 651        if (time_per_frame->numerator >= 536) {
 652                /*
 653                 * due to a hardware bug, framerate denominator can't exceed
 654                 * 536 (BRC overflow). Compute nearest framerate
 655                 */
 656                td->framerate_den = 1;
 657                td->framerate_num = (time_per_frame->denominator +
 658                                    (time_per_frame->numerator >> 1) - 1) /
 659                                    time_per_frame->numerator;
 660
 661                /*
 662                 * update bitrate to introduce a correction due to
 663                 * the new framerate
 664                 * new bitrate = (old bitrate * new framerate) / old framerate
 665                 */
 666                td->bit_rate /= time_per_frame->numerator;
 667                td->bit_rate *= time_per_frame->denominator;
 668                td->bit_rate /= td->framerate_num;
 669        } else {
 670                td->framerate_den = time_per_frame->numerator;
 671                td->framerate_num = time_per_frame->denominator;
 672        }
 673
 674        /* compute maximum bitrate depending on profile */
 675        if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
 676                max_bitrate = h264_infos_list[level].max_bitrate *
 677                              H264_FACTOR_HIGH;
 678        else
 679                max_bitrate = h264_infos_list[level].max_bitrate *
 680                              H264_FACTOR_BASELINE;
 681
 682        /* check if bitrate doesn't exceed max size */
 683        if (td->bit_rate > max_bitrate) {
 684                dev_dbg(dev,
 685                        "%s   bitrate (%d) larger than level and profile allow, clip to %d\n",
 686                        pctx->name, td->bit_rate, max_bitrate);
 687                td->bit_rate = max_bitrate;
 688        }
 689
 690        /* convert cpb_buffer_size in bits */
 691        td->cpb_buffer_size = ctrls->cpb_size * 8000;
 692
 693        /* compute maximum cpb buffer size depending on profile */
 694        if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
 695                max_cpb_buffer_size =
 696                    h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
 697        else
 698                max_cpb_buffer_size =
 699                    h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
 700
 701        /* check if cpb buffer size doesn't exceed max size */
 702        if (td->cpb_buffer_size > max_cpb_buffer_size) {
 703                dev_dbg(dev,
 704                        "%s   cpb size larger than level %d allows, clip to %d\n",
 705                        pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
 706                td->cpb_buffer_size = max_cpb_buffer_size;
 707        }
 708
 709        /* enable skipping in the Bitrate Controller */
 710        td->brc_no_skip = 0;
 711
 712        /* initial delay */
 713        if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
 714            td->bit_rate)
 715                td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
 716        else
 717                td->delay = 0;
 718
 719        switch (frame->info.pixelformat) {
 720        case V4L2_PIX_FMT_NV12:
 721                td->sampling_mode = SAMPLING_MODE_NV12;
 722                break;
 723        case V4L2_PIX_FMT_NV21:
 724                td->sampling_mode = SAMPLING_MODE_NV21;
 725                break;
 726        default:
 727                dev_err(dev, "%s   invalid source pixel format\n",
 728                        pctx->name);
 729                pctx->frame_errors++;
 730                return -EINVAL;
 731        }
 732
 733        /*
 734         * fill matrix color converter (RGB to YUV)
 735         * Y = 0,299 R + 0,587 G + 0,114 B
 736         * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
 737         * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
 738         */
 739        td->rgb2_yuv_y_coeff = 0x12031008;
 740        td->rgb2_yuv_u_coeff = 0x800EF7FB;
 741        td->rgb2_yuv_v_coeff = 0x80FEF40E;
 742
 743        /* enable/disable transform mode */
 744        td->transform_mode = ctrls->dct8x8;
 745
 746        /* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
 747        td->encoder_complexity = 2;
 748
 749        /* quant fix to 28, default VBR value */
 750        td->quant = 28;
 751
 752        if (td->framerate_den == 0) {
 753                dev_err(dev, "%s   invalid framerate\n", pctx->name);
 754                pctx->frame_errors++;
 755                return -EINVAL;
 756        }
 757
 758        /* if automatic framerate, deactivate bitrate controller */
 759        if (td->framerate_num == 0)
 760                td->brc_type = 0;
 761
 762        /* compliancy fix to true */
 763        td->strict_hrd_compliancy = 1;
 764
 765        /* set minimum & maximum quantizers */
 766        td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
 767        td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
 768
 769        td->addr_source_buffer = frame->paddr;
 770        td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
 771        td->addr_rec_buffer = loc_rec_frame->paddr;
 772
 773        td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
 774
 775        td->addr_output_bitstream_start = (u32)stream->paddr;
 776        td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
 777                               BITSTREAM_OFFSET_MASK;
 778
 779        td->addr_param_out = (u32)ctx->task->paddr +
 780                             offsetof(struct hva_h264_task, po);
 781
 782        /* swap spatial and temporal context */
 783        if (frame_num % 2) {
 784                paddr = seq_info->paddr;
 785                td->addr_spatial_context =  ALIGN(paddr, 0x100);
 786                paddr = seq_info->paddr + DATA_SIZE(frame_width,
 787                                                        frame_height);
 788                td->addr_temporal_context = ALIGN(paddr, 0x100);
 789        } else {
 790                paddr = seq_info->paddr;
 791                td->addr_temporal_context = ALIGN(paddr, 0x100);
 792                paddr = seq_info->paddr + DATA_SIZE(frame_width,
 793                                                        frame_height);
 794                td->addr_spatial_context =  ALIGN(paddr, 0x100);
 795        }
 796
 797        paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
 798
 799        td->addr_brc_in_out_parameter =  ALIGN(paddr, 0x100);
 800
 801        paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
 802        td->addr_slice_header =  ALIGN(paddr, 0x100);
 803        td->addr_external_sw =  ALIGN(addr_esram, 0x100);
 804
 805        addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
 806        td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
 807
 808        addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
 809        td->addr_lctx = ALIGN(addr_esram, 0x100);
 810
 811        addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
 812        td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
 813
 814        if (!(frame_num % ctrls->gop_size)) {
 815                td->picture_coding_type = PICTURE_CODING_TYPE_I;
 816                stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
 817        } else {
 818                td->picture_coding_type = PICTURE_CODING_TYPE_P;
 819                stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
 820        }
 821
 822        /* fill the slice header part */
 823        slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
 824                             seq_info->paddr);
 825
 826        hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
 827                                   &td->slice_header_size_in_bits,
 828                                   &td->slice_header_offset0,
 829                                   &td->slice_header_offset1,
 830                                   &td->slice_header_offset2);
 831
 832        td->chroma_qp_index_offset = 2;
 833        td->slice_synchro_enable = 0;
 834        td->max_slice_number = 1;
 835
 836        /*
 837         * check the sps/pps header size for key frame only
 838         * sps/pps header was previously fill by libv4l
 839         * during qbuf of stream buffer
 840         */
 841        if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
 842            (payload > MAX_SPS_PPS_SIZE)) {
 843                dev_err(dev, "%s   invalid sps/pps size %d\n", pctx->name,
 844                        payload);
 845                pctx->frame_errors++;
 846                return -EINVAL;
 847        }
 848
 849        if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
 850                payload = 0;
 851
 852        /* add SEI nal (video stereo info) */
 853        if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
 854                                                   (u8 *)stream->vaddr,
 855                                                   &payload)) {
 856                dev_err(dev, "%s   fail to get SEI nal\n", pctx->name);
 857                pctx->frame_errors++;
 858                return -EINVAL;
 859        }
 860
 861        /* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
 862        td->non_vcl_nalu_size = payload * 8;
 863
 864        /* compute bitstream offset & new start address of bitstream */
 865        td->addr_output_bitstream_start += ((payload >> 4) << 4);
 866        td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
 867
 868        stream->bytesused = payload;
 869
 870        return 0;
 871}
 872
 873static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
 874{
 875        struct hva_h264_po *po = &task->po;
 876
 877        return po->bitstream_size;
 878}
 879
 880static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
 881{
 882        struct hva_h264_po *po = &task->po;
 883
 884        return po->stuffing_bits >> 3;
 885}
 886
 887static int hva_h264_open(struct hva_ctx *pctx)
 888{
 889        struct device *dev = ctx_to_dev(pctx);
 890        struct hva_h264_ctx *ctx;
 891        struct hva_dev *hva = ctx_to_hdev(pctx);
 892        u32 frame_width = pctx->frameinfo.aligned_width;
 893        u32 frame_height = pctx->frameinfo.aligned_height;
 894        u32 size;
 895        int ret;
 896
 897        /* check esram size necessary to encode a frame */
 898        size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
 899               LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
 900               CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
 901               CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
 902
 903        if (hva->esram_size < size) {
 904                dev_err(dev, "%s   not enough esram (max:%d request:%d)\n",
 905                        pctx->name, hva->esram_size, size);
 906                ret = -EINVAL;
 907                goto err;
 908        }
 909
 910        /* allocate context for codec */
 911        ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
 912        if (!ctx) {
 913                ret = -ENOMEM;
 914                goto err;
 915        }
 916
 917        /* allocate sequence info buffer */
 918        ret = hva_mem_alloc(pctx,
 919                            2 * DATA_SIZE(frame_width, frame_height) +
 920                            SLICE_HEADER_SIZE +
 921                            BRC_DATA_SIZE,
 922                            "hva sequence info",
 923                            &ctx->seq_info);
 924        if (ret) {
 925                dev_err(dev,
 926                        "%s   failed to allocate sequence info buffer\n",
 927                        pctx->name);
 928                goto err_ctx;
 929        }
 930
 931        /* allocate reference frame buffer */
 932        ret = hva_mem_alloc(pctx,
 933                            frame_width * frame_height * 3 / 2,
 934                            "hva reference frame",
 935                            &ctx->ref_frame);
 936        if (ret) {
 937                dev_err(dev, "%s   failed to allocate reference frame buffer\n",
 938                        pctx->name);
 939                goto err_seq_info;
 940        }
 941
 942        /* allocate reconstructed frame buffer */
 943        ret = hva_mem_alloc(pctx,
 944                            frame_width * frame_height * 3 / 2,
 945                            "hva reconstructed frame",
 946                            &ctx->rec_frame);
 947        if (ret) {
 948                dev_err(dev,
 949                        "%s   failed to allocate reconstructed frame buffer\n",
 950                        pctx->name);
 951                goto err_ref_frame;
 952        }
 953
 954        /* allocate task descriptor */
 955        ret = hva_mem_alloc(pctx,
 956                            sizeof(struct hva_h264_task),
 957                            "hva task descriptor",
 958                            &ctx->task);
 959        if (ret) {
 960                dev_err(dev,
 961                        "%s   failed to allocate task descriptor\n",
 962                        pctx->name);
 963                goto err_rec_frame;
 964        }
 965
 966        pctx->priv = (void *)ctx;
 967
 968        return 0;
 969
 970err_rec_frame:
 971        hva_mem_free(pctx, ctx->rec_frame);
 972err_ref_frame:
 973        hva_mem_free(pctx, ctx->ref_frame);
 974err_seq_info:
 975        hva_mem_free(pctx, ctx->seq_info);
 976err_ctx:
 977        devm_kfree(dev, ctx);
 978err:
 979        pctx->sys_errors++;
 980        return ret;
 981}
 982
 983static int hva_h264_close(struct hva_ctx *pctx)
 984{
 985        struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
 986        struct device *dev = ctx_to_dev(pctx);
 987
 988        if (ctx->seq_info)
 989                hva_mem_free(pctx, ctx->seq_info);
 990
 991        if (ctx->ref_frame)
 992                hva_mem_free(pctx, ctx->ref_frame);
 993
 994        if (ctx->rec_frame)
 995                hva_mem_free(pctx, ctx->rec_frame);
 996
 997        if (ctx->task)
 998                hva_mem_free(pctx, ctx->task);
 999
1000        devm_kfree(dev, ctx);
1001
1002        return 0;
1003}
1004
1005static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
1006                           struct hva_stream *stream)
1007{
1008        struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
1009        struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr;
1010        u32 stuffing_bytes = 0;
1011        int ret = 0;
1012
1013        ret = hva_h264_prepare_task(pctx, task, frame, stream);
1014        if (ret)
1015                goto err;
1016
1017        ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
1018        if (ret)
1019                goto err;
1020
1021        pctx->stream_num++;
1022        stream->bytesused += hva_h264_get_stream_size(task);
1023
1024        stuffing_bytes = hva_h264_get_stuffing_bytes(task);
1025
1026        if (stuffing_bytes)
1027                hva_h264_fill_data_nal(pctx, stuffing_bytes,
1028                                       (u8 *)stream->vaddr,
1029                                       stream->size,
1030                                       &stream->bytesused);
1031
1032        /* switch reference & reconstructed frame */
1033        swap(ctx->ref_frame, ctx->rec_frame);
1034
1035        return 0;
1036err:
1037        stream->bytesused = 0;
1038        return ret;
1039}
1040
1041const struct hva_enc nv12h264enc = {
1042        .name = "H264(NV12)",
1043        .pixelformat = V4L2_PIX_FMT_NV12,
1044        .streamformat = V4L2_PIX_FMT_H264,
1045        .max_width = H264_MAX_SIZE_W,
1046        .max_height = H264_MAX_SIZE_H,
1047        .open = hva_h264_open,
1048        .close = hva_h264_close,
1049        .encode = hva_h264_encode,
1050};
1051
1052const struct hva_enc nv21h264enc = {
1053        .name = "H264(NV21)",
1054        .pixelformat = V4L2_PIX_FMT_NV21,
1055        .streamformat = V4L2_PIX_FMT_H264,
1056        .max_width = H264_MAX_SIZE_W,
1057        .max_height = H264_MAX_SIZE_H,
1058        .open = hva_h264_open,
1059        .close = hva_h264_close,
1060        .encode = hva_h264_encode,
1061};
1062